[HUDI-607] Fix to allow creation/syncing of Hive tables partitioned by Date type columns (#1330)
This commit is contained in:
@@ -18,6 +18,8 @@
|
|||||||
|
|
||||||
package org.apache.hudi;
|
package org.apache.hudi;
|
||||||
|
|
||||||
|
import org.apache.avro.LogicalTypes;
|
||||||
|
import org.apache.avro.Schema;
|
||||||
import org.apache.hudi.client.HoodieReadClient;
|
import org.apache.hudi.client.HoodieReadClient;
|
||||||
import org.apache.hudi.client.HoodieWriteClient;
|
import org.apache.hudi.client.HoodieWriteClient;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
@@ -45,6 +47,7 @@ import org.apache.spark.api.java.JavaRDD;
|
|||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.time.LocalDate;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -80,7 +83,8 @@ public class DataSourceUtils {
|
|||||||
|
|
||||||
// return, if last part of name
|
// return, if last part of name
|
||||||
if (i == parts.length - 1) {
|
if (i == parts.length - 1) {
|
||||||
return val;
|
Schema fieldSchema = valueNode.getSchema().getField(part).schema();
|
||||||
|
return convertValueForSpecificDataTypes(fieldSchema, val);
|
||||||
} else {
|
} else {
|
||||||
// VC: Need a test here
|
// VC: Need a test here
|
||||||
if (!(val instanceof GenericRecord)) {
|
if (!(val instanceof GenericRecord)) {
|
||||||
@@ -99,6 +103,40 @@ public class DataSourceUtils {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method converts values for fields with certain Avro/Parquet data types that require special handling.
|
||||||
|
*
|
||||||
|
* Logical Date Type is converted to actual Date value instead of Epoch Integer which is how it is
|
||||||
|
* represented/stored in parquet.
|
||||||
|
*
|
||||||
|
* @param fieldSchema avro field schema
|
||||||
|
* @param fieldValue avro field value
|
||||||
|
* @return field value either converted (for certain data types) or as it is.
|
||||||
|
*/
|
||||||
|
private static Object convertValueForSpecificDataTypes(Schema fieldSchema, Object fieldValue) {
|
||||||
|
if (fieldSchema == null) {
|
||||||
|
return fieldValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isLogicalTypeDate(fieldSchema)) {
|
||||||
|
return LocalDate.ofEpochDay(Long.parseLong(fieldValue.toString()));
|
||||||
|
}
|
||||||
|
return fieldValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given an Avro field schema checks whether the field is of Logical Date Type or not.
|
||||||
|
*
|
||||||
|
* @param fieldSchema avro field schema
|
||||||
|
* @return boolean indicating whether fieldSchema is of Avro's Date Logical Type
|
||||||
|
*/
|
||||||
|
private static boolean isLogicalTypeDate(Schema fieldSchema) {
|
||||||
|
if (fieldSchema.getType() == Schema.Type.UNION) {
|
||||||
|
return fieldSchema.getTypes().stream().anyMatch(schema -> schema.getLogicalType() == LogicalTypes.date());
|
||||||
|
}
|
||||||
|
return fieldSchema.getLogicalType() == LogicalTypes.date();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a key generator class via reflection, passing in any configs needed.
|
* Create a key generator class via reflection, passing in any configs needed.
|
||||||
* <p>
|
* <p>
|
||||||
|
|||||||
61
hudi-spark/src/test/java/DataSourceUtilsTest.java
Normal file
61
hudi-spark/src/test/java/DataSourceUtilsTest.java
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.avro.Schema;
|
||||||
|
import org.apache.avro.generic.GenericData;
|
||||||
|
import org.apache.avro.generic.GenericRecord;
|
||||||
|
import org.apache.hudi.DataSourceUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
public class DataSourceUtilsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAvroRecordsFieldConversion() {
|
||||||
|
// There are fields event_date1, event_date2, event_date3 with logical type as Date. event_date1 & event_date3 are
|
||||||
|
// of UNION schema type, which is a union of null and date type in different orders. event_date2 is non-union
|
||||||
|
// date type
|
||||||
|
String avroSchemaString = "{\"type\": \"record\"," + "\"name\": \"events\"," + "\"fields\": [ "
|
||||||
|
+ "{\"name\": \"event_date1\", \"type\" : [{\"type\" : \"int\", \"logicalType\" : \"date\"}, \"null\"]},"
|
||||||
|
+ "{\"name\": \"event_date2\", \"type\" : {\"type\": \"int\", \"logicalType\" : \"date\"}},"
|
||||||
|
+ "{\"name\": \"event_date3\", \"type\" : [\"null\", {\"type\" : \"int\", \"logicalType\" : \"date\"}]},"
|
||||||
|
+ "{\"name\": \"event_name\", \"type\": \"string\"},"
|
||||||
|
+ "{\"name\": \"event_organizer\", \"type\": \"string\"}"
|
||||||
|
+ "]}";
|
||||||
|
|
||||||
|
Schema avroSchema = new Schema.Parser().parse(avroSchemaString);
|
||||||
|
GenericRecord record = new GenericData.Record(avroSchema);
|
||||||
|
record.put("event_date1", 18000);
|
||||||
|
record.put("event_date2", 18001);
|
||||||
|
record.put("event_date3", 18002);
|
||||||
|
record.put("event_name", "Hudi Meetup");
|
||||||
|
record.put("event_organizer", "Hudi PMC");
|
||||||
|
|
||||||
|
assertEquals(LocalDate.ofEpochDay(18000).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date1",
|
||||||
|
true));
|
||||||
|
assertEquals(LocalDate.ofEpochDay(18001).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date2",
|
||||||
|
true));
|
||||||
|
assertEquals(LocalDate.ofEpochDay(18002).toString(), DataSourceUtils.getNestedFieldValAsString(record, "event_date3",
|
||||||
|
true));
|
||||||
|
assertEquals("Hudi Meetup", DataSourceUtils.getNestedFieldValAsString(record, "event_name", true));
|
||||||
|
assertEquals("Hudi PMC", DataSourceUtils.getNestedFieldValAsString(record, "event_organizer", true));
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user