1
0

[HUDI-406]: added default partition path in TimestampBasedKeyGenerator

This commit is contained in:
Pratyaksh Sharma
2020-01-03 16:51:39 +05:30
committed by Balaji Varadarajan
parent 2d5b79d96f
commit 8f935e779a
7 changed files with 24 additions and 27 deletions

View File

@@ -61,7 +61,7 @@ public class ComplexKeyGenerator extends KeyGenerator {
boolean keyIsNullEmpty = true;
StringBuilder recordKey = new StringBuilder();
for (String recordKeyField : recordKeyFields) {
String recordKeyValue = DataSourceUtils.getNullableNestedFieldValAsString(record, recordKeyField);
String recordKeyValue = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField, true);
if (recordKeyValue == null) {
recordKey.append(recordKeyField + ":" + NULL_RECORDKEY_PLACEHOLDER + ",");
} else if (recordKeyValue.isEmpty()) {
@@ -79,7 +79,7 @@ public class ComplexKeyGenerator extends KeyGenerator {
StringBuilder partitionPath = new StringBuilder();
for (String partitionPathField : partitionPathFields) {
String fieldVal = DataSourceUtils.getNullableNestedFieldValAsString(record, partitionPathField);
String fieldVal = DataSourceUtils.getNestedFieldValAsString(record, partitionPathField, true);
if (fieldVal == null || fieldVal.isEmpty()) {
partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + DEFAULT_PARTITION_PATH
: DEFAULT_PARTITION_PATH);

View File

@@ -52,29 +52,18 @@ import java.util.stream.Collectors;
*/
public class DataSourceUtils {
/**
* Obtain value of the provided nullable field as string, denoted by dot notation. e.g: a.b.c
*/
public static String getNullableNestedFieldValAsString(GenericRecord record, String fieldName) {
try {
return getNestedFieldValAsString(record, fieldName);
} catch (HoodieException e) {
return null;
}
}
/**
* Obtain value of the provided field as string, denoted by dot notation. e.g: a.b.c
*/
public static String getNestedFieldValAsString(GenericRecord record, String fieldName) {
Object obj = getNestedFieldVal(record, fieldName);
return obj.toString();
public static String getNestedFieldValAsString(GenericRecord record, String fieldName, boolean returnNullIfNotFound) {
Object obj = getNestedFieldVal(record, fieldName, returnNullIfNotFound);
return (obj == null) ? null : obj.toString();
}
/**
* Obtain value of the provided field, denoted by dot notation. e.g: a.b.c
*/
public static Object getNestedFieldVal(GenericRecord record, String fieldName) {
public static Object getNestedFieldVal(GenericRecord record, String fieldName, boolean returnNullIfNotFound) {
String[] parts = fieldName.split("\\.");
GenericRecord valueNode = record;
int i = 0;
@@ -96,9 +85,14 @@ public class DataSourceUtils {
valueNode = (GenericRecord) val;
}
}
throw new HoodieException(
if (returnNullIfNotFound) {
return null;
} else {
throw new HoodieException(
fieldName + "(Part -" + parts[i] + ") field not found in record. Acceptable fields were :"
+ valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
+ valueNode.getSchema().getFields().stream().map(Field::name).collect(Collectors.toList()));
}
}
/**

View File

@@ -37,7 +37,7 @@ public class NonpartitionedKeyGenerator extends SimpleKeyGenerator {
@Override
public HoodieKey getKey(GenericRecord record) {
String recordKey = DataSourceUtils.getNullableNestedFieldValAsString(record, recordKeyField);
String recordKey = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField, true);
if (recordKey == null || recordKey.isEmpty()) {
throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for field: \"" + recordKeyField + "\" cannot be null or empty.");
}

View File

@@ -51,12 +51,12 @@ public class SimpleKeyGenerator extends KeyGenerator {
throw new HoodieKeyException("Unable to find field names for record key or partition path in cfg");
}
String recordKey = DataSourceUtils.getNullableNestedFieldValAsString(record, recordKeyField);
String recordKey = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField, true);
if (recordKey == null || recordKey.isEmpty()) {
throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for field: \"" + recordKeyField + "\" cannot be null or empty.");
}
String partitionPath = DataSourceUtils.getNullableNestedFieldValAsString(record, partitionPathField);
String partitionPath = DataSourceUtils.getNestedFieldValAsString(record, partitionPathField, true);
if (partitionPath == null || partitionPath.isEmpty()) {
partitionPath = DEFAULT_PARTITION_PATH;
}

View File

@@ -98,7 +98,7 @@ private[hudi] object HoodieSparkSqlWriter {
val genericRecords: RDD[GenericRecord] = AvroConversionUtils.createRdd(df, structName, nameSpace)
val hoodieAllIncomingRecords = genericRecords.map(gr => {
val orderingVal = DataSourceUtils.getNestedFieldValAsString(
gr, parameters(PRECOMBINE_FIELD_OPT_KEY)).asInstanceOf[Comparable[_]]
gr, parameters(PRECOMBINE_FIELD_OPT_KEY), false).asInstanceOf[Comparable[_]]
DataSourceUtils.createHoodieRecord(gr,
orderingVal, keyGenerator.getKey(gr), parameters(PAYLOAD_CLASS_OPT_KEY))
}).toJavaRDD()

View File

@@ -313,7 +313,7 @@ public class DeltaSync implements Serializable {
JavaRDD<GenericRecord> avroRDD = avroRDDOptional.get();
JavaRDD<HoodieRecord> records = avroRDD.map(gr -> {
HoodieRecordPayload payload = DataSourceUtils.createPayload(cfg.payloadClassName, gr,
(Comparable) DataSourceUtils.getNestedFieldVal(gr, cfg.sourceOrderingField));
(Comparable) DataSourceUtils.getNestedFieldVal(gr, cfg.sourceOrderingField, false));
return new HoodieRecord<>(keyGenerator.getKey(gr), payload);
});

View File

@@ -81,7 +81,10 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
@Override
public HoodieKey getKey(GenericRecord record) {
Object partitionVal = DataSourceUtils.getNestedFieldVal(record, partitionPathField);
Object partitionVal = DataSourceUtils.getNestedFieldVal(record, partitionPathField, true);
if (partitionVal == null) {
partitionVal = 1L;
}
SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat);
partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
@@ -97,11 +100,11 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator {
unixTime = inputDateFormat.parse(partitionVal.toString()).getTime() / 1000;
} else {
throw new HoodieNotSupportedException(
"Unexpected type for partition field: " + partitionVal.getClass().getName());
"Unexpected type for partition field: " + partitionVal.getClass().getName());
}
Date timestamp = this.timestampType == TimestampType.EPOCHMILLISECONDS ? new Date(unixTime) : new Date(unixTime * 1000);
String recordKey = DataSourceUtils.getNullableNestedFieldValAsString(record, recordKeyField);
String recordKey = DataSourceUtils.getNestedFieldValAsString(record, recordKeyField, true);
if (recordKey == null || recordKey.isEmpty()) {
throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for field: \"" + recordKeyField + "\" cannot be null or empty.");
}