[HUDI-2909] Handle logical type in TimestampBasedKeyGenerator (#4203)
* [HUDI-2909] Handle logical type in TimestampBasedKeyGenerator. The timestamp-based key generator was returning different values for the row-writer and non-row-writer paths. This patch fixes the inconsistency, and the fix is guarded by a config flag (`hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled`).
This commit is contained in:
@@ -108,7 +108,6 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
|
||||
return writeMetadata;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Execute clustering to write inputRecords into new files as defined by rules in strategy parameters.
|
||||
* The number of new file groups created is bounded by numOutputGroups.
|
||||
@@ -141,7 +140,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
|
||||
getWriteConfig(), HoodieAvroUtils.addMetadataFields(schema)));
|
||||
} else if (strategyParams.containsKey(PLAN_STRATEGY_SORT_COLUMNS.key())) {
|
||||
return Option.of(new RDDCustomColumnsSortPartitioner(strategyParams.get(PLAN_STRATEGY_SORT_COLUMNS.key()).split(","),
|
||||
HoodieAvroUtils.addMetadataFields(schema)));
|
||||
HoodieAvroUtils.addMetadataFields(schema), getWriteConfig().isConsistentLogicalTimestampEnabled()));
|
||||
} else {
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
@@ -39,15 +39,18 @@ public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
|
||||
|
||||
private final String[] sortColumnNames;
|
||||
private final SerializableSchema serializableSchema;
|
||||
private final boolean consistentLogicalTimestampEnabled;
|
||||
|
||||
public RDDCustomColumnsSortPartitioner(HoodieWriteConfig config) {
|
||||
this.serializableSchema = new SerializableSchema(new Schema.Parser().parse(config.getSchema()));
|
||||
this.sortColumnNames = getSortColumnName(config);
|
||||
this.consistentLogicalTimestampEnabled = config.isConsistentLogicalTimestampEnabled();
|
||||
}
|
||||
|
||||
public RDDCustomColumnsSortPartitioner(String[] columnNames, Schema schema) {
|
||||
public RDDCustomColumnsSortPartitioner(String[] columnNames, Schema schema, boolean consistentLogicalTimestampEnabled) {
|
||||
this.sortColumnNames = columnNames;
|
||||
this.serializableSchema = new SerializableSchema(schema);
|
||||
this.consistentLogicalTimestampEnabled = consistentLogicalTimestampEnabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -55,9 +58,10 @@ public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
|
||||
int outputSparkPartitions) {
|
||||
final String[] sortColumns = this.sortColumnNames;
|
||||
final SerializableSchema schema = this.serializableSchema;
|
||||
final boolean consistentLogicalTimestampEnabled = this.consistentLogicalTimestampEnabled;
|
||||
return records.sortBy(
|
||||
record -> {
|
||||
Object recordValue = HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema);
|
||||
Object recordValue = HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema, consistentLogicalTimestampEnabled);
|
||||
// null values are replaced with empty string for null_first order
|
||||
if (recordValue == null) {
|
||||
return StringUtils.EMPTY_STRING;
|
||||
@@ -66,7 +70,6 @@ public class RDDCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
|
||||
}
|
||||
},
|
||||
true, outputSparkPartitions);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user