[HUDI-2909] Handle logical type in TimestampBasedKeyGenerator (#4203)
* [HUDI-2909] Handle logical type in TimestampBasedKeyGenerator. The timestamp-based key generator was returning different values for the row-writer and non-row-writer paths. This patch fixes that, and the fix is guarded by a config flag (`hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled`).
This commit is contained in:
@@ -124,7 +124,8 @@ public abstract class JavaExecutionStrategy<T extends HoodieRecordPayload<T>>
|
||||
if (strategyParams.containsKey(PLAN_STRATEGY_SORT_COLUMNS.key())) {
|
||||
return Option.of(new JavaCustomColumnsSortPartitioner(
|
||||
strategyParams.get(PLAN_STRATEGY_SORT_COLUMNS.key()).split(","),
|
||||
HoodieAvroUtils.addMetadataFields(schema)));
|
||||
HoodieAvroUtils.addMetadataFields(schema),
|
||||
getWriteConfig().isConsistentLogicalTimestampEnabled()));
|
||||
} else {
|
||||
return Option.empty();
|
||||
}
|
||||
|
||||
@@ -39,18 +39,20 @@ public class JavaCustomColumnsSortPartitioner<T extends HoodieRecordPayload>
|
||||
|
||||
private final String[] sortColumnNames;
|
||||
private final Schema schema;
|
||||
private final boolean consistentLogicalTimestampEnabled;
|
||||
|
||||
public JavaCustomColumnsSortPartitioner(String[] columnNames, Schema schema) {
|
||||
public JavaCustomColumnsSortPartitioner(String[] columnNames, Schema schema, boolean consistentLogicalTimestampEnabled) {
|
||||
this.sortColumnNames = columnNames;
|
||||
this.schema = schema;
|
||||
this.consistentLogicalTimestampEnabled = consistentLogicalTimestampEnabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HoodieRecord<T>> repartitionRecords(
|
||||
List<HoodieRecord<T>> records, int outputSparkPartitions) {
|
||||
return records.stream().sorted((o1, o2) -> {
|
||||
Object values1 = HoodieAvroUtils.getRecordColumnValues(o1, sortColumnNames, schema);
|
||||
Object values2 = HoodieAvroUtils.getRecordColumnValues(o2, sortColumnNames, schema);
|
||||
Object values1 = HoodieAvroUtils.getRecordColumnValues(o1, sortColumnNames, schema, consistentLogicalTimestampEnabled);
|
||||
Object values2 = HoodieAvroUtils.getRecordColumnValues(o2, sortColumnNames, schema, consistentLogicalTimestampEnabled);
|
||||
return values1.toString().compareTo(values2.toString());
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -64,13 +64,13 @@ public class TestJavaBulkInsertInternalPartitioner extends HoodieJavaClientTestB
|
||||
|
||||
List<HoodieRecord> records = generateTestRecordsForBulkInsert(1000);
|
||||
testBulkInsertInternalPartitioner(
|
||||
new JavaCustomColumnsSortPartitioner(sortColumns, HoodieTestDataGenerator.AVRO_SCHEMA),
|
||||
new JavaCustomColumnsSortPartitioner(sortColumns, HoodieTestDataGenerator.AVRO_SCHEMA, false),
|
||||
records, true, generatePartitionNumRecords(records), Option.of(columnComparator));
|
||||
}
|
||||
|
||||
private Comparator<HoodieRecord> getCustomColumnComparator(Schema schema, String[] sortColumns) {
|
||||
return Comparator.comparing(
|
||||
record -> HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema).toString());
|
||||
record -> HoodieAvroUtils.getRecordColumnValues(record, sortColumns, schema, false).toString());
|
||||
}
|
||||
|
||||
private void verifyRecordAscendingOrder(List<HoodieRecord> records,
|
||||
|
||||
Reference in New Issue
Block a user