1
0

[HUDI-2170] [HUDI-1763] Always choose the latest record for HoodieRecordPayload (#3401)

This commit is contained in:
swuferhong
2021-08-11 10:20:55 +08:00
committed by GitHub
parent d1b4aa59bf
commit 5448cdde7e
17 changed files with 110 additions and 46 deletions

View File

@@ -176,6 +176,7 @@ public abstract class MultipleSparkJobExecutionStrategy<T extends HoodieRecordPa
HoodieTableConfig tableConfig = table.getMetaClient().getTableConfig();
recordIterators.add(HoodieFileSliceReader.getFileSliceReader(baseFileReader, scanner, readerSchema,
tableConfig.getPayloadClass(),
tableConfig.getPreCombineField(),
tableConfig.populateMetaFields() ? Option.empty() : Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(),
tableConfig.getPartitionFieldProp()))));
} catch (IOException e) {

View File

@@ -58,7 +58,7 @@ public class SparkWriteHelper<T extends HoodieRecordPayload,R> extends AbstractW
return new Tuple2<>(key, record);
}).reduceByKey((rec1, rec2) -> {
@SuppressWarnings("unchecked")
T reducedData = (T) rec1.getData().preCombine(rec2.getData());
T reducedData = (T) rec2.getData().preCombine(rec1.getData());
HoodieKey reducedKey = rec1.getData().equals(reducedData) ? rec1.getKey() : rec2.getKey();
return new HoodieRecord<T>(reducedKey, reducedData);