1
0

[HUDI-1196] Update HoodieKey when deduplicating records with global index (#2248)

- Works only for the overwrite payload (the default)
- Does not otherwise alter current semantics

Co-authored-by: Ryan Pifer <ryanpife@amazon.com>
This commit is contained in:
rmpifer
2020-12-01 13:50:46 -08:00
committed by GitHub
parent ac23d2587f
commit 78fd122594
2 changed files with 4 additions and 4 deletions

View File

@@ -59,10 +59,9 @@ public class SparkWriteHelper<T extends HoodieRecordPayload,R> extends AbstractW
}).reduceByKey((rec1, rec2) -> {
@SuppressWarnings("unchecked")
T reducedData = (T) rec1.getData().preCombine(rec2.getData());
// we cannot allow the user to change the key or partitionPath, since that will affect
// everything
// so pick it from one of the records.
return new HoodieRecord<T>(rec1.getKey(), reducedData);
HoodieKey reducedKey = rec1.getData().equals(reducedData) ? rec1.getKey() : rec2.getKey();
return new HoodieRecord<T>(reducedKey, reducedData);
}, parallelism).map(Tuple2::_2);
}