1
0

Fix for updating duplicate records in the same or different files in the same partition

This commit is contained in:
Sunil Ramaiah
2018-04-23 15:23:42 -07:00
committed by vinoth chandar
parent fa73a911cc
commit 4d1fba24c9
3 changed files with 294 additions and 3 deletions

View File

@@ -394,6 +394,13 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
if (v1._2().isPresent()) {
String filename = v1._2().get();
if (filename != null && !filename.isEmpty()) {
// When a record exists in multiple files in the same partition, rowKeyRecordPairRDD will have 2
// entries that share the exact same in-memory copy of the HoodieRecord, each paired with one of the 2
// separate filenames that the record is found in. This would result in setting currentLocation 2 times,
// and it would fail the second time. This check creates a new in-memory copy of the HoodieRecord.
if (record.getCurrentLocation() != null) {
record = new HoodieRecord<T>(record.getKey(), record.getData());
}
record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename),
FSUtils.getFileId(filename)));
}