Fix for updating duplicate records in same/different files in the same partition
This commit is contained in:
committed by
vinoth chandar
parent
fa73a911cc
commit
4d1fba24c9
@@ -394,6 +394,13 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
||||
if (v1._2().isPresent()) {
|
||||
String filename = v1._2().get();
|
||||
if (filename != null && !filename.isEmpty()) {
|
||||
// When you have a record in multiple files in the same partition, then rowKeyRecordPairRDD will have 2
|
||||
// entries with the exact same in-memory copy of the HoodieRecord and the 2 separate filenames that the
|
||||
// record is found in. This will result in setting currentLocation 2 times and it will fail the second time.
|
||||
// If the current location is already set, create a new in-memory copy of the HoodieRecord before setting it.
|
||||
if (record.getCurrentLocation() != null) {
|
||||
record = new HoodieRecord<T>(record.getKey(), record.getData());
|
||||
}
|
||||
record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename),
|
||||
FSUtils.getFileId(filename)));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user