1
0

[HUDI-1435] Fix bug in Marker File Reconciliation for Non-Partitioned datasets (#2301)

This commit is contained in:
Balaji Varadarajan
2020-12-14 22:24:12 -08:00
committed by GitHub
parent facde4c16f
commit 069a1dcf24

View File

@@ -460,7 +460,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
if (!invalidDataPaths.isEmpty()) {
LOG.info("Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream()
- .map(dp -> Pair.of(new Path(dp).getParent().toString(), new Path(basePath, dp).toString()))
+ .map(dp -> Pair.of(new Path(basePath, dp).getParent().toString(), new Path(basePath, dp).toString()))
.collect(Collectors.groupingBy(Pair::getKey));
// Ensure all files in the delete list are actually present. This is mandatory for an eventually consistent FS.