[HUDI-1435] Fix bug in Marker File Reconciliation for Non-Partitioned datasets (#2301)
This commit is contained in:
committed by
GitHub
parent
facde4c16f
commit
069a1dcf24
@@ -460,7 +460,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
|||||||
if (!invalidDataPaths.isEmpty()) {
|
if (!invalidDataPaths.isEmpty()) {
|
||||||
LOG.info("Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
|
LOG.info("Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
|
||||||
Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream()
|
Map<String, List<Pair<String, String>>> invalidPathsByPartition = invalidDataPaths.stream()
|
||||||
.map(dp -> Pair.of(new Path(dp).getParent().toString(), new Path(basePath, dp).toString()))
|
.map(dp -> Pair.of(new Path(basePath, dp).getParent().toString(), new Path(basePath, dp).toString()))
|
||||||
.collect(Collectors.groupingBy(Pair::getKey));
|
.collect(Collectors.groupingBy(Pair::getKey));
|
||||||
|
|
||||||
// Ensure all files in the delete list are actually present. This is mandatory for an eventually consistent FS.
|
// Ensure all files in the delete list are actually present. This is mandatory for an eventually consistent FS.
|
||||||
|
|||||||
Reference in New Issue
Block a user