1
0

[HUDI-3322][HUDI-3343] Fixing Metadata Table Records Duplication Issues (#4716)

This change addresses issues where the Metadata Table ingests duplicated records, leading it to persist incorrect file sizes for the files referred to in those records.

Multiple issues were leading to that:

- [HUDI-3322] Incorrect Rollback Plan generation: the Rollback Plan generated for MOR tables was too expansive, listing all log files with the latest base instant as having been affected by the rollback, which led to invalid MT records referring to those files being ingested.
- [HUDI-3343] Metadata Table including Uncommitted Log Files during Bootstrap: Since MT is bootstrapped at the end of the commit operation execution (after FS activity, but before committing to the timeline), it was actually incorrectly ingesting some files that were part of the intermediate state of the operation being committed.

This change will unblock the stack of PRs based on #4556.
This commit is contained in:
Alexey Kudinkin
2022-02-02 13:10:51 -08:00
committed by GitHub
parent a68e1dc2db
commit 819e8018ff
19 changed files with 175 additions and 198 deletions

View File

@@ -270,8 +270,7 @@ public class TestTimelineUtils extends HoodieCommonTestHarness {
List<HoodieInstant> rollbacks = new ArrayList<>();
rollbacks.add(new HoodieInstant(false, actionType, commitTs));
HoodieRollbackStat rollbackStat = new HoodieRollbackStat(partition, deletedFiles, Collections.emptyList(), Collections.emptyMap(),
Collections.EMPTY_MAP);
HoodieRollbackStat rollbackStat = new HoodieRollbackStat(partition, deletedFiles, Collections.emptyList(), Collections.emptyMap());
List<HoodieRollbackStat> rollbackStats = new ArrayList<>();
rollbackStats.add(rollbackStat);
return TimelineMetadataUtils.convertRollbackMetadata(commitTs, Option.empty(), rollbacks, rollbackStats);

View File

@@ -556,7 +556,7 @@ public class TestIncrementalFSViewSync extends HoodieCommonTestHarness {
boolean isRestore) throws IOException {
Map<String, List<String>> partititonToFiles = deleteFiles(files);
List<HoodieRollbackStat> rollbackStats = partititonToFiles.entrySet().stream().map(e ->
new HoodieRollbackStat(e.getKey(), e.getValue(), new ArrayList<>(), new HashMap<>(), new HashMap<>())
new HoodieRollbackStat(e.getKey(), e.getValue(), new ArrayList<>(), new HashMap<>())
).collect(Collectors.toList());
List<HoodieInstant> rollbacks = new ArrayList<>();

View File

@@ -354,7 +354,6 @@ public class HoodieTestTable {
rollbackPartitionMetadata.setPartitionPath(entry.getKey());
rollbackPartitionMetadata.setSuccessDeleteFiles(entry.getValue());
rollbackPartitionMetadata.setFailedDeleteFiles(new ArrayList<>());
rollbackPartitionMetadata.setWrittenLogFiles(getWrittenLogFiles(instantTimeToDelete, entry));
long rollbackLogFileSize = 50 + RANDOM.nextInt(500);
String fileId = UUID.randomUUID().toString();
String logFileName = logFileName(instantTimeToDelete, fileId, 0);