1
0

[HUDI-3365] Make sure Metadata Table records are updated appropriately on HDFS (#4739)

- This change makes sure Metadata Table (MT) records are updated appropriately on HDFS. Previously, after log-file append operations, MT records were updated with just the size of the deltas being appended to the original files. This was found to cause issues with rollbacks, which instead update the MT with records bearing the full file size.

- To hedge against similar issues going forward, this PR eliminates this discrepancy and streamlines the flow so that the MT always ingests records bearing full file sizes.
This commit is contained in:
Alexey Kudinkin
2022-03-07 12:38:27 -08:00
committed by GitHub
parent f0bcee3c01
commit a66fd40692
18 changed files with 415 additions and 255 deletions

View File

@@ -1077,6 +1077,8 @@ public class TestCleaner extends HoodieClientTestBase {
writeStat.setPartitionPath(partition);
writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId));
writeStat.setFileId(newFileId);
writeStat.setTotalWriteBytes(1);
writeStat.setFileSizeInBytes(1);
replaceMetadata.addWriteStat(partition, writeStat);
}
return Pair.of(requestedReplaceMetadata, replaceMetadata);
@@ -1756,6 +1758,8 @@ public class TestCleaner extends HoodieClientTestBase {
writeStat.setPartitionPath(partitionPath);
writeStat.setPath(partitionPath + "/" + getBaseFilename(instantTime, f));
writeStat.setFileId(f);
writeStat.setTotalWriteBytes(1);
writeStat.setFileSizeInBytes(1);
metadata.addWriteStat(partitionPath, writeStat);
}));
return metadata;

View File

@@ -102,6 +102,7 @@ import scala.Tuple2;
import static org.apache.hudi.common.util.CleanerUtils.convertCleanMetadata;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertLinesMatch;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
@@ -617,21 +618,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
Collections.sort(fsFileNames);
Collections.sort(metadataFilenames);
if ((fsFileNames.size() != metadataFilenames.size()) || (!fsFileNames.equals(metadataFilenames))) {
LOG.info("*** File system listing = " + Arrays.toString(fsFileNames.toArray()));
LOG.info("*** Metadata listing = " + Arrays.toString(metadataFilenames.toArray()));
for (String fileName : fsFileNames) {
if (!metadataFilenames.contains(fileName)) {
LOG.error(partition + "FsFilename " + fileName + " not found in Meta data");
}
}
for (String fileName : metadataFilenames) {
if (!fsFileNames.contains(fileName)) {
LOG.error(partition + "Metadata file " + fileName + " not found in original FS");
}
}
}
assertLinesMatch(fsFileNames, metadataFilenames);
assertEquals(fsStatuses.length, partitionToFilesMap.get(partitionPath.toString()).length);
// Block sizes should be valid