[HUDI-1740] Fix insert-overwrite API archival (#2784)
- Fix the problem of archiving replace commits - Fix the problem when getting an empty replacecommit.requested - Improve the logic of handling empty and non-empty requested/inflight commit files. Add unit tests to cover both the empty and non-empty inflight file cases, and clean up some unused test util methods Co-authored-by: yorkzero831 <yorkzero8312@gmail.com> Co-authored-by: zheren.yu <zheren.yu@paypay-corp.co.jp>
This commit is contained in:
@@ -241,6 +241,10 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
|
||||
.build();
|
||||
|
||||
// when using insert_overwrite or insert_overwrite_table
|
||||
// the first commit may have no replaceFileIds
|
||||
createReplaceMetadataWithoutReplaceFileId("000");
|
||||
|
||||
int numCommits = 4;
|
||||
int commitInstant = 100;
|
||||
for (int i = 0; i < numCommits; i++) {
|
||||
@@ -251,7 +255,7 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||
assertEquals(4, timeline.countInstants(), "Loaded 4 commits and the count should match");
|
||||
assertEquals(5, timeline.countInstants(), "Loaded 5 commits and the count should match");
|
||||
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
|
||||
boolean result = archiveLog.archiveIfRequired(context);
|
||||
assertTrue(result);
|
||||
@@ -513,13 +517,7 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
|
||||
HoodieCommitMetadata hoodieCommitMetadata = new HoodieCommitMetadata();
|
||||
hoodieCommitMetadata.setOperationType(WriteOperationType.INSERT);
|
||||
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-commitMetadata-converter")
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build())
|
||||
.build();
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
|
||||
|
||||
org.apache.hudi.avro.model.HoodieCommitMetadata expectedCommitMetadata = MetadataConversionUtils
|
||||
.convertCommitMetadata(hoodieCommitMetadata);
|
||||
@@ -681,6 +679,22 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
|
||||
assertEquals(notArchivedInstants, Arrays.asList(notArchivedInstant1, notArchivedInstant2, notArchivedInstant3), "");
|
||||
}
|
||||
|
||||
/**
 * Adds a replace commit at {@code instantTime} to the test table whose metadata carries
 * no replaced file ids.
 *
 * <p>The requested, inflight and completed metadata all use the
 * {@code INSERT_OVERWRITE_TABLE} operation type; the requested metadata is built with
 * version 1 and an empty extra-metadata map.
 *
 * @param instantTime instant time handed to {@code HoodieTestTable#addReplaceCommit}
 * @throws Exception declared by the signature; presumably propagated from the
 *                   {@code HoodieTestTable} call (not visible here — confirm)
 */
private void createReplaceMetadataWithoutReplaceFileId(String instantTime) throws Exception {
|
||||
|
||||
// create replace instant without a previous replace commit
|
||||
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
|
||||
.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE.toString())
|
||||
.setVersion(1)
|
||||
.setExtraMetadata(Collections.emptyMap())
|
||||
.build();
|
||||
// addReplaceFileId(...) is never called on the metadata below, so the commit
// records no replaced file ids.
|
||||
HoodieReplaceCommitMetadata completeReplaceMetadata = new HoodieReplaceCommitMetadata();
|
||||
HoodieCommitMetadata inflightReplaceMetadata = new HoodieCommitMetadata();
|
||||
completeReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE);
|
||||
inflightReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE);
|
||||
// Both the requested and the inflight metadata are supplied (non-empty Options).
HoodieTestTable.of(metaClient)
|
||||
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.of(inflightReplaceMetadata), completeReplaceMetadata);
|
||||
}
|
||||
|
||||
private void createReplaceMetadata(String instantTime) throws Exception {
|
||||
String fileId1 = "file-" + instantTime + "-1";
|
||||
String fileId2 = "file-" + instantTime + "-2";
|
||||
@@ -691,11 +705,13 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
|
||||
.setVersion(1)
|
||||
.setExtraMetadata(Collections.emptyMap())
|
||||
.build();
|
||||
HoodieReplaceCommitMetadata replaceMetadata = new HoodieReplaceCommitMetadata();
|
||||
replaceMetadata.addReplaceFileId(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1);
|
||||
replaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE);
|
||||
HoodieReplaceCommitMetadata completeReplaceMetadata = new HoodieReplaceCommitMetadata();
|
||||
HoodieCommitMetadata inflightReplaceMetadata = new HoodieCommitMetadata();
|
||||
completeReplaceMetadata.addReplaceFileId(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1);
|
||||
completeReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE);
|
||||
inflightReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE);
|
||||
HoodieTestTable.of(metaClient)
|
||||
.addReplaceCommit(instantTime, requestedReplaceMetadata, replaceMetadata)
|
||||
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.of(inflightReplaceMetadata), completeReplaceMetadata)
|
||||
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
|
||||
}
|
||||
|
||||
|
||||
@@ -843,10 +843,11 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
|
||||
|
||||
// make next replacecommit, with 1 clustering operation. logically delete p0. No change to p1
|
||||
// notice that clustering generates empty inflight commit files
|
||||
Map<String, String> partitionAndFileId002 = testTable.forReplaceCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0);
|
||||
String file2P0C1 = partitionAndFileId002.get(p0);
|
||||
Pair<HoodieRequestedReplaceMetadata, HoodieReplaceCommitMetadata> replaceMetadata = generateReplaceCommitMetadata(p0, file1P0C0, file2P0C1);
|
||||
testTable.addReplaceCommit("00000000000002", replaceMetadata.getKey(), replaceMetadata.getValue());
|
||||
testTable.addReplaceCommit("00000000000002", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
|
||||
|
||||
// run cleaner
|
||||
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
|
||||
@@ -856,10 +857,11 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
|
||||
|
||||
// make next replacecommit, with 1 clustering operation. Replace data in p1. No change to p0
|
||||
// notice that clustering generates empty inflight commit files
|
||||
Map<String, String> partitionAndFileId003 = testTable.forReplaceCommit("00000000000003").getFileIdsWithBaseFilesInPartitions(p1);
|
||||
String file3P1C2 = partitionAndFileId003.get(p1);
|
||||
replaceMetadata = generateReplaceCommitMetadata(p1, file1P1C0, file3P1C2);
|
||||
testTable.addReplaceCommit("00000000000003", replaceMetadata.getKey(), replaceMetadata.getValue());
|
||||
testTable.addReplaceCommit("00000000000003", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
|
||||
|
||||
// run cleaner
|
||||
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
|
||||
@@ -870,10 +872,11 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
|
||||
|
||||
// make next replacecommit, with 1 clustering operation. Replace data in p0 again
|
||||
// notice that clustering generates empty inflight commit files
|
||||
Map<String, String> partitionAndFileId004 = testTable.forReplaceCommit("00000000000004").getFileIdsWithBaseFilesInPartitions(p0);
|
||||
String file4P0C3 = partitionAndFileId004.get(p0);
|
||||
replaceMetadata = generateReplaceCommitMetadata(p0, file2P0C1, file4P0C3);
|
||||
testTable.addReplaceCommit("00000000000004", replaceMetadata.getKey(), replaceMetadata.getValue());
|
||||
testTable.addReplaceCommit("00000000000004", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
|
||||
|
||||
// run cleaner
|
||||
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
|
||||
@@ -885,10 +888,11 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
|
||||
|
||||
// make next replacecommit, with 1 clustering operation. Replace all data in p1. no new files created
|
||||
// notice that clustering generates empty inflight commit files
|
||||
Map<String, String> partitionAndFileId005 = testTable.forReplaceCommit("00000000000005").getFileIdsWithBaseFilesInPartitions(p1);
|
||||
String file4P1C4 = partitionAndFileId005.get(p1);
|
||||
replaceMetadata = generateReplaceCommitMetadata(p0, file3P1C2, file4P1C4);
|
||||
testTable.addReplaceCommit("00000000000005", replaceMetadata.getKey(), replaceMetadata.getValue());
|
||||
testTable.addReplaceCommit("00000000000005", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue());
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, 2);
|
||||
assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3));
|
||||
|
||||
Reference in New Issue
Block a user