1
0

[HUDI-1740] Fix insert-overwrite API archival (#2784)

- Fix problem of archiving replace commits
- Fix problem when getting empty replacecommit.requested
- Improved the logic of handling empty and non-empty requested/inflight commit files. Added unit tests to cover both the empty and non-empty inflight file cases, and cleaned up some unused test util methods

Co-authored-by: yorkzero831 <yorkzero8312@gmail.com>
Co-authored-by: zheren.yu <zheren.yu@paypay-corp.co.jp>
This commit is contained in:
Susu Dong
2021-05-22 05:52:13 +09:00
committed by GitHub
parent 99b14a78e3
commit 685f77b5dd
12 changed files with 169 additions and 107 deletions

View File

@@ -383,7 +383,7 @@ public class TestSimpleConcurrentFileWritesConflictResolutionStrategy extends Ho
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
HoodieTestTable.of(metaClient)
.addRequestedReplace(instantTime, requestedReplaceMetadata)
.addRequestedReplace(instantTime, Option.of(requestedReplaceMetadata))
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
@@ -413,7 +413,7 @@ public class TestSimpleConcurrentFileWritesConflictResolutionStrategy extends Ho
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
HoodieTestTable.of(metaClient)
.addReplaceCommit(instantTime, requestedReplaceMetadata, replaceMetadata)
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.empty(), replaceMetadata)
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}

View File

@@ -20,6 +20,7 @@ package org.apache.hudi.utils;
import static org.apache.hudi.common.util.CleanerUtils.convertCleanMetadata;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import java.io.IOException;
import java.util.ArrayList;
@@ -80,13 +81,43 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
@Test
public void testCompletedReplace() throws Exception {
String newCommitTime = HoodieTestTable.makeNewCommitTime();
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE);
createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE, true);
HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
new HoodieInstant(State.COMPLETED, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
assertEquals(metaEntry.getActionState(), State.COMPLETED.toString());
assertEquals(metaEntry.getHoodieReplaceCommitMetadata().getOperationType(), WriteOperationType.INSERT_OVERWRITE.toString());
}
/**
 * Verifies that the archived entry built for a REQUESTED replace instant carries no
 * requested-replace metadata when none was supplied.
 *
 * <p>{@code createReplace} is invoked with {@code isClustering = false}, in which case it
 * hands an empty {@code Option} for the requested metadata to the test table.
 *
 * @throws Exception if setting up the instant or building the archived entry fails
 */
@Test
public void testEmptyRequestedReplace() throws Exception {
  String newCommitTime = HoodieTestTable.makeNewCommitTime();
  createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, false);
  HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
      new HoodieInstant(State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
  // assertEquals takes (expected, actual); keeping that order makes failure output read correctly.
  assertEquals(State.REQUESTED.toString(), metaEntry.getActionState());
  assertNull(metaEntry.getHoodieRequestedReplaceMetadata());
}
/**
 * Verifies that the archived entry built for an INFLIGHT replace instant carries no
 * inflight-replace metadata when none was supplied.
 *
 * <p>{@code createReplace} is invoked with {@code isClustering = true}, in which case it
 * hands an empty {@code Option} for the inflight metadata to the test table.
 *
 * @throws Exception if setting up the instant or building the archived entry fails
 */
@Test
public void testEmptyInflightReplace() throws Exception {
  String newCommitTime = HoodieTestTable.makeNewCommitTime();
  createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, true);
  HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
  // assertEquals takes (expected, actual); keeping that order makes failure output read correctly.
  assertEquals(State.INFLIGHT.toString(), metaEntry.getActionState());
  assertNull(metaEntry.getHoodieInflightReplaceMetadata());
}
/**
 * Verifies that the archived entry built for an INFLIGHT replace instant exposes the
 * inflight metadata, including its operation type, when that metadata was supplied.
 *
 * <p>{@code createReplace} is invoked with {@code isClustering = false}, in which case it
 * populates the inflight metadata with the given write operation type.
 *
 * @throws Exception if setting up the instant or building the archived entry fails
 */
@Test
public void testNonEmptyInflightReplace() throws Exception {
  String newCommitTime = HoodieTestTable.makeNewCommitTime();
  createReplace(newCommitTime, WriteOperationType.INSERT_OVERWRITE_TABLE, false);
  HoodieArchivedMetaEntry metaEntry = MetadataConversionUtils.createMetaWrapper(
      new HoodieInstant(State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, newCommitTime), metaClient);
  // assertEquals takes (expected, actual); keeping that order makes failure output read correctly.
  assertEquals(State.INFLIGHT.toString(), metaEntry.getActionState());
  assertEquals(WriteOperationType.INSERT_OVERWRITE_TABLE.name(),
      metaEntry.getHoodieInflightReplaceMetadata().getOperationType());
}
@Test
public void testCompletedCommitOrDeltaCommit() throws Exception {
String newCommitTime = HoodieTestTable.makeNewCommitTime();
@@ -169,7 +200,8 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
private void createReplace(String instantTime, WriteOperationType writeOperationType) throws Exception {
private void createReplace(String instantTime, WriteOperationType writeOperationType, Boolean isClustering)
throws Exception {
String fileId1 = "file-1";
String fileId2 = "file-2";
@@ -182,18 +214,29 @@ public class TestMetadataConversionUtils extends HoodieCommonTestHarness {
writeStat.setFileId("file-1");
replaceMetadata.addWriteStat(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, writeStat);
replaceMetadata.setOperationType(writeOperationType);
// create replace instant to mark fileId1 as deleted
HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
requestedReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE.name());
HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
clusteringGroup.setSlices(Arrays.asList(sliceInfo));
clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
// some cases requestedReplaceMetadata will be null
// e.g. insert_overwrite_table or insert_overwrite without clustering
HoodieRequestedReplaceMetadata requestedReplaceMetadata = null;
HoodieCommitMetadata inflightReplaceMetadata = null;
if (isClustering) {
requestedReplaceMetadata = new HoodieRequestedReplaceMetadata();
requestedReplaceMetadata.setOperationType(writeOperationType.name());
HoodieClusteringPlan clusteringPlan = new HoodieClusteringPlan();
HoodieClusteringGroup clusteringGroup = new HoodieClusteringGroup();
HoodieSliceInfo sliceInfo = new HoodieSliceInfo();
clusteringGroup.setSlices(Arrays.asList(sliceInfo));
clusteringPlan.setInputGroups(Arrays.asList(clusteringGroup));
requestedReplaceMetadata.setClusteringPlan(clusteringPlan);
requestedReplaceMetadata.setVersion(TimelineLayoutVersion.CURR_VERSION);
} else {
// inflightReplaceMetadata will be null in clustering but not null
// in insert_overwrite or insert_overwrite_table
inflightReplaceMetadata = new HoodieCommitMetadata();
inflightReplaceMetadata.setOperationType(writeOperationType);
inflightReplaceMetadata.setCompacted(false);
}
HoodieTestTable.of(metaClient)
.addReplaceCommit(instantTime, requestedReplaceMetadata, replaceMetadata)
.addReplaceCommit(instantTime, Option.ofNullable(requestedReplaceMetadata), Option.ofNullable(inflightReplaceMetadata), replaceMetadata)
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}