1
0

[HUDI-1518] Remove the logic that delete replaced file when archive (#3310)

* remove delete replaced file when archive

* done

* remove unused import

* remove the UT related to deleting replaced files when archiving

* code reviewed

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
zhangyue19921010
2021-08-12 01:54:44 +08:00
committed by GitHub
parent 4783176554
commit 9e8308527a
3 changed files with 0 additions and 162 deletions

View File

@@ -21,15 +21,12 @@ package org.apache.hudi.io;
import org.apache.hudi.avro.model.HoodieActionInstant;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.client.utils.MetadataConversionUtils;
import org.apache.hudi.common.HoodieCleanStat;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -50,7 +47,6 @@ import org.apache.hudi.table.HoodieTimelineArchiveLog;
import org.apache.hudi.testutils.HoodieClientTestHarness;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -233,45 +229,6 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
verifyInflightInstants(metaClient, 2);
}
@Test
public void testArchiveTableWithReplacedFiles() throws Exception {
HoodieTestUtils.init(hadoopConf, basePath);
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
.withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build())
.build();
// when using insert_overwrite or insert_overwrite_table
// first commit may without replaceFileIds
createReplaceMetadataWithoutReplaceFileId("000");
int numCommits = 4;
int commitInstant = 100;
for (int i = 0; i < numCommits; i++) {
createReplaceMetadata(String.valueOf(commitInstant));
commitInstant += 100;
}
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
assertEquals(5, timeline.countInstants(), "Loaded 5 commits and the count should match");
HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table);
boolean result = archiveLog.archiveIfRequired(context);
assertTrue(result);
FileStatus[] allFiles = metaClient.getFs().listStatus(new Path(basePath + "/" + HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH));
Set<String> allFileIds = Arrays.stream(allFiles).map(fs -> FSUtils.getFileIdFromFilePath(fs.getPath())).collect(Collectors.toSet());
// verify 100-1,200-1 are deleted by archival
assertFalse(allFileIds.contains("file-100-1"));
assertFalse(allFileIds.contains("file-200-1"));
assertTrue(allFileIds.contains("file-100-2"));
assertTrue(allFileIds.contains("file-200-2"));
assertTrue(allFileIds.contains("file-300-1"));
assertTrue(allFileIds.contains("file-400-1"));
}
@Test
public void testArchiveTableWithNoArchival() throws IOException {
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
@@ -679,42 +636,6 @@ public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness {
assertEquals(notArchivedInstants, Arrays.asList(notArchivedInstant1, notArchivedInstant2, notArchivedInstant3), "");
}
private void createReplaceMetadataWithoutReplaceFileId(String instantTime) throws Exception {
// create replace instant without a previous replace commit
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE.toString())
.setVersion(1)
.setExtraMetadata(Collections.emptyMap())
.build();
HoodieReplaceCommitMetadata completeReplaceMetadata = new HoodieReplaceCommitMetadata();
HoodieCommitMetadata inflightReplaceMetadata = new HoodieCommitMetadata();
completeReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE);
inflightReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE_TABLE);
HoodieTestTable.of(metaClient)
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.of(inflightReplaceMetadata), completeReplaceMetadata);
}
private void createReplaceMetadata(String instantTime) throws Exception {
String fileId1 = "file-" + instantTime + "-1";
String fileId2 = "file-" + instantTime + "-2";
// create replace instant to mark fileId1 as deleted
HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
.setOperationType(WriteOperationType.INSERT_OVERWRITE.toString())
.setVersion(1)
.setExtraMetadata(Collections.emptyMap())
.build();
HoodieReplaceCommitMetadata completeReplaceMetadata = new HoodieReplaceCommitMetadata();
HoodieCommitMetadata inflightReplaceMetadata = new HoodieCommitMetadata();
completeReplaceMetadata.addReplaceFileId(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1);
completeReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE);
inflightReplaceMetadata.setOperationType(WriteOperationType.INSERT_OVERWRITE);
HoodieTestTable.of(metaClient)
.addReplaceCommit(instantTime, Option.of(requestedReplaceMetadata), Option.of(inflightReplaceMetadata), completeReplaceMetadata)
.withBaseFilesInPartition(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fileId1, fileId2);
}
private HoodieInstant createCleanMetadata(String instantTime, boolean inflightOnly) throws IOException {
HoodieCleanerPlan cleanerPlan = new HoodieCleanerPlan(new HoodieActionInstant("", "", ""), "", new HashMap<>(),
CleanPlanV2MigrationHandler.VERSION, new HashMap<>());