[HUDI-808] Support cleaning bootstrap source data (#1870)
Co-authored-by: Wenning Ding <wenningd@amazon.com> Co-authored-by: Balaji Varadarajan <vbalaji@apache.org>
This commit is contained in:
@@ -18,14 +18,19 @@
|
||||
|
||||
package org.apache.hudi.table;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieActionInstant;
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanPartitionMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCleanerPlan;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||
import org.apache.hudi.client.HoodieWriteClient;
|
||||
import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.common.HoodieCleanStat;
|
||||
import org.apache.hudi.common.bootstrap.TestBootstrapIndex;
|
||||
import org.apache.hudi.common.fs.ConsistencyGuardConfig;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.BootstrapFileMapping;
|
||||
import org.apache.hudi.common.model.FileSlice;
|
||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
@@ -42,6 +47,8 @@ import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigrator;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator;
|
||||
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler;
|
||||
import org.apache.hudi.common.table.view.TableFileSystemView;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
@@ -55,6 +62,7 @@ import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.table.action.clean.CleanPlanner;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
|
||||
@@ -64,6 +72,7 @@ import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Paths;
|
||||
@@ -76,6 +85,7 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.UUID;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
@@ -467,7 +477,7 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
});
|
||||
}
|
||||
|
||||
return cleanMetadata1.getPartitionMetadata().values().stream()
|
||||
Map<String, HoodieCleanStat> cleanStatMap = cleanMetadata1.getPartitionMetadata().values().stream()
|
||||
.map(x -> new HoodieCleanStat.Builder().withPartitionPath(x.getPartitionPath())
|
||||
.withFailedDeletes(x.getFailedDeleteFiles()).withSuccessfulDeletes(x.getSuccessDeleteFiles())
|
||||
.withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy())).withDeletePathPattern(x.getDeletePathPatterns())
|
||||
@@ -475,88 +485,144 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
? new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "000")
|
||||
: null))
|
||||
.build())
|
||||
.collect(Collectors.toList());
|
||||
.collect(Collectors.toMap(HoodieCleanStat::getPartitionPath, x -> x));
|
||||
cleanMetadata1.getBootstrapPartitionMetadata().values().forEach(x -> {
|
||||
HoodieCleanStat s = cleanStatMap.get(x.getPartitionPath());
|
||||
cleanStatMap.put(x.getPartitionPath(), new HoodieCleanStat.Builder().withPartitionPath(x.getPartitionPath())
|
||||
.withFailedDeletes(s.getFailedDeleteFiles()).withSuccessfulDeletes(s.getSuccessDeleteFiles())
|
||||
.withPolicy(HoodieCleaningPolicy.valueOf(x.getPolicy())).withDeletePathPattern(s.getDeletePathPatterns())
|
||||
.withEarliestCommitRetained(Option.ofNullable(s.getEarliestCommitToRetain())
|
||||
.map(y -> new HoodieInstant(State.COMPLETED, HoodieTimeline.COMMIT_ACTION, y)))
|
||||
.withSuccessfulDeleteBootstrapBaseFiles(x.getSuccessDeleteFiles())
|
||||
.withFailedDeleteBootstrapBaseFiles(x.getFailedDeleteFiles())
|
||||
.withDeleteBootstrapBasePathPatterns(x.getDeletePathPatterns()).build());
|
||||
});
|
||||
return new ArrayList<>(cleanStatMap.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by versions for COW table.
|
||||
*/
|
||||
@Test
|
||||
public void testKeepLatestFileVersions() throws IOException {
|
||||
testKeepLatestFileVersions(false); // enableBootstrapSourceClean = false: bootstrap base file cleaning disabled
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by version logic for COW table with Bootstrap source file clean enabled.
|
||||
*/
|
||||
@Test
|
||||
public void testBootstrapSourceFileCleanWithKeepLatestFileVersions() throws IOException {
|
||||
testKeepLatestFileVersions(true); // enableBootstrapSourceClean = true: bootstrap base file cleaning enabled
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by versions logic.
|
||||
*/
|
||||
@Test
|
||||
public void testKeepLatestFileVersions() throws IOException {
|
||||
public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throws IOException {
|
||||
HoodieWriteConfig config =
|
||||
HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
|
||||
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
|
||||
.build();
|
||||
|
||||
// make 1 commit, with 1 file per partition
|
||||
HoodieTestUtils.createCommitFiles(basePath, "000");
|
||||
HoodieTestUtils.createCommitFiles(basePath, "00000000000001");
|
||||
|
||||
String file1P0C0 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
|
||||
String file1P1C0 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData() : null;
|
||||
|
||||
String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).get(0).getFileId()
|
||||
: UUID.randomUUID().toString();
|
||||
String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).get(0).getFileId()
|
||||
: UUID.randomUUID().toString();
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001", file1P0C0); // insert
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000001", file1P1C0); // insert
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config);
|
||||
assertEquals(0, hoodieCleanStatsOne.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000001",
|
||||
file1P1C0));
|
||||
|
||||
// make next commit, with 1 insert & 1 update per partition
|
||||
HoodieTestUtils.createCommitFiles(basePath, "001");
|
||||
HoodieTestUtils.createCommitFiles(basePath, "00000000000002");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
String file2P0C1 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002"); // insert
|
||||
String file2P1C1 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002"); // insert
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002", file1P0C0); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002", file1P1C0); // update
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config);
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size(), "Must clean 1 file");
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size(), "Must clean 1 file");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
// enableBootstrapSourceClean would delete the bootstrap base file at the same time
|
||||
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
|
||||
assertEquals(enableBootstrapSourceClean ? 2 : 1, cleanStat.getSuccessDeleteFiles().size()
|
||||
+ (cleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0
|
||||
: cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file");
|
||||
if (enableBootstrapSourceClean) {
|
||||
HoodieFileStatus fstatus =
|
||||
bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).get(0).getBoostrapFileStatus();
|
||||
// This ensures full path is recorded in metadata.
|
||||
assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()),
|
||||
"Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles()
|
||||
+ " but did not contain " + fstatus.getPath().getUri());
|
||||
assertFalse(new File(bootstrapMapping.get(
|
||||
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).get(0).getBoostrapFileStatus().getPath().getUri()).exists());
|
||||
}
|
||||
cleanStat = getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH);
|
||||
assertEquals(enableBootstrapSourceClean ? 2 : 1, cleanStat.getSuccessDeleteFiles().size()
|
||||
+ (cleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0
|
||||
: cleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least 1 file");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002",
|
||||
file2P1C1));
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
if (enableBootstrapSourceClean) {
|
||||
HoodieFileStatus fstatus =
|
||||
bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).get(0).getBoostrapFileStatus();
|
||||
// This ensures full path is recorded in metadata.
|
||||
assertTrue(cleanStat.getSuccessDeleteBootstrapBaseFiles().contains(fstatus.getPath().getUri()),
|
||||
"Successful delete files were " + cleanStat.getSuccessDeleteBootstrapBaseFiles()
|
||||
+ " but did not contain " + fstatus.getPath().getUri());
|
||||
assertFalse(new File(bootstrapMapping.get(
|
||||
HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).get(0).getBoostrapFileStatus().getPath().getUri()).exists());
|
||||
}
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH,
|
||||
"000", file1P1C0));
|
||||
"00000000000001", file1P1C0));
|
||||
|
||||
// make next commit, with 2 updates to existing files, and 1 insert
|
||||
HoodieTestUtils.createCommitFiles(basePath, "002");
|
||||
HoodieTestUtils.createCommitFiles(basePath, "00000000000003");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003", file1P0C0); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003", file2P0C1); // update
|
||||
String file3P0C2 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003");
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config);
|
||||
assertEquals(2,
|
||||
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
|
||||
.getSuccessDeleteFiles().size(), "Must clean two files");
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file1P0C0));
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003",
|
||||
file3P0C2));
|
||||
|
||||
// No cleaning on partially written file, with no commit.
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
|
||||
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000004", file3P0C2); // update
|
||||
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config);
|
||||
assertEquals(0, hoodieCleanStatsFour.size(), "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003",
|
||||
file3P0C2));
|
||||
}
|
||||
|
||||
@@ -604,7 +670,7 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUpgradeDowngrade() {
|
||||
public void testCleanMetadataUpgradeDowngrade() {
|
||||
String instantTime = "000";
|
||||
|
||||
String partition1 = DEFAULT_PARTITION_PATHS[0];
|
||||
@@ -693,6 +759,68 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
assertEquals(policies1, policies2);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCleanPlanUpgradeDowngrade() {
|
||||
String instantTime = "000";
|
||||
|
||||
String partition1 = DEFAULT_PARTITION_PATHS[0];
|
||||
String partition2 = DEFAULT_PARTITION_PATHS[1];
|
||||
|
||||
String fileName1 = "data1_1_000.parquet";
|
||||
String fileName2 = "data2_1_000.parquet";
|
||||
|
||||
Map<String, List<String>> filesToBeCleanedPerPartition = new HashMap<>();
|
||||
filesToBeCleanedPerPartition.put(partition1, Arrays.asList(fileName1));
|
||||
filesToBeCleanedPerPartition.put(partition2, Arrays.asList(fileName2));
|
||||
|
||||
HoodieCleanerPlan version1Plan =
|
||||
HoodieCleanerPlan.newBuilder().setEarliestInstantToRetain(HoodieActionInstant.newBuilder()
|
||||
.setAction(HoodieTimeline.COMMIT_ACTION)
|
||||
.setTimestamp(instantTime).setState(State.COMPLETED.name()).build())
|
||||
.setPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
|
||||
.setFilesToBeDeletedPerPartition(filesToBeCleanedPerPartition)
|
||||
.setVersion(CleanPlanV1MigrationHandler.VERSION)
|
||||
.build();
|
||||
|
||||
// Upgrade and Verify version 2 plan
|
||||
HoodieCleanerPlan version2Plan =
|
||||
new CleanPlanMigrator(metaClient).upgradeToLatest(version1Plan, version1Plan.getVersion());
|
||||
assertEquals(version1Plan.getEarliestInstantToRetain(), version2Plan.getEarliestInstantToRetain());
|
||||
assertEquals(version1Plan.getPolicy(), version2Plan.getPolicy());
|
||||
assertEquals(CleanPlanner.LATEST_CLEAN_PLAN_VERSION, version2Plan.getVersion());
|
||||
// Deprecated Field is not used.
|
||||
assertEquals(0, version2Plan.getFilesToBeDeletedPerPartition().size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(),
|
||||
version2Plan.getFilePathsToBeDeletedPerPartition().size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(),
|
||||
version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(),
|
||||
version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).size());
|
||||
assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition1), fileName1).toString(),
|
||||
version2Plan.getFilePathsToBeDeletedPerPartition().get(partition1).get(0).getFilePath());
|
||||
assertEquals(new Path(FSUtils.getPartitionPath(metaClient.getBasePath(), partition2), fileName2).toString(),
|
||||
version2Plan.getFilePathsToBeDeletedPerPartition().get(partition2).get(0).getFilePath());
|
||||
|
||||
// Downgrade and verify version 1 plan
|
||||
HoodieCleanerPlan gotVersion1Plan = new CleanPlanMigrator(metaClient).migrateToVersion(version2Plan,
|
||||
version2Plan.getVersion(), version1Plan.getVersion());
|
||||
assertEquals(version1Plan.getEarliestInstantToRetain(), gotVersion1Plan.getEarliestInstantToRetain());
|
||||
assertEquals(version1Plan.getPolicy(), version2Plan.getPolicy());
|
||||
assertEquals(version1Plan.getVersion(), gotVersion1Plan.getVersion());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().size(),
|
||||
gotVersion1Plan.getFilesToBeDeletedPerPartition().size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).size(),
|
||||
gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).size(),
|
||||
gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).size());
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0),
|
||||
gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition1).get(0));
|
||||
assertEquals(version1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0),
|
||||
gotVersion1Plan.getFilesToBeDeletedPerPartition().get(partition2).get(0));
|
||||
assertTrue(gotVersion1Plan.getFilePathsToBeDeletedPerPartition().isEmpty());
|
||||
assertNull(version1Plan.getFilePathsToBeDeletedPerPartition());
|
||||
}
|
||||
|
||||
private void testCleanMetadataPathEquality(HoodieCleanMetadata metadata, Map<String, Tuple3> expected) {
|
||||
|
||||
Map<String, HoodieCleanPartitionMetadata> partitionMetadataMap = metadata.getPartitionMetadata();
|
||||
@@ -708,47 +836,62 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files.
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for COW table.
|
||||
*/
|
||||
@Test
|
||||
public void testKeepLatestCommits() throws IOException {
|
||||
testKeepLatestCommits(false, false);
|
||||
testKeepLatestCommits(false, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files. Here the operations are simulated
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for COW table. Here the operations are simulated
|
||||
* such that first clean attempt failed after files were cleaned and a subsequent cleanup succeeds.
|
||||
*/
|
||||
@Test
|
||||
public void testKeepLatestCommitsWithFailureRetry() throws IOException {
|
||||
testKeepLatestCommits(true, false);
|
||||
testKeepLatestCommits(true, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files.
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for COW table.
|
||||
*/
|
||||
@Test
|
||||
public void testKeepLatestCommitsIncrMode() throws IOException {
|
||||
testKeepLatestCommits(false, true);
|
||||
testKeepLatestCommits(false, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for MOR table with Log files.
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for COW table with Bootstrap source file clean enabled.
|
||||
*/
|
||||
private void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIncrementalClean) throws IOException {
|
||||
@Test
|
||||
public void testBootstrapSourceFileCleanWithKeepLatestCommits() throws IOException {
|
||||
testKeepLatestCommits(false, false, true); // simulateFailureRetry = false, enableIncrementalClean = false, enableBootstrapSourceClean = true
|
||||
}
|
||||
|
||||
/**
|
||||
* Test HoodieTable.clean() Cleaning by commit logic for COW table.
|
||||
*/
|
||||
private void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws IOException {
|
||||
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withIncrementalCleaningMode(enableIncrementalClean)
|
||||
.withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean)
|
||||
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
|
||||
.build();
|
||||
|
||||
// make 1 commit, with 1 file per partition
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "000");
|
||||
Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData() : null;
|
||||
|
||||
String file1P0C0 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
|
||||
String file1P1C0 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
|
||||
// make 1 commit, with 1 file per partition
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "00000000000001");
|
||||
|
||||
String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).get(0).getFileId()
|
||||
: UUID.randomUUID().toString();
|
||||
String file1P1C0 = enableBootstrapSourceClean ? bootstrapMapping.get(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).get(0).getFileId()
|
||||
: UUID.randomUUID().toString();
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001", file1P0C0); // insert
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000001", file1P1C0); // insert
|
||||
|
||||
HoodieCommitMetadata commitMetadata = generateCommitMetadata(
|
||||
Collections.unmodifiableMap(new HashMap<String, List<String>>() {
|
||||
@@ -759,32 +902,32 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
})
|
||||
);
|
||||
metaClient.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"),
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000001",
|
||||
file1P1C0));
|
||||
|
||||
// make next commit, with 1 insert & 1 update per partition
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "001");
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "00000000000002");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
String file2P0C1 =
|
||||
HoodieTestUtils
|
||||
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
|
||||
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002"); // insert
|
||||
String file2P1C1 =
|
||||
HoodieTestUtils
|
||||
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
|
||||
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002"); // insert
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002", file1P0C0); // update
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002", file1P1C0); // update
|
||||
commitMetadata = generateCommitMetadata(new HashMap<String, List<String>>() {
|
||||
{
|
||||
put(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0, file2P0C1));
|
||||
@@ -792,99 +935,132 @@ public class TestCleaner extends HoodieClientTestBase {
|
||||
}
|
||||
});
|
||||
metaClient.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "001"),
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000002"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals(0, hoodieCleanStatsTwo.size(), "Must not scan any partitions and clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000002",
|
||||
file2P1C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "00000000000001",
|
||||
file1P1C0));
|
||||
|
||||
// make next commit, with 2 updates to existing files, and 1 insert
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "002");
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "00000000000003");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003", file1P0C0); // update
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003", file2P0C1); // update
|
||||
String file3P0C2 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003");
|
||||
|
||||
commitMetadata = generateCommitMetadata(CollectionUtils
|
||||
.createImmutableMap(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2)));
|
||||
metaClient.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "002"),
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000003"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals(0, hoodieCleanStatsThree.size(),
|
||||
"Must not clean any file. We have to keep 1 version before the latest commit time to keep");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
|
||||
// make next commit, with 2 updates to existing files, and 1 insert
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "003");
|
||||
HoodieTestUtils.createInflightCommitFiles(basePath, "00000000000004");
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file1P0C0); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000004", file1P0C0); // update
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file2P0C1); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000004", file2P0C1); // update
|
||||
String file4P0C3 =
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003");
|
||||
HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000004");
|
||||
commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(
|
||||
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3)));
|
||||
metaClient.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "003"),
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000004"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
List<HoodieCleanStat> hoodieCleanStatsFour = runCleaner(config, simulateFailureRetry);
|
||||
assertEquals(1,
|
||||
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
|
||||
.size(), "Must not clean one old file");
|
||||
// enableBootstrapSourceClean would delete the bootstrap base file at the same time
|
||||
HoodieCleanStat partitionCleanStat =
|
||||
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
|
||||
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
|
||||
assertEquals(enableBootstrapSourceClean ? 2 : 1, partitionCleanStat.getSuccessDeleteFiles().size()
|
||||
+ (partitionCleanStat.getSuccessDeleteBootstrapBaseFiles() == null ? 0
|
||||
: partitionCleanStat.getSuccessDeleteBootstrapBaseFiles().size()), "Must clean at least one old file");
|
||||
|
||||
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000001",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
if (enableBootstrapSourceClean) {
|
||||
assertFalse(new File(bootstrapMapping.get(
|
||||
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).get(0).getBoostrapFileStatus().getPath().getUri()).exists());
|
||||
}
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003",
|
||||
file2P0C1));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000003",
|
||||
file3P0C2));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000004",
|
||||
file4P0C3));
|
||||
|
||||
// No cleaning on partially written file, with no commit.
|
||||
HoodieTestUtils
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
|
||||
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000005", file3P0C2); // update
|
||||
commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
|
||||
CollectionUtils.createImmutableList(file3P0C2)));
|
||||
metaClient.getActiveTimeline().createNewInstant(
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "004"));
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000005"));
|
||||
metaClient.getActiveTimeline().transitionRequestedToInflight(
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "004"),
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000005"),
|
||||
Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
List<HoodieCleanStat> hoodieCleanStatsFive = runCleaner(config, simulateFailureRetry);
|
||||
HoodieCleanStat cleanStat = getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
|
||||
assertEquals(0,
|
||||
cleanStat != null ? cleanStat.getSuccessDeleteFiles().size() : 0, "Must not clean any files");
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file1P0C0));
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
|
||||
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "00000000000002",
|
||||
file2P0C1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate Bootstrap index, bootstrap base file and corresponding metaClient.
|
||||
* @return Partition to BootstrapFileMapping Map
|
||||
* @throws IOException
|
||||
*/
|
||||
private Map<String, List<BootstrapFileMapping>> generateBootstrapIndexAndSourceData() throws IOException {
|
||||
// create bootstrap source data path
|
||||
java.nio.file.Path sourcePath = tempDir.resolve("data");
|
||||
java.nio.file.Files.createDirectories(sourcePath);
|
||||
assertTrue(new File(sourcePath.toString()).exists());
|
||||
|
||||
// recreate metaClient with Bootstrap base path
|
||||
metaClient = HoodieTestUtils.init(basePath, getTableType(), sourcePath.toString());
|
||||
|
||||
// generate bootstrap index
|
||||
Map<String, List<BootstrapFileMapping>> bootstrapMapping = TestBootstrapIndex.generateBootstrapIndex(metaClient, sourcePath.toString(),
|
||||
new String[] {HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH}, 1);
|
||||
|
||||
for (Map.Entry<String, List<BootstrapFileMapping>> entry : bootstrapMapping.entrySet()) {
|
||||
new File(sourcePath.toString() + "/" + entry.getKey()).mkdirs();
|
||||
assertTrue(new File(entry.getValue().get(0).getBoostrapFileStatus().getPath().getUri()).createNewFile());
|
||||
}
|
||||
return bootstrapMapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Cleaning functionality of table.rollback() API.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user