1
0

[HUDI-995] Migrate HoodieTestUtils APIs to HoodieTestTable (#2167)

Removed the following APIs from `HoodieTestUtils`:
- `createCommitFiles`
- `createDataFile`
- `createNewLogFile`
- `createCompactionRequest`

Migrated usages in `TestCleaner#testPendingCompactions`.

Also renamed some `HoodieTestTable` APIs for clarity (e.g. `withBaseFilesInPartitions` → `getFileIdsWithBaseFilesInPartitions`, `withLogFile(partitionPath)` → `getFileIdWithLogFile`), and extended `withLogFile` to accept multiple versions via varargs.
This commit is contained in:
Raymond Xu
2020-10-11 23:39:10 -07:00
committed by GitHub
parent c0472d3317
commit c5e10d668f
6 changed files with 112 additions and 156 deletions

View File

@@ -87,7 +87,7 @@ public class ITTestRepairsCommand extends AbstractShellIntegrationTest {
testTable.addCommit("20160401010101") testTable.addCommit("20160401010101")
.withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "1", hoodieRecords1) .withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "1", hoodieRecords1)
.withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "2", hoodieRecords2) .withInserts(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "2", hoodieRecords2)
.withLogFile(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); .getFileIdWithLogFile(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH);
testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "4", hoodieRecords1) testTable.withInserts(HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "4", hoodieRecords1)
.withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "6", hoodieRecords1); .withInserts(HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH, "6", hoodieRecords1);

View File

@@ -51,7 +51,6 @@ import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataMigra
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator; import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator;
import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler; import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanV1MigrationHandler;
import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.CleanerUtils; import org.apache.hudi.common.util.CleanerUtils;
@@ -155,7 +154,7 @@ public class TestCleaner extends HoodieClientTestBase {
assertTrue(table.getCompletedCleanTimeline().empty()); assertTrue(table.getCompletedCleanTimeline().empty());
HoodieIndex index = SparkHoodieIndex.createIndex(cfg); HoodieIndex index = SparkHoodieIndex.createIndex(cfg);
List<HoodieRecord> taggedRecords = ((JavaRDD<HoodieRecord>)index.tagLocation(jsc.parallelize(records, 1), context, table)).collect(); List<HoodieRecord> taggedRecords = ((JavaRDD<HoodieRecord>) index.tagLocation(jsc.parallelize(records, 1), context, table)).collect();
checkTaggedRecords(taggedRecords, newCommitTime); checkTaggedRecords(taggedRecords, newCommitTime);
} }
@@ -550,7 +549,7 @@ public class TestCleaner extends HoodieClientTestBase {
Map<String, String> partitionAndFileId002 = testTable.addCommit("00000000000002") Map<String, String> partitionAndFileId002 = testTable.addCommit("00000000000002")
.withBaseFilesInPartition(p0, file1P0C0) .withBaseFilesInPartition(p0, file1P0C0)
.withBaseFilesInPartition(p1, file1P1C0) .withBaseFilesInPartition(p1, file1P1C0)
.withBaseFilesInPartitions(p0, p1); .getFileIdsWithBaseFilesInPartitions(p0, p1);
List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, 1); List<HoodieCleanStat> hoodieCleanStatsTwo = runCleaner(config, 1);
// enableBootstrapSourceClean would delete the bootstrap base file as the same time // enableBootstrapSourceClean would delete the bootstrap base file as the same time
@@ -592,7 +591,7 @@ public class TestCleaner extends HoodieClientTestBase {
// make next commit, with 2 updates to existing files, and 1 insert // make next commit, with 2 updates to existing files, and 1 insert
String file3P0C2 = testTable.addCommit("00000000000003") String file3P0C2 = testTable.addCommit("00000000000003")
.withBaseFilesInPartition(p0, file1P0C0, file2P0C1) .withBaseFilesInPartition(p0, file1P0C0, file2P0C1)
.withBaseFilesInPartitions(p0).get(p0); .getFileIdsWithBaseFilesInPartitions(p0).get(p0);
List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, 3); List<HoodieCleanStat> hoodieCleanStatsThree = runCleaner(config, 3);
assertEquals(2, assertEquals(2,
getCleanStat(hoodieCleanStatsThree, p0) getCleanStat(hoodieCleanStatsThree, p0)
@@ -625,7 +624,7 @@ public class TestCleaner extends HoodieClientTestBase {
String p0 = "2020/01/01"; String p0 = "2020/01/01";
// Make 3 files, one base file and 2 log files associated with base file // Make 3 files, one base file and 2 log files associated with base file
String file1P0 = testTable.addDeltaCommit("000").withBaseFilesInPartitions(p0).get(p0); String file1P0 = testTable.addDeltaCommit("000").getFileIdsWithBaseFilesInPartitions(p0).get(p0);
testTable.forDeltaCommit("000") testTable.forDeltaCommit("000")
.withLogFile(p0, file1P0, 1) .withLogFile(p0, file1P0, 1)
.withLogFile(p0, file1P0, 2); .withLogFile(p0, file1P0, 2);
@@ -865,7 +864,7 @@ public class TestCleaner extends HoodieClientTestBase {
assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0)); assertTrue(testTable.baseFileExists(p1, "00000000000001", file1P1C0));
// make next commit, with 1 insert & 1 update per partition // make next commit, with 1 insert & 1 update per partition
Map<String, String> partitionAndFileId002 = testTable.addInflightCommit("00000000000002").withBaseFilesInPartitions(p0, p1); Map<String, String> partitionAndFileId002 = testTable.addInflightCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0, p1);
String file2P0C1 = partitionAndFileId002.get(p0); String file2P0C1 = partitionAndFileId002.get(p0);
String file2P1C1 = partitionAndFileId002.get(p1); String file2P1C1 = partitionAndFileId002.get(p1);
testTable.forCommit("00000000000002").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); testTable.forCommit("00000000000002").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0);
@@ -889,7 +888,7 @@ public class TestCleaner extends HoodieClientTestBase {
String file3P0C2 = testTable.addInflightCommit("00000000000003") String file3P0C2 = testTable.addInflightCommit("00000000000003")
.withBaseFilesInPartition(p0, file1P0C0) .withBaseFilesInPartition(p0, file1P0C0)
.withBaseFilesInPartition(p0, file2P0C1) .withBaseFilesInPartition(p0, file2P0C1)
.withBaseFilesInPartitions(p0).get(p0); .getFileIdsWithBaseFilesInPartitions(p0).get(p0);
commitMetadata = generateCommitMetadata(CollectionUtils commitMetadata = generateCommitMetadata(CollectionUtils
.createImmutableMap(p0, .createImmutableMap(p0,
CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2))); CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2)));
@@ -906,7 +905,7 @@ public class TestCleaner extends HoodieClientTestBase {
String file4P0C3 = testTable.addInflightCommit("00000000000004") String file4P0C3 = testTable.addInflightCommit("00000000000004")
.withBaseFilesInPartition(p0, file1P0C0) .withBaseFilesInPartition(p0, file1P0C0)
.withBaseFilesInPartition(p0, file2P0C1) .withBaseFilesInPartition(p0, file2P0C1)
.withBaseFilesInPartitions(p0).get(p0); .getFileIdsWithBaseFilesInPartitions(p0).get(p0);
commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap( commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(
p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3))); p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3)));
metaClient.getActiveTimeline().saveAsComplete( metaClient.getActiveTimeline().saveAsComplete(
@@ -1021,7 +1020,7 @@ public class TestCleaner extends HoodieClientTestBase {
* Test Keep Latest Commits when there are pending compactions. * Test Keep Latest Commits when there are pending compactions.
*/ */
@Test @Test
public void testKeepLatestCommitsWithPendingCompactions() throws IOException { public void testKeepLatestCommitsWithPendingCompactions() throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()) .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
@@ -1043,7 +1042,7 @@ public class TestCleaner extends HoodieClientTestBase {
*/ */
@ParameterizedTest @ParameterizedTest
@ValueSource(booleans = {false, true}) @ValueSource(booleans = {false, true})
public void testKeepLatestVersionsWithPendingCompactions(boolean retryFailure) throws IOException { public void testKeepLatestVersionsWithPendingCompactions(boolean retryFailure) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig config =
HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
.withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder()
@@ -1098,73 +1097,82 @@ public class TestCleaner extends HoodieClientTestBase {
* @param expNumFilesDeleted Number of files deleted * @param expNumFilesDeleted Number of files deleted
*/ */
private void testPendingCompactions(HoodieWriteConfig config, int expNumFilesDeleted, private void testPendingCompactions(HoodieWriteConfig config, int expNumFilesDeleted,
int expNumFilesUnderCompactionDeleted, boolean retryFailure) throws IOException { int expNumFilesUnderCompactionDeleted, boolean retryFailure) throws Exception {
HoodieTableMetaClient metaClient = HoodieTableMetaClient metaClient =
HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ); HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.MERGE_ON_READ);
String[] instants = new String[] {"000", "001", "003", "005", "007", "009", "011", "013"}; final String partition = "2016/03/15";
String[] compactionInstants = new String[] {"002", "004", "006", "008", "010"}; Map<String, String> expFileIdToPendingCompaction = new HashMap<String, String>() {
Map<String, String> expFileIdToPendingCompaction = new HashMap<>(); {
Map<String, String> fileIdToLatestInstantBeforeCompaction = new HashMap<>(); put("fileId2", "004");
Map<String, List<FileSlice>> compactionInstantsToFileSlices = new HashMap<>(); put("fileId3", "006");
put("fileId4", "008");
for (String instant : instants) { put("fileId5", "010");
HoodieTestUtils.createCommitFiles(basePath, instant);
} }
};
Map<String, String> fileIdToLatestInstantBeforeCompaction = new HashMap<String, String>() {
{
put("fileId1", "000");
put("fileId2", "000");
put("fileId3", "001");
put("fileId4", "003");
put("fileId5", "005");
put("fileId6", "009");
put("fileId7", "011");
}
};
// Generate 7 file-groups. First one has only one slice and no pending compaction. File Slices (2 - 5) has // Generate 7 file-groups. First one has only one slice and no pending compaction. File Slices (2 - 5) has
// multiple versions with pending compaction. File Slices (6 - 7) have multiple file-slices but not under // multiple versions with pending compaction. File Slices (6 - 7) have multiple file-slices but not under
// compactions // compactions
// FileIds 2-5 will be under compaction // FileIds 2-5 will be under compaction
int maxNumFileIds = 7; HoodieTestTable.of(metaClient)
String[] fileIds = new String[] {"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"}; .addCommit("000")
int maxNumFileIdsForCompaction = 4; .withBaseFilesInPartition(partition, "fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7")
for (int i = 0; i < maxNumFileIds; i++) { .withLogFile(partition, "fileId1", 1, 2)
final String fileId = HoodieTestUtils.createDataFile(basePath, .withLogFile(partition, "fileId2", 1, 2)
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], fileIds[i]); .withLogFile(partition, "fileId3", 1, 2)
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], .withLogFile(partition, "fileId4", 1, 2)
fileId, Option.empty()); .withLogFile(partition, "fileId5", 1, 2)
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], .withLogFile(partition, "fileId6", 1, 2)
fileId, Option.of(2)); .withLogFile(partition, "fileId7", 1, 2)
fileIdToLatestInstantBeforeCompaction.put(fileId, instants[0]); .addCommit("001")
for (int j = 1; j <= i; j++) { .withBaseFilesInPartition(partition, "fileId3", "fileId4", "fileId5", "fileId6", "fileId7")
if (j == i && j <= maxNumFileIdsForCompaction) { .withLogFile(partition, "fileId3", 1, 2)
expFileIdToPendingCompaction.put(fileId, compactionInstants[j]); .withLogFile(partition, "fileId4", 1, 2)
metaClient = HoodieTableMetaClient.reload(metaClient); .withLogFile(partition, "fileId5", 1, 2)
HoodieTable table = HoodieSparkTable.create(config, context, metaClient); .withLogFile(partition, "fileId6", 1, 2)
FileSlice slice = .withLogFile(partition, "fileId7", 1, 2)
table.getSliceView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH) .addCommit("003")
.filter(fs -> fs.getFileId().equals(fileId)).findFirst().get(); .withBaseFilesInPartition(partition, "fileId4", "fileId5", "fileId6", "fileId7")
List<FileSlice> slices = new ArrayList<>(); .withLogFile(partition, "fileId4", 1, 2)
if (compactionInstantsToFileSlices.containsKey(compactionInstants[j])) { .withLogFile(partition, "fileId5", 1, 2)
slices = compactionInstantsToFileSlices.get(compactionInstants[j]); .withLogFile(partition, "fileId6", 1, 2)
} .withLogFile(partition, "fileId7", 1, 2)
slices.add(slice); .addRequestedCompaction("004", new FileSlice(partition, "000", "fileId2"))
compactionInstantsToFileSlices.put(compactionInstants[j], slices); .withLogFile(partition, "fileId2", 1, 2)
// Add log-files to simulate delta-commits after pending compaction .addCommit("005")
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, .withBaseFilesInPartition(partition, "fileId5", "fileId6", "fileId7")
compactionInstants[j], fileId, Option.empty()); .withLogFile(partition, "fileId5", 1, 2)
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, .withLogFile(partition, "fileId6", 1, 2)
compactionInstants[j], fileId, Option.of(2)); .withLogFile(partition, "fileId7", 1, 2)
} else { .addRequestedCompaction("006", new FileSlice(partition, "001", "fileId3"))
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], .withLogFile(partition, "fileId3", 1, 2)
fileId); .addCommit("007")
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, .withBaseFilesInPartition(partition, "fileId6", "fileId7")
instants[j], fileId, Option.empty()); .withLogFile(partition, "fileId6", 1, 2)
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, .withLogFile(partition, "fileId7", 1, 2)
instants[j], fileId, Option.of(2)); .addRequestedCompaction("008", new FileSlice(partition, "003", "fileId4"))
fileIdToLatestInstantBeforeCompaction.put(fileId, instants[j]); .withLogFile(partition, "fileId4", 1, 2)
} .addCommit("009")
} .withBaseFilesInPartition(partition, "fileId6", "fileId7")
} .withLogFile(partition, "fileId6", 1, 2)
.withLogFile(partition, "fileId7", 1, 2)
// Setup pending compaction plans .addRequestedCompaction("010", new FileSlice(partition, "005", "fileId5"))
for (String instant : compactionInstants) { .withLogFile(partition, "fileId5", 1, 2)
List<FileSlice> fileSliceList = compactionInstantsToFileSlices.get(instant); .addCommit("011")
if (null != fileSliceList) { .withBaseFilesInPartition(partition, "fileId7")
HoodieTestUtils.createCompactionRequest(metaClient, instant, fileSliceList.stream() .withLogFile(partition, "fileId7", 1, 2)
.map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs)).collect(Collectors.toList())); .addCommit("013");
}
}
// Clean now // Clean now
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
@@ -1177,7 +1185,7 @@ public class TestCleaner extends HoodieClientTestBase {
expFileIdToPendingCompaction.forEach((fileId, value) -> { expFileIdToPendingCompaction.forEach((fileId, value) -> {
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId); String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getSliceView() Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getSliceView()
.getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction, .getLatestFileSlicesBeforeOrOn(partition, baseInstantForCompaction,
true) true)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst()); .filter(fs -> fs.getFileId().equals(fileId)).findFirst());
assertTrue(fileSliceForCompaction.isPresent(), "Base Instant for Compaction must be preserved"); assertTrue(fileSliceForCompaction.isPresent(), "Base Instant for Compaction must be preserved");

View File

@@ -59,10 +59,10 @@ public class TestMarkerBasedRollbackStrategy extends HoodieClientTestBase {
// given: wrote some base files and corresponding markers // given: wrote some base files and corresponding markers
HoodieTestTable testTable = HoodieTestTable.of(metaClient); HoodieTestTable testTable = HoodieTestTable.of(metaClient);
String f0 = testTable.addRequestedCommit("000") String f0 = testTable.addRequestedCommit("000")
.withBaseFilesInPartitions("partA").get("partA"); .getFileIdsWithBaseFilesInPartitions("partA").get("partA");
String f1 = testTable.addCommit("001") String f1 = testTable.addCommit("001")
.withBaseFilesInPartition("partA", f0) .withBaseFilesInPartition("partA", f0)
.withBaseFilesInPartitions("partB").get("partB"); .getFileIdsWithBaseFilesInPartitions("partB").get("partB");
String f2 = "f2"; String f2 = "f2";
testTable.forCommit("001") testTable.forCommit("001")
.withMarkerFile("partA", f0, IOType.MERGE) .withMarkerFile("partA", f0, IOType.MERGE)
@@ -90,10 +90,10 @@ public class TestMarkerBasedRollbackStrategy extends HoodieClientTestBase {
// given: wrote some base + log files and corresponding markers // given: wrote some base + log files and corresponding markers
HoodieTestTable testTable = HoodieTestTable.of(metaClient); HoodieTestTable testTable = HoodieTestTable.of(metaClient);
String f2 = testTable.addRequestedDeltaCommit("000") String f2 = testTable.addRequestedDeltaCommit("000")
.withBaseFilesInPartitions("partA").get("partA"); .getFileIdsWithBaseFilesInPartitions("partA").get("partA");
String f1 = testTable.addDeltaCommit("001") String f1 = testTable.addDeltaCommit("001")
.withLogFile("partA", f2) .withLogFile("partA", f2)
.withBaseFilesInPartitions("partB").get("partB"); .getFileIdsWithBaseFilesInPartitions("partB").get("partB");
String f3 = "f3"; String f3 = "f3";
String f4 = "f4"; String f4 = "f4";
testTable.forDeltaCommit("001") testTable.forDeltaCommit("001")

View File

@@ -21,12 +21,19 @@ package org.apache.hudi.common.testutils;
import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieCleanerPlan;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.IOType; import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@@ -189,6 +196,20 @@ public class HoodieTestTable {
return this; return this;
} }
public HoodieTestTable addRequestedCompaction(String instantTime, HoodieCompactionPlan compactionPlan) throws IOException {
HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instantTime);
metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant,
TimelineMetadataUtils.serializeCompactionPlan(compactionPlan));
return addRequestedCompaction(instantTime);
}
public HoodieTestTable addRequestedCompaction(String instantTime, FileSlice... fileSlices) throws IOException {
HoodieCompactionPlan plan = CompactionUtils
.buildFromFileSlices(Arrays.stream(fileSlices).map(fs -> Pair.of(fs.getPartitionPath(), fs))
.collect(Collectors.toList()), Option.empty(), Option.empty());
return addRequestedCompaction(instantTime, plan);
}
public HoodieTestTable addCompaction(String instantTime) throws IOException { public HoodieTestTable addCompaction(String instantTime) throws IOException {
createRequestedCompaction(basePath, instantTime); createRequestedCompaction(basePath, instantTime);
createInflightCompaction(basePath, instantTime); createInflightCompaction(basePath, instantTime);
@@ -245,7 +266,7 @@ public class HoodieTestTable {
* *
* @return A {@link Map} of partition and its newly inserted file's id. * @return A {@link Map} of partition and its newly inserted file's id.
*/ */
public Map<String, String> withBaseFilesInPartitions(String... partitions) throws Exception { public Map<String, String> getFileIdsWithBaseFilesInPartitions(String... partitions) throws Exception {
Map<String, String> partitionFileIdMap = new HashMap<>(); Map<String, String> partitionFileIdMap = new HashMap<>();
for (String p : partitions) { for (String p : partitions) {
String fileId = UUID.randomUUID().toString(); String fileId = UUID.randomUUID().toString();
@@ -277,7 +298,7 @@ public class HoodieTestTable {
return this; return this;
} }
public String withLogFile(String partitionPath) throws Exception { public String getFileIdWithLogFile(String partitionPath) throws Exception {
String fileId = UUID.randomUUID().toString(); String fileId = UUID.randomUUID().toString();
withLogFile(partitionPath, fileId); withLogFile(partitionPath, fileId);
return fileId; return fileId;
@@ -287,8 +308,10 @@ public class HoodieTestTable {
return withLogFile(partitionPath, fileId, 0); return withLogFile(partitionPath, fileId, 0);
} }
public HoodieTestTable withLogFile(String partitionPath, String fileId, int version) throws Exception { public HoodieTestTable withLogFile(String partitionPath, String fileId, int... versions) throws Exception {
for (int version : versions) {
FileCreateUtils.createLogFile(basePath, partitionPath, currentInstantTime, fileId, version); FileCreateUtils.createLogFile(basePath, partitionPath, currentInstantTime, fileId, version);
}
return this; return this;
} }

View File

@@ -18,9 +18,6 @@
package org.apache.hudi.common.testutils; package org.apache.hudi.common.testutils;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTableType;
@@ -28,27 +25,15 @@ import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats; import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats;
import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Input;
import com.esotericsoftware.kryo.io.Output; import com.esotericsoftware.kryo.io.Output;
import com.esotericsoftware.kryo.serializers.JavaSerializer; import com.esotericsoftware.kryo.serializers.JavaSerializer;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
@@ -122,66 +107,6 @@ public class HoodieTestUtils {
return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties); return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
} }
/**
* @deprecated Use {@link HoodieTestTable} instead.
*/
public static void createCommitFiles(String basePath, String... instantTimes) throws IOException {
for (String instantTime : instantTimes) {
new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeRequestedCommitFileName(instantTime)).createNewFile();
new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
+ HoodieTimeline.makeInflightCommitFileName(instantTime)).createNewFile();
new File(
basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(instantTime))
.createNewFile();
}
}
/**
* @deprecated Use {@link HoodieTestTable} instead.
*/
public static String createDataFile(String basePath, String partitionPath, String instantTime, String fileID)
throws IOException {
String folderPath = basePath + "/" + partitionPath + "/";
new File(folderPath).mkdirs();
new File(folderPath + FSUtils.makeDataFileName(instantTime, DEFAULT_WRITE_TOKEN, fileID)).createNewFile();
return fileID;
}
/**
* @deprecated Use {@link HoodieTestTable} instead.
*/
public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String instantTime,
String fileID, Option<Integer> version) throws IOException {
String folderPath = basePath + "/" + partitionPath + "/";
boolean makeDir = fs.mkdirs(new Path(folderPath));
if (!makeDir) {
throw new IOException("cannot create directory for path " + folderPath);
}
boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log", instantTime,
version.orElse(DEFAULT_LOG_VERSION), HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
if (!createFile) {
throw new IOException(
StringUtils.format("cannot create data file for commit %s and fileId %s", instantTime, fileID));
}
return fileID;
}
/**
* TODO: incorporate into {@link HoodieTestTable}.
*
* @deprecated Use {@link HoodieTestTable} instead.
*/
public static void createCompactionRequest(HoodieTableMetaClient metaClient, String instant,
List<Pair<String, FileSlice>> fileSliceList) throws IOException {
HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(fileSliceList, Option.empty(), Option.empty());
HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant);
metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant,
TimelineMetadataUtils.serializeCompactionPlan(plan));
}
public static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz) { public static <T extends Serializable> T serializeDeserialize(T object, Class<T> clazz) {
// Using Kyro as the default serializer in Spark Jobs // Using Kyro as the default serializer in Spark Jobs
Kryo kryo = new Kryo(); Kryo kryo = new Kryo();

View File

@@ -88,7 +88,7 @@ public class TestHoodieROTablePathFilter extends HoodieCommonTestHarness {
public void testPartitionPathsAsNonHoodiePaths() throws Exception { public void testPartitionPathsAsNonHoodiePaths() throws Exception {
final String p1 = "2017/01/01"; final String p1 = "2017/01/01";
final String p2 = "2017/01/02"; final String p2 = "2017/01/02";
testTable.addCommit("001").withBaseFilesInPartitions(p1, p2); testTable.addCommit("001").getFileIdsWithBaseFilesInPartitions(p1, p2);
Path partitionPath1 = testTable.getPartitionPath(p1).getParent(); Path partitionPath1 = testTable.getPartitionPath(p1).getParent();
Path partitionPath2 = testTable.getPartitionPath(p2).getParent(); Path partitionPath2 = testTable.getPartitionPath(p2).getParent();
assertTrue(pathFilter.accept(partitionPath1), "Directories should be accepted"); assertTrue(pathFilter.accept(partitionPath1), "Directories should be accepted");