1
0

[HUDI-2952] Fixing metadata table for non-partitioned dataset (#4243)

This commit is contained in:
Sivabalan Narayanan
2021-12-10 11:11:42 -05:00
committed by GitHub
parent f194566ed4
commit be368264f4
9 changed files with 104 additions and 33 deletions

View File

@@ -149,7 +149,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
private static final Logger LOG = LogManager.getLogger(TestHoodieBackedMetadata.class);
public static List<Arguments> bootstrapAndTableOperationTestArgs() {
public static List<Arguments> tableTypeAndEnableOperationArgs() {
return asList(
Arguments.of(COPY_ON_WRITE, true),
Arguments.of(COPY_ON_WRITE, false),
@@ -162,7 +162,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
* Metadata Table bootstrap scenarios.
*/
@ParameterizedTest
@MethodSource("bootstrapAndTableOperationTestArgs")
@MethodSource("tableTypeAndEnableOperationArgs")
public void testMetadataTableBootstrap(HoodieTableType tableType, boolean addRollback) throws Exception {
init(tableType, false);
// bootstrap with few commits
@@ -243,7 +243,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
* Test various table operations sync to Metadata Table correctly.
*/
@ParameterizedTest
@MethodSource("bootstrapAndTableOperationTestArgs")
@MethodSource("tableTypeAndEnableOperationArgs")
public void testTableOperations(HoodieTableType tableType, boolean enableFullScan) throws Exception {
init(tableType, true, enableFullScan, false, false);
doWriteInsertAndUpsert(testTable);
@@ -319,6 +319,16 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable, emptyList(), true);
}
/**
 * Runs an insert, an upsert and a clean against a COPY_ON_WRITE table using the
 * non-partitioned write helper, then validates the metadata table.
 */
@Test
public void testMetadataInsertUpsertCleanNonPartitioned() throws Exception {
  init(COPY_ON_WRITE);

  final String insertCommit = "0000001";
  final String upsertCommit = "0000002";
  final String cleanCommit = "0000003";

  doWriteOperationNonPartitioned(testTable, insertCommit, INSERT);
  doWriteOperationNonPartitioned(testTable, upsertCommit, UPSERT);
  // The clean at 0000003 is driven by the first (insert) commit.
  testTable.doCleanBasedOnCommits(cleanCommit, Arrays.asList(insertCommit));
  validateMetadata(testTable, emptyList(), true);
}
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testInsertUpsertCluster(HoodieTableType tableType) throws Exception {
@@ -509,7 +519,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
doWriteInsertAndUpsert(testTable);
// trigger an upsert
doWriteOperationAndValidate(testTable, "0000003");
doWriteOperation(testTable, "0000003", UPSERT);
// trigger a commit and rollback
doWriteOperation(testTable, "0000004");
@@ -549,6 +559,27 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable, true);
}
/**
 * Rollback scenario using the non-partitioned write helpers on a COPY_ON_WRITE table:
 * insert + upsert, one more upsert, a commit that is rolled back, and then a series of
 * further upserts. The metadata table is validated after the rollback and at the end.
 */
@Test
public void testRollbackOperationsNonPartitioned() throws Exception {
  init(COPY_ON_WRITE);
  doWriteInsertAndUpsertNonPartitioned(testTable);

  // trigger an upsert
  doWriteOperationNonPartitioned(testTable, "0000003", UPSERT);

  // trigger a commit and rollback
  doWriteOperationNonPartitioned(testTable, "0000004", UPSERT);
  doRollback(testTable, "0000004", "0000005");
  validateMetadata(testTable);

  // trigger few upserts and validate (commits 0000006 through 0000009)
  int commitSuffix = 6;
  while (commitSuffix < 10) {
    doWriteOperationNonPartitioned(testTable, "000000" + commitSuffix, UPSERT);
    commitSuffix++;
  }
  validateMetadata(testTable);
}
/**
* Test that manual rollbacks work correctly and enough timeline history is maintained on the metadata table
* timeline.
@@ -573,7 +604,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
.build();
initWriteConfigAndMetatableWriter(writeConfig, true);
doWriteInsertAndUpsert(testTable, "000001", "000002");
doWriteInsertAndUpsert(testTable, "000001", "000002", false);
for (int i = 3; i < 10; i++) {
doWriteOperation(testTable, "00000" + i);
@@ -674,8 +705,8 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testMetadataBootstrapLargeCommitList(HoodieTableType tableType) throws Exception {
@MethodSource("tableTypeAndEnableOperationArgs")
public void testMetadataBootstrapLargeCommitList(HoodieTableType tableType, boolean nonPartitionedDataset) throws Exception {
init(tableType, true, true, true, false);
long baseCommitTime = Long.parseLong(HoodieActiveTimeline.createNewInstantTime());
for (int i = 1; i < 25; i += 7) {
@@ -687,17 +718,17 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
long commitTime6 = getNextCommitTime(commitTime5);
long commitTime7 = getNextCommitTime(commitTime6);
baseCommitTime = commitTime7;
doWriteOperation(testTable, Long.toString(commitTime1), INSERT);
doWriteOperation(testTable, Long.toString(commitTime2));
doWriteOperation(testTable, Long.toString(commitTime1), INSERT, nonPartitionedDataset);
doWriteOperation(testTable, Long.toString(commitTime2), UPSERT, nonPartitionedDataset);
doClean(testTable, Long.toString(commitTime3), Arrays.asList(Long.toString(commitTime1)));
doWriteOperation(testTable, Long.toString(commitTime4));
doWriteOperation(testTable, Long.toString(commitTime4), UPSERT, nonPartitionedDataset);
if (tableType == MERGE_ON_READ) {
doCompaction(testTable, Long.toString(commitTime5));
doCompaction(testTable, Long.toString(commitTime5), nonPartitionedDataset);
}
doWriteOperation(testTable, Long.toString(commitTime6));
doWriteOperation(testTable, Long.toString(commitTime6), UPSERT, nonPartitionedDataset);
doRollback(testTable, Long.toString(commitTime6), Long.toString(commitTime7));
}
validateMetadata(testTable, emptyList(), true);
validateMetadata(testTable, emptyList(), nonPartitionedDataset);
}
// Some operations are not feasible with test table infra. hence using write client to test those cases.
@@ -1563,8 +1594,12 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
validateMetadata(testTable);
}
/**
 * Issues the insert ("0000001") and upsert ("0000002") commit pair with the
 * non-partitioned flag switched on.
 *
 * @param testTable test table the two commits are written to
 * @throws Exception if the delegated write or validation fails
 */
private void doWriteInsertAndUpsertNonPartitioned(HoodieTestTable testTable) throws Exception {
  final boolean nonPartitioned = true;
  doWriteInsertAndUpsert(testTable, "0000001", "0000002", nonPartitioned);
}
/**
 * Convenience wrapper that issues the insert/upsert commit pair at "0000001" and "0000002"
 * for the partitioned case.
 *
 * @param testTable test table the two commits are written to
 * @throws Exception if the delegated helper fails
 */
private void doWriteInsertAndUpsert(HoodieTestTable testTable) throws Exception {
// NOTE(review): this listing looks like a rendered diff; the 3-argument call is presumably the
// removed line and the 4-argument call (nonPartitioned = false) its replacement. Confirm against the real file.
doWriteInsertAndUpsert(testTable, "0000001", "0000002");
doWriteInsertAndUpsert(testTable, "0000001", "0000002", false);
}
private HoodieWriteConfig getSmallInsertWriteConfig(int insertSplitSize, String schemaStr, long smallFileSize, boolean mergeAllowDuplicateInserts) {

View File

@@ -61,7 +61,7 @@ public class TestHoodieBackedTableMetadata extends TestHoodieMetadataBase {
}
/**
 * Convenience wrapper that issues the insert/upsert commit pair at "0000001" and "0000002"
 * for the partitioned case.
 *
 * @param testTable test table the two commits are written to
 * @throws Exception if the delegated helper fails
 */
private void doWriteInsertAndUpsert(HoodieTestTable testTable) throws Exception {
// NOTE(review): this listing looks like a rendered diff; the 3-argument call is presumably the
// removed line and the 4-argument call (nonPartitioned = false) its replacement. Confirm against the real file.
doWriteInsertAndUpsert(testTable, "0000001", "0000002");
doWriteInsertAndUpsert(testTable, "0000001", "0000002", false);
}
private void verifyBaseMetadataTable() throws IOException {

View File

@@ -109,10 +109,10 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
cleanupResources();
}
/**
 * Writes an INSERT at {@code commit1} followed by an UPSERT at {@code commit2}, then validates
 * the metadata table. In the 4-argument form, {@code nonPartitioned} selects the single
 * empty-string partition {@code ""} instead of the partitions "p1" and "p2".
 *
 * NOTE(review): this span looks like a rendered diff in which the old 3-argument signature and
 * its statements are interleaved with the new 4-argument ones; only the {@code nonPartitioned}
 * variants are presumably present in the real file. Confirm before relying on this listing.
 *
 * @param testTable test table the commits are written to
 * @param commit1 instant time used for the INSERT
 * @param commit2 instant time used for the UPSERT
 * @throws Exception if a write or the validation fails
 */
protected void doWriteInsertAndUpsert(HoodieTestTable testTable, String commit1, String commit2) throws Exception {
testTable.doWriteOperation(commit1, INSERT, asList("p1", "p2"), asList("p1", "p2"),
protected void doWriteInsertAndUpsert(HoodieTestTable testTable, String commit1, String commit2, boolean nonPartitioned) throws Exception {
testTable.doWriteOperation(commit1, INSERT, nonPartitioned ? asList("") : asList("p1", "p2"), nonPartitioned ? asList("") : asList("p1", "p2"),
4, false);
testTable.doWriteOperation(commit2, UPSERT, asList("p1", "p2"),
testTable.doWriteOperation(commit2, UPSERT, nonPartitioned ? asList("") : asList("p1", "p2"),
4, false);
validateMetadata(testTable);
}
@@ -135,6 +135,18 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
validateMetadata(testTable);
}
/**
 * Performs a write of the given operation type at {@code commitTime} against the single
 * empty-string partition path, without validating the metadata table afterwards.
 *
 * @param testTable test table to write to
 * @param commitTime instant time of the write
 * @param operationType kind of write to perform
 * @throws Exception if the write fails
 */
protected void doWriteOperationNonPartitioned(HoodieTestTable testTable, String commitTime, WriteOperationType operationType) throws Exception {
  // The non-partitioned case is expressed as one partition whose path is the empty string.
  final String nonPartitionedPath = "";
  testTable.doWriteOperation(commitTime, operationType, emptyList(), asList(nonPartitionedPath), 3);
}
/**
 * Dispatches a write to either the partitioned or the non-partitioned helper.
 *
 * @param testTable test table to write to
 * @param commitTime instant time of the write
 * @param operationType kind of write to perform
 * @param nonPartitioned {@code true} to route to {@code doWriteOperationNonPartitioned},
 *                       {@code false} to route to the 3-argument {@code doWriteOperation}
 * @throws Exception if the delegated write fails
 */
protected void doWriteOperation(HoodieTestTable testTable, String commitTime, WriteOperationType operationType, boolean nonPartitioned) throws Exception {
  if (!nonPartitioned) {
    doWriteOperation(testTable, commitTime, operationType);
    return;
  }
  doWriteOperationNonPartitioned(testTable, commitTime, operationType);
}
/**
 * Performs a write of the given operation type at {@code commitTime} against partitions
 * "p1" and "p2", without validating the metadata table afterwards.
 *
 * @param testTable test table to write to
 * @param commitTime instant time of the write
 * @param operationType kind of write to perform
 * @throws Exception if the write fails
 */
protected void doWriteOperation(HoodieTestTable testTable, String commitTime, WriteOperationType operationType) throws Exception {
  testTable.doWriteOperation(
      commitTime,
      operationType,
      emptyList(),
      asList("p1", "p2"),
      3);
}
@@ -154,16 +166,28 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
}
}
/**
 * Runs a compaction at {@code commitTime} with the non-partitioned flag set and without
 * validating the metadata table afterwards.
 *
 * @param testTable test table to compact
 * @param commitTime instant time of the compaction
 * @throws Exception if the compaction fails
 */
protected void doCompactionNonPartitioned(HoodieTestTable testTable, String commitTime) throws Exception {
  final boolean validate = false;
  final boolean nonPartitioned = true;
  doCompactionInternal(testTable, commitTime, validate, nonPartitioned);
}
/**
 * Runs a compaction at {@code commitTime} without validating the metadata table afterwards.
 *
 * @param testTable test table to compact
 * @param commitTime instant time of the compaction
 * @param nonPartitioned forwarded unchanged to {@code doCompactionInternal}
 * @throws Exception if the compaction fails
 */
protected void doCompaction(HoodieTestTable testTable, String commitTime, boolean nonPartitioned) throws Exception {
  final boolean validate = false;
  doCompactionInternal(testTable, commitTime, validate, nonPartitioned);
}
/**
 * Runs a compaction at {@code commitTime} for the partitioned case, without validating the
 * metadata table afterwards.
 *
 * @param testTable test table to compact
 * @param commitTime instant time of the compaction
 * @throws Exception if the compaction fails
 */
protected void doCompaction(HoodieTestTable testTable, String commitTime) throws Exception {
// NOTE(review): this listing looks like a rendered diff; the 3-argument call is presumably the
// removed line and the 4-argument call (validate = false, nonPartitioned = false) its replacement. Confirm.
doCompactionInternal(testTable, commitTime, false);
doCompactionInternal(testTable, commitTime, false, false);
}
/**
 * Runs a compaction at {@code commitTime} with the non-partitioned flag set and with
 * validation requested.
 *
 * @param testTable test table to compact
 * @param commitTime instant time of the compaction
 * @throws Exception if the compaction or the validation fails
 */
protected void doCompactionNonPartitionedAndValidate(HoodieTestTable testTable, String commitTime) throws Exception {
  final boolean validate = true;
  final boolean nonPartitioned = true;
  doCompactionInternal(testTable, commitTime, validate, nonPartitioned);
}
/**
 * Runs a compaction at {@code commitTime} for the partitioned case with validation requested.
 *
 * @param testTable test table to compact
 * @param commitTime instant time of the compaction
 * @throws Exception if the compaction or the validation fails
 */
protected void doCompactionAndValidate(HoodieTestTable testTable, String commitTime) throws Exception {
// NOTE(review): this listing looks like a rendered diff; the 3-argument call is presumably the
// removed line and the 4-argument call (validate = true, nonPartitioned = false) its replacement. Confirm.
doCompactionInternal(testTable, commitTime, true);
doCompactionInternal(testTable, commitTime, true, false);
}
private void doCompactionInternal(HoodieTestTable testTable, String commitTime, boolean validate) throws Exception {
testTable.doCompaction(commitTime, asList("p1", "p2"));
private void doCompactionInternal(HoodieTestTable testTable, String commitTime, boolean validate, boolean nonPartitioned) throws Exception {
testTable.doCompaction(commitTime, nonPartitioned ? asList("") : asList("p1", "p2"));
if (validate) {
validateMetadata(testTable);
}

View File

@@ -235,7 +235,7 @@ public class TestHoodieMetadataBootstrap extends TestHoodieMetadataBase {
}
/**
 * Convenience wrapper that issues the insert/upsert commit pair at "0000100" and "0000101"
 * for the partitioned case.
 *
 * @param testTable test table the two commits are written to
 * @throws Exception if the delegated helper fails
 */
private void doWriteInsertAndUpsert(HoodieTestTable testTable) throws Exception {
// NOTE(review): this listing looks like a rendered diff; the 3-argument call is presumably the
// removed line and the 4-argument call (nonPartitioned = false) its replacement. Confirm against the real file.
doWriteInsertAndUpsert(testTable, "0000100", "0000101");
doWriteInsertAndUpsert(testTable, "0000100", "0000101", false);
}
private HoodieWriteConfig getWriteConfig(int minArchivalCommits, int maxArchivalCommits) throws Exception {

View File

@@ -532,6 +532,9 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
List<java.nio.file.Path> fsPartitionPaths = testTable.getAllPartitionPaths();
List<String> fsPartitions = new ArrayList<>();
fsPartitionPaths.forEach(entry -> fsPartitions.add(entry.getFileName().toString()));
if (fsPartitions.isEmpty()) {
fsPartitions.add("");
}
List<String> metadataPartitions = tableMetadata.getAllPartitionPaths();
Collections.sort(fsPartitions);
@@ -618,7 +621,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
}
}
assertEquals(fsStatuses.length, partitionToFilesMap.get(basePath + "/" + partition).length);
assertEquals(fsStatuses.length, partitionToFilesMap.get(partitionPath.toString()).length);
// Block sizes should be valid
Arrays.stream(metaStatuses).forEach(s -> assertTrue(s.getBlockSize() > 0));