1
0

[HUDI-1294] Adding inline read and seek based read(batch get) for hfile log blocks in metadata table (#3762)

This commit is contained in:
Sivabalan Narayanan
2021-10-29 12:12:44 -04:00
committed by GitHub
parent 0223c442ec
commit 69ee790a47
17 changed files with 591 additions and 142 deletions

View File

@@ -160,9 +160,8 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
doRollbackAndValidate(testTable, "0000003", "0000004");
}
doWriteOperationAndValidate(testTable, "0000005");
// trigger an upsert and validate
// trigger a couple of upserts
doWriteOperation(testTable, "0000005");
doWriteOperation(testTable, "0000006");
validateMetadata(testTable, true);
}
@@ -222,9 +221,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
* Test various table operations sync to Metadata Table correctly.
*/
@ParameterizedTest
@EnumSource(HoodieTableType.class)
public void testTableOperations(HoodieTableType tableType) throws Exception {
init(tableType);
@MethodSource("bootstrapAndTableOperationTestArgs")
public void testTableOperations(HoodieTableType tableType, boolean enableFullScan) throws Exception {
init(tableType, true, enableFullScan);
doWriteInsertAndUpsert(testTable);
// trigger an upsert
@@ -236,7 +235,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
// trigger an upsert
doWriteOperationAndValidate(testTable, "0000005");
doWriteOperation(testTable, "0000005");
// trigger clean
doCleanAndValidate(testTable, "0000006", singletonList("0000001"));
@@ -255,7 +254,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
doWriteOperation(testTable, "0000002");
doCleanAndValidate(testTable, "0000003", Arrays.asList("0000001"));
if (tableType == MERGE_ON_READ) {
doCompactionAndValidate(testTable, "0000004");
doCompaction(testTable, "0000004");
}
doWriteOperation(testTable, "0000005");
validateMetadata(testTable, emptyList(), true);
@@ -288,7 +287,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
doWriteOperationAndValidate(testTable, "0000003");
// trigger a commit and rollback
doWriteOperationAndValidate(testTable, "0000004");
doWriteOperation(testTable, "0000004");
doRollbackAndValidate(testTable, "0000004", "0000005");
// trigger few upserts and validate
@@ -297,7 +296,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
validateMetadata(testTable);
doWriteOperationAndValidate(testTable, "0000010");
doWriteOperation(testTable, "0000010");
// roll back the last commit and validate
doRollbackAndValidate(testTable, "0000010", "0000011");
@@ -309,7 +308,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
// roll back of delete
doWriteOperationAndValidate(testTable, "0000014", DELETE);
doWriteOperation(testTable, "0000014", DELETE);
doRollbackAndValidate(testTable, "0000014", "0000015");
// rollback partial commit
@@ -394,9 +393,9 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
syncTableMetadata(writeConfig);
validateMetadata(testTable);
doWriteOperationAndValidate(testTable, "00000003", INSERT);
doWriteOperationAndValidate(testTable, "00000004", UPSERT);
doWriteOperationAndValidate(testTable, "00000005", UPSERT);
doWriteOperation(testTable, "00000003", INSERT);
doWriteOperation(testTable, "00000004", UPSERT);
doWriteOperation(testTable, "00000005", UPSERT);
// trigger compaction
if (MERGE_ON_READ.equals(tableType)) {
@@ -404,13 +403,13 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
}
// trigger an upsert
doWriteOperationAndValidate(testTable, "00000008");
doWriteOperation(testTable, "00000008");
// trigger delete
doWriteOperationAndValidate(testTable, "00000009", DELETE);
doWriteOperation(testTable, "00000009", DELETE);
// trigger clean
doCleanAndValidate(testTable, "00000010", asList("00000003", "00000004"));
// trigger another upsert
doWriteOperationAndValidate(testTable, "00000011");
doWriteOperation(testTable, "00000011");
// trigger clustering
doClusterAndValidate(testTable, "00000012");
@@ -528,7 +527,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
records = dataGen.generateUniqueUpdates(newCommitTime, 10);
writeStatuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
assertNoWriteErrors(writeStatuses);
validateMetadata(client);
// Write 4 (updates and inserts)
newCommitTime = "0000004";
@@ -552,7 +550,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
records = dataGen.generateUpdates(newCommitTime, 5);
writeStatuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect();
assertNoWriteErrors(writeStatuses);
validateMetadata(client);
// Compaction
if (metaClient.getTableType() == HoodieTableType.MERGE_ON_READ) {
@@ -568,7 +565,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
JavaRDD<HoodieKey> deleteKeys = jsc.parallelize(records, 1).map(r -> r.getKey());
client.startCommitWithTime(newCommitTime);
client.delete(deleteKeys, newCommitTime);
validateMetadata(client);
// Clean
newCommitTime = "0000009";
@@ -1128,7 +1124,7 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
Collections.sort(fsFileNames);
Collections.sort(metadataFilenames);
assertEquals(fsStatuses.length, partitionToFilesMap.get(basePath + "/" + partition).length);
assertEquals(fsStatuses.length, partitionToFilesMap.get(partitionPath.toString()).length);
// File sizes should be valid
Arrays.stream(metaStatuses).forEach(s -> assertTrue(s.getLen() > 0));

View File

@@ -72,6 +72,10 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
}
/**
 * Convenience overload that delegates to the three-argument {@code init},
 * passing {@code true} for its {@code enableFullScan} argument.
 *
 * @param tableType           forwarded unchanged to the three-argument overload
 * @param enableMetadataTable forwarded unchanged to the three-argument overload
 * @throws IOException propagated from the three-argument overload
 */
public void init(HoodieTableType tableType, boolean enableMetadataTable) throws IOException {
  // Callers that do not specify the flag get full scan switched on.
  final boolean fullScanByDefault = true;
  init(tableType, enableMetadataTable, fullScanByDefault);
}
public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean enableFullScan) throws IOException {
this.tableType = tableType;
initPath();
initSparkContexts("TestHoodieMetadata");
@@ -80,7 +84,8 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
initMetaClient(tableType);
initTestDataGenerator();
metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath);
writeConfig = getWriteConfig(true, enableMetadataTable);
writeConfig = getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, enableMetadataTable, false,
enableFullScan).build();
initWriteConfigAndMetatableWriter(writeConfig, enableMetadataTable);
}
@@ -256,7 +261,13 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
return getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, autoCommit, useFileListingMetadata, enableMetrics);
}
protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy policy, boolean autoCommit, boolean useFileListingMetadata, boolean enableMetrics) {
/**
 * Four-flag overload that delegates to the five-argument {@code getWriteConfigBuilder},
 * passing {@code true} for its {@code enableFullScan} argument.
 *
 * @param policy                 forwarded unchanged
 * @param autoCommit             forwarded unchanged
 * @param useFileListingMetadata forwarded unchanged
 * @param enableMetrics          forwarded unchanged
 * @return the builder produced by the five-argument overload
 */
protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy policy, boolean autoCommit, boolean useFileListingMetadata,
    boolean enableMetrics) {
  // Full scan is on unless a caller uses the five-argument overload to say otherwise.
  final boolean fullScanByDefault = true;
  HoodieWriteConfig.Builder builder =
      getWriteConfigBuilder(policy, autoCommit, useFileListingMetadata, enableMetrics, fullScanByDefault);
  return builder;
}
protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy policy, boolean autoCommit, boolean useFileListingMetadata,
boolean enableMetrics, boolean enableFullScan) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA)
.withParallelism(2, 2).withDeleteParallelism(2).withRollbackParallelism(2).withFinalizeWriteParallelism(2)
.withAutoCommit(autoCommit)
@@ -271,6 +282,7 @@ public class TestHoodieMetadataBase extends HoodieClientTestHarness {
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(useFileListingMetadata)
.enableFullScan(enableFullScan)
.enableMetrics(enableMetrics).build())
.withMetricsConfig(HoodieMetricsConfig.newBuilder().on(enableMetrics)
.withExecutorMetrics(true).build())