1
0

[HUDI-2923] Fixing metadata table reader when metadata compaction is inflight (#4206)

* [HUDI-2923] Fixing metadata table reader when metadata compaction is inflight

* Fixing retry of pending compaction in metadata table and enhancing tests
This commit is contained in:
Sivabalan Narayanan
2021-12-04 00:44:50 -05:00
committed by GitHub
parent 94f45e928c
commit 1d4fb827e7
8 changed files with 117 additions and 6 deletions

View File

@@ -169,7 +169,7 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad
* The record is tagged with respective file slice's location based on its record key.
*/
private JavaRDD<HoodieRecord> prepRecords(JavaRDD<HoodieRecord> recordsRDD, String partitionName, int numFileGroups) {
List<FileSlice> fileSlices = HoodieTableMetadataUtil.loadPartitionFileGroupsWithLatestFileSlices(metadataMetaClient, partitionName);
List<FileSlice> fileSlices = HoodieTableMetadataUtil.loadPartitionFileGroupsWithLatestFileSlices(metadataMetaClient, partitionName, false);
ValidationUtils.checkArgument(fileSlices.size() == numFileGroups, String.format("Invalid number of file groups: found=%d, required=%d", fileSlices.size(), numFileGroups));
return recordsRDD.map(r -> {

View File

@@ -54,6 +54,7 @@ import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.table.view.TableFileSystemView;
import org.apache.hudi.common.testutils.FileCreateUtils;
import org.apache.hudi.common.testutils.HoodieMetadataTestTable;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.common.util.HoodieTimer;
@@ -413,6 +414,91 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase {
});
}
/**
 * Tests that the metadata table reader stays consistent when a compaction in the metadata table is
 * inflight or has failed.
 *
 * <p>When {@code simulateFailedCompaction} is true, the completed compaction instant file is removed
 * so a subsequent write must retry the compaction; when false, the compaction instant is restored and
 * reads validate against the successfully compacted metadata table.
 *
 * @param simulateFailedCompaction whether to leave the metadata-table compaction in a failed state
 *                                 (forcing a retry) instead of letting it complete
 */
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testMetadataTableWithPendingCompaction(boolean simulateFailedCompaction) throws Exception {
  HoodieTableType tableType = COPY_ON_WRITE;
  init(tableType, false);
  writeConfig = getWriteConfigBuilder(true, true, false)
      .withMetadataConfig(HoodieMetadataConfig.newBuilder()
          .enable(true)
          .enableFullScan(true)
          .enableMetrics(false)
          .withMaxNumDeltaCommitsBeforeCompaction(3)
          .build()).build();
  initWriteConfigAndMetatableWriter(writeConfig, true);
  doWriteOperation(testTable, "0000001", INSERT);
  // create an inflight compaction in metadata table.
  // not easy to create an inflight in metadata table directly, hence letting compaction succeed and then deleting the completed instant.
  // this new write is expected to trigger metadata table compaction
  String commitInstant = "0000002";
  doWriteOperation(testTable, commitInstant, INSERT);
  HoodieTableMetadata tableMetadata = metadata(writeConfig, context);
  String metadataCompactionInstant = commitInstant + "001";
  assertTrue(tableMetadata.getLatestCompactionTime().isPresent());
  // JUnit contract is assertEquals(expected, actual) — expected instant first.
  assertEquals(metadataCompactionInstant, tableMetadata.getLatestCompactionTime().get());
  validateMetadata(testTable);
  // Fetch compaction Commit file and rename to some other file. completed compaction meta file should have some serialized info that table interprets
  // for future upserts. so, renaming the file here to some temp name and later renaming it back to same name.
  java.nio.file.Path parentPath = Paths.get(metadataTableBasePath, HoodieTableMetaClient.METAFOLDER_NAME);
  java.nio.file.Path metaFilePath = parentPath.resolve(metadataCompactionInstant + HoodieTimeline.COMMIT_EXTENSION);
  java.nio.file.Path tempFilePath = FileCreateUtils.renameFileToTemp(metaFilePath, metadataCompactionInstant);
  metaClient.reloadActiveTimeline();
  testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter);
  // this validation will exercise the code path where a compaction is inflight in metadata table, but still metadata based file listing should match non
  // metadata based file listing.
  validateMetadata(testTable);
  if (simulateFailedCompaction) {
    // this should retry the compaction in metadata table.
    doWriteOperation(testTable, "0000003", INSERT);
  } else {
    // let the compaction succeed in metadata and validation should succeed.
    FileCreateUtils.renameTempToMetaFile(tempFilePath, metaFilePath);
  }
  validateMetadata(testTable);
  // add few more write and validate
  doWriteOperation(testTable, "0000004", INSERT);
  doWriteOperation(testTable, "0000005", UPSERT);
  validateMetadata(testTable);
  if (simulateFailedCompaction) {
    // trigger another compaction failure.
    metadataCompactionInstant = "0000005001";
    tableMetadata = metadata(writeConfig, context);
    assertTrue(tableMetadata.getLatestCompactionTime().isPresent());
    // expected first, actual second (JUnit assertEquals contract).
    assertEquals(metadataCompactionInstant, tableMetadata.getLatestCompactionTime().get());
    // Fetch compaction Commit file and rename to some other file. completed compaction meta file should have some serialized info that table interprets
    // for future upserts. so, renaming the file here to some temp name and later renaming it back to same name.
    parentPath = Paths.get(metadataTableBasePath, HoodieTableMetaClient.METAFOLDER_NAME);
    metaFilePath = parentPath.resolve(metadataCompactionInstant + HoodieTimeline.COMMIT_EXTENSION);
    tempFilePath = FileCreateUtils.renameFileToTemp(metaFilePath, metadataCompactionInstant);
    validateMetadata(testTable);
    // this should retry the failed compaction in metadata table.
    doWriteOperation(testTable, "0000006", INSERT);
    validateMetadata(testTable);
    // add few more write and validate
    doWriteOperation(testTable, "0000007", INSERT);
    doWriteOperation(testTable, "0000008", UPSERT);
    validateMetadata(testTable);
  }
}
/**
* Test rollback of various table operations sync to Metadata Table correctly.
*/