1
0

[HUDI-1500] Support replace commit in DeltaSync with commit metadata preserved (#3802)

This commit is contained in:
Sagar Sumit
2021-10-29 22:39:09 +05:30
committed by GitHub
parent 29574af239
commit 5b1992a92d
3 changed files with 17 additions and 10 deletions

View File

@@ -711,8 +711,9 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
deltaStreamerTestRunner(ds, null, condition);
}
@Test
public void testInlineClustering() throws Exception {
@ParameterizedTest
@ValueSource(strings = {"true", "false"})
public void testInlineClustering(String preserveCommitMetadata) throws Exception {
String tableBasePath = dfsBasePath + "/inlineClustering";
// Keep it higher than batch-size to test continuous mode
int totalRecords = 3000;
@@ -721,7 +722,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT);
cfg.continuousMode = true;
cfg.tableType = HoodieTableType.MERGE_ON_READ.name();
cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", ""));
cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", "", preserveCommitMetadata));
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
deltaStreamerTestRunner(ds, cfg, (r) -> {
TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs);
@@ -812,6 +813,13 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
}
}
/**
 * Builds the async-services configs via the six-argument overload and appends one
 * extra {@code key=value} entry for {@code HoodieClusteringConfig.PRESERVE_COMMIT_METADATA}.
 *
 * @param preserveCommitMetadata value written after the {@code =} of the appended entry
 * @return the list produced by the six-argument overload, with the extra entry added last
 */
private List<String> getAsyncServicesConfigs(int totalRecords, String autoClean, String inlineCluster, String inlineClusterMaxCommit,
    String asyncCluster, String asyncClusterMaxCommit, String preserveCommitMetadata) {
  // Shared settings come from the overload without the preserve-commit-metadata argument.
  final List<String> asyncServiceConfigs = getAsyncServicesConfigs(
      totalRecords, autoClean, inlineCluster, inlineClusterMaxCommit, asyncCluster, asyncClusterMaxCommit);
  final String preserveMetadataEntry =
      String.format("%s=%s", HoodieClusteringConfig.PRESERVE_COMMIT_METADATA.key(), preserveCommitMetadata);
  asyncServiceConfigs.add(preserveMetadataEntry);
  return asyncServiceConfigs;
}
private List<String> getAsyncServicesConfigs(int totalRecords, String autoClean, String inlineCluster,
String inlineClusterMaxCommit, String asyncCluster, String asyncClusterMaxCommit) {
List<String> configs = new ArrayList<>();
@@ -904,8 +912,9 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
});
}
@Test
public void testAsyncClusteringServiceWithCompaction() throws Exception {
@ParameterizedTest
@ValueSource(strings = {"true", "false"})
public void testAsyncClusteringServiceWithCompaction(String preserveCommitMetadata) throws Exception {
String tableBasePath = dfsBasePath + "/asyncClusteringCompaction";
// Keep it higher than batch-size to test continuous mode
int totalRecords = 3000;
@@ -914,7 +923,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT);
cfg.continuousMode = true;
cfg.tableType = HoodieTableType.MERGE_ON_READ.name();
cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "", "", "true", "2"));
cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "", "", "true", "2", preserveCommitMetadata));
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
deltaStreamerTestRunner(ds, cfg, (r) -> {
TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs);