1
0

[HUDI-2858] Fixing handling of cluster update reject exception in deltastreamer (#4120)

This commit is contained in:
Sivabalan Narayanan
2021-11-25 14:34:07 -05:00
committed by GitHub
parent f692078d32
commit 6a0f079866
2 changed files with 51 additions and 7 deletions

View File

@@ -152,7 +152,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
/**
 * Creates a {@link HoodieClusteringJob} for the given table.
 *
 * <p>All arguments are forwarded unchanged to {@code buildHoodieClusteringUtilConfig}, and the
 * resulting config is handed to the job together with the test's {@code jsc}.
 *
 * <p>NOTE(review): the argument continuation line appears twice below. This looks like the
 * removed and added sides of a whitespace-only change shown together in this view; confirm
 * against the actual source file, where only one of them should exist.
 *
 * @param tableBasePath base path of the table, passed through to the config builder
 * @param clusteringInstantTime passed through to the config builder
 * @param runSchedule passed through to the config builder
 * @param scheduleAndExecute passed through to the config builder
 * @param retryLastFailedClusteringJob passed through to the config builder
 * @return a new clustering job built from the generated config
 */
protected HoodieClusteringJob initialHoodieClusteringJob(String tableBasePath, String clusteringInstantTime, Boolean runSchedule, String scheduleAndExecute, Boolean retryLastFailedClusteringJob) {
HoodieClusteringJob.Config scheduleClusteringConfig = buildHoodieClusteringUtilConfig(tableBasePath,
clusteringInstantTime, runSchedule, scheduleAndExecute, retryLastFailedClusteringJob);
clusteringInstantTime, runSchedule, scheduleAndExecute, retryLastFailedClusteringJob);
return new HoodieClusteringJob(jsc, scheduleClusteringConfig);
}
@@ -931,6 +931,31 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
});
}
/**
 * Verifies that when deltastreamer writes clash with a pending clustering, the deltastreamer
 * keeps retrying and eventually succeeds (once clustering completes) without failing midway.
 *
 * @throws Exception if the deltastreamer run or one of the timeline assertions fails
 */
@Test
public void testAsyncClusteringServiceWithConflicts() throws Exception {
  // Record count is kept higher than the batch size so that continuous mode is exercised.
  final int totalRecords = 3000;
  final String tableBasePath = dfsBasePath + "/asyncClusteringWithConflicts";

  // Continuous-mode UPSERT pipeline on a copy-on-write table.
  HoodieDeltaStreamer.Config streamerConfig =
      TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT);
  streamerConfig.continuousMode = true;
  streamerConfig.tableType = HoodieTableType.COPY_ON_WRITE.name();
  streamerConfig.configs.addAll(
      getAsyncServicesConfigs(totalRecords, "false", "", "", "true", "2"));

  HoodieDeltaStreamer streamer = new HoodieDeltaStreamer(streamerConfig, jsc);

  // The run is considered done once at least two commits and two replace commits exist.
  deltaStreamerTestRunner(streamer, streamerConfig, ignored -> {
    TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs);
    TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs);
    return true;
  });
}
@ParameterizedTest
@ValueSource(strings = {"true", "false"})
public void testAsyncClusteringServiceWithCompaction(String preserveCommitMetadata) throws Exception {
@@ -1319,7 +1344,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
// Properties used for testing delta-streamer with orc source
orcProps.setProperty("include", "base.properties");
orcProps.setProperty("hoodie.embed.timeline.server","false");
orcProps.setProperty("hoodie.embed.timeline.server", "false");
orcProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key");
orcProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there");
if (useSchemaProvider) {
@@ -1333,9 +1358,9 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
String tableBasePath = dfsBasePath + "/test_orc_source_table" + testNum;
HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer(
TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ORCDFSSource.class.getName(),
transformerClassNames, PROPS_FILENAME_TEST_ORC, false,
useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc);
TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ORCDFSSource.class.getName(),
transformerClassNames, PROPS_FILENAME_TEST_ORC, false,
useSchemaProvider, 100000, false, null, null, "timestamp", null), jsc);
deltaStreamer.sync();
TestHelpers.assertRecordCount(ORC_NUM_RECORDS, tableBasePath + "/*/*.parquet", sqlContext);
testNum++;
@@ -1844,8 +1869,8 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
/**
 * Supplies the argument combinations for the ORC DFS source test (presumably consumed as a
 * parameterized-test method source; verify against the test that references it).
 *
 * <p>Each entry is {@code (useSchemaProvider, transformerClassNames)}: one case without a schema
 * provider and with {@code null} transformers, and one case with a schema provider and a single
 * {@code TripsWithDistanceTransformer}.
 *
 * <p>NOTE(review): the two {@code arguments(...)} lines appear twice below. This looks like the
 * removed and added sides of an indentation-only change shown together in this view; confirm
 * against the actual source file, where only one pair should exist.
 *
 * @return a stream of argument sets, one per test invocation
 */
private static Stream<Arguments> testORCDFSSource() {
// arg1 boolean useSchemaProvider, arg2 List<String> transformerClassNames
return Stream.of(
arguments(false, null),
arguments(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName()))
arguments(false, null),
arguments(true, Collections.singletonList(TripsWithDistanceTransformer.class.getName()))
);
}