|
|
|
|
@@ -24,6 +24,7 @@ import java.util.ConcurrentModificationException;
|
|
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
|
|
import org.apache.hudi.DataSourceWriteOptions;
|
|
|
|
|
import org.apache.hudi.common.config.DFSPropertiesConfiguration;
|
|
|
|
|
import org.apache.hudi.common.config.HoodieConfig;
|
|
|
|
|
import org.apache.hudi.common.config.TypedProperties;
|
|
|
|
|
import org.apache.hudi.common.fs.FSUtils;
|
|
|
|
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
|
|
|
|
@@ -195,11 +196,11 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
|
|
|
|
|
|
|
|
|
|
// Hive Configs
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), "jdbc:hive2://127.0.0.1:9999/");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), "testdb1");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY(), "hive_trips");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), "datestr");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb1");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), "hive_trips");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
|
|
|
|
|
MultiPartKeysValueExtractor.class.getName());
|
|
|
|
|
UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE);
|
|
|
|
|
|
|
|
|
|
@@ -280,11 +281,11 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
|
|
|
|
|
/**
 * Writes a fixed set of Hive-related settings into the supplied properties object.
 *
 * <p>The settings written are: the Hive JDBC URL ("jdbc:hive2://127.0.0.1:9999/"), the database
 * name ("testdb2"), the assume-date-partition flag ("false"), the partition field ("datestr"),
 * and the partition extractor class (the class name of {@code MultiPartKeysValueExtractor}).
 *
 * <p>NOTE(review): each property appears twice below, once with the result of the
 * {@code DataSourceWriteOptions.*_OPT_KEY()} accessor passed directly as the key and once with
 * {@code .key()} called on it. This looks like the before/after lines of a rendered diff rather
 * than intentional duplication; confirm against the real file.
 *
 * @param props properties object that receives the Hive settings; it is mutated in place
 */
protected static void populateCommonHiveProps(TypedProperties props) {
|
|
|
|
|
// Hive Configs
// First group: the accessor result itself is passed as the property key.
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY(), "jdbc:hive2://127.0.0.1:9999/");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY(), "testdb2");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY(), "false");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY(), "datestr");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY(),
|
|
|
|
|
// Second group: same properties and values, with the key string obtained via .key().
props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb2");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY().key(), "false");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
|
|
|
|
|
props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
|
|
|
|
|
MultiPartKeysValueExtractor.class.getName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -738,7 +739,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfg.continuousMode = true;
|
|
|
|
|
cfg.tableType = tableType.name();
|
|
|
|
|
cfg.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
|
|
|
|
|
deltaStreamerTestRunner(ds, cfg, (r) -> {
|
|
|
|
|
if (tableType.equals(HoodieTableType.MERGE_ON_READ)) {
|
|
|
|
|
@@ -769,7 +770,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfgIngestionJob.continuousMode = true;
|
|
|
|
|
cfgIngestionJob.tableType = tableType.name();
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer ingestionJob = new HoodieDeltaStreamer(cfgIngestionJob, jsc);
|
|
|
|
|
|
|
|
|
|
// Prepare base dataset with some commits
|
|
|
|
|
@@ -796,7 +797,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
.fromBytes(timeline.getInstantDetails(timeline.firstInstant().get()).get(), HoodieCommitMetadata.class);
|
|
|
|
|
cfgBackfillJob.checkpoint = commitMetadata.getMetadata(CHECKPOINT_KEY);
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer backfillJob = new HoodieDeltaStreamer(cfgBackfillJob, jsc);
|
|
|
|
|
|
|
|
|
|
// re-init ingestion job to start sync service
|
|
|
|
|
@@ -822,14 +823,14 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
.fromBytes(timeline.getInstantDetails(timeline.firstInstant().get()).get(), HoodieCommitMetadata.class);
|
|
|
|
|
cfgBackfillJob.checkpoint = commitMetadata.getMetadata(CHECKPOINT_KEY);
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
|
|
|
|
|
cfgIngestionJob = TestHelpers.makeConfig(tableBasePath, WriteOperationType.UPSERT,
|
|
|
|
|
Arrays.asList(TestIdentityTransformer.class.getName()), PROPS_FILENAME_TEST_MULTI_WRITER, false);
|
|
|
|
|
cfgIngestionJob.continuousMode = true;
|
|
|
|
|
cfgIngestionJob.tableType = tableType.name();
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
// re-init ingestion job
|
|
|
|
|
HoodieDeltaStreamer ingestionJob3 = new HoodieDeltaStreamer(cfgIngestionJob, jsc);
|
|
|
|
|
// re-init backfill job
|
|
|
|
|
@@ -856,7 +857,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfgIngestionJob.continuousMode = true;
|
|
|
|
|
cfgIngestionJob.tableType = tableType.name();
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer ingestionJob = new HoodieDeltaStreamer(cfgIngestionJob, jsc);
|
|
|
|
|
|
|
|
|
|
// Prepare base dataset with some commits
|
|
|
|
|
@@ -898,7 +899,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
// checkpoint will move from 00000 to 00001 for this backfill job
|
|
|
|
|
cfgBackfillJob.checkpoint = commitMetadataForFirstInstant.getMetadata(CHECKPOINT_KEY);
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfgBackfillJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer backfillJob = new HoodieDeltaStreamer(cfgBackfillJob, jsc);
|
|
|
|
|
backfillJob.sync();
|
|
|
|
|
|
|
|
|
|
@@ -987,9 +988,9 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfg.continuousMode = true;
|
|
|
|
|
cfg.tableType = HoodieTableType.MERGE_ON_READ.name();
|
|
|
|
|
cfg.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfg.configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_PROP, "true"));
|
|
|
|
|
cfg.configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP, "2"));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
cfg.configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_PROP.key(), "true"));
|
|
|
|
|
cfg.configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP.key(), "2"));
|
|
|
|
|
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
|
|
|
|
|
deltaStreamerTestRunner(ds, cfg, (r) -> {
|
|
|
|
|
HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(this.dfs.getConf()).setBasePath(tableBasePath).setLoadActiveTimelineOnLoad(true).build();
|
|
|
|
|
@@ -1023,8 +1024,8 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfg.continuousMode = true;
|
|
|
|
|
cfg.tableType = HoodieTableType.COPY_ON_WRITE.name();
|
|
|
|
|
cfg.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfg.configs.add(String.format("%s=true", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY));
|
|
|
|
|
cfg.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
cfg.configs.add(String.format("%s=true", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key()));
|
|
|
|
|
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc);
|
|
|
|
|
deltaStreamerTestRunner(ds, cfg, (r) -> {
|
|
|
|
|
TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs);
|
|
|
|
|
@@ -1178,7 +1179,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
props.load(inputStream);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assertEquals(props.getProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME), DummyAvroPayload.class.getName());
|
|
|
|
|
assertEquals(new HoodieConfig(props).getString(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP), DummyAvroPayload.class.getName());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
@@ -1204,7 +1205,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
props.load(inputStream);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assertFalse(props.containsKey(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME));
|
|
|
|
|
assertFalse(props.containsKey(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP.key()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
@@ -1236,7 +1237,7 @@ public class TestHoodieDeltaStreamer extends UtilitiesTestBase {
|
|
|
|
|
cfg2.filterDupes = false;
|
|
|
|
|
cfg2.sourceLimit = 2000;
|
|
|
|
|
cfg2.operation = WriteOperationType.UPSERT;
|
|
|
|
|
cfg2.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP));
|
|
|
|
|
cfg2.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN_PROP.key()));
|
|
|
|
|
HoodieDeltaStreamer ds2 = new HoodieDeltaStreamer(cfg2, jsc);
|
|
|
|
|
ds2.sync();
|
|
|
|
|
mClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(tableBasePath).setLoadActiveTimelineOnLoad(true).build();
|
|
|
|
|
|