[HUDI-2255] Refactor Datasource options (#3373)
Co-authored-by: Wenning Ding <wenningd@amazon.com>
@@ -117,8 +117,8 @@ public class BootstrapExecutor implements Serializable {
     this.bootstrapBasePath = properties.getString(HoodieTableConfig.HOODIE_BOOTSTRAP_BASE_PATH_PROP.key());
 
     // Add more defaults if full bootstrap requested
-    this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY().key(),
-        DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY().defaultValue());
+    this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS().key(),
+        DataSourceWriteOptions.PAYLOAD_CLASS().defaultValue());
     this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jssc);
     HoodieWriteConfig.Builder builder =
         HoodieWriteConfig.newBuilder().withPath(cfg.targetBasePath)
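For datasource users only the accessor name changes; the underlying property and its default are untouched. A minimal sketch of passing the payload class through the new accessor on a Spark write (the `df`, `basePath`, and table name are illustrative, not part of this commit):

    import org.apache.hudi.DataSourceWriteOptions;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SaveMode;

    public class PayloadClassWriteSketch {
      // Hedged sketch: writes df as a Hudi table, spelling out the payload
      // class the same way BootstrapExecutor defaults it via putIfAbsent.
      static void write(Dataset<Row> df, String basePath) {
        df.write().format("org.apache.hudi")
            .option("hoodie.table.name", "trips") // illustrative; literal key kept for self-containment
            .option(DataSourceWriteOptions.PAYLOAD_CLASS().key(),
                DataSourceWriteOptions.PAYLOAD_CLASS().defaultValue())
            .mode(SaveMode.Append)
            .save(basePath);
      }
    }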
@@ -98,7 +98,7 @@ import java.util.stream.Collectors;
 import scala.collection.JavaConversions;
 
 import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP;
-import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY;
+import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE;
 import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY;
 import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_RESET_KEY;
 import static org.apache.hudi.config.HoodieClusteringConfig.INLINE_CLUSTERING_PROP;
@@ -687,7 +687,7 @@ public class DeltaSync implements Serializable {
     ValidationUtils.checkArgument(config.inlineClusteringEnabled() == cfg.isInlineClusteringEnabled(),
         String.format("%s should be set to %s", INLINE_CLUSTERING_PROP.key(), cfg.isInlineClusteringEnabled()));
     ValidationUtils.checkArgument(config.isAsyncClusteringEnabled() == cfg.isAsyncClusteringEnabled(),
-        String.format("%s should be set to %s", ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), cfg.isAsyncClusteringEnabled()));
+        String.format("%s should be set to %s", ASYNC_CLUSTERING_ENABLE.key(), cfg.isAsyncClusteringEnabled()));
     ValidationUtils.checkArgument(!config.shouldAutoCommit(),
         String.format("%s should be set to %s", HOODIE_AUTO_COMMIT_PROP.key(), autoCommit));
     ValidationUtils.checkArgument(config.shouldCombineBeforeInsert() == cfg.filterDupes,
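These checks pin the resolved HoodieWriteConfig to the streamer's own flags, so both sides must be fed through the same renamed key. A hedged sketch of a write config that passes the async-clustering check (the path and property wiring are illustrative):

    import java.util.Properties;

    import org.apache.hudi.config.HoodieClusteringConfig;
    import org.apache.hudi.config.HoodieWriteConfig;

    public class AsyncClusteringConfigSketch {
      static HoodieWriteConfig build() {
        Properties props = new Properties();
        // Renamed key (ASYNC_CLUSTERING_ENABLE_OPT_KEY before this commit).
        props.setProperty(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), "true");
        // isAsyncClusteringEnabled() on the result returns true, matching
        // cfg.isAsyncClusteringEnabled() in the checkArgument above.
        return HoodieWriteConfig.newBuilder()
            .withPath("/tmp/hudi_table") // illustrative path
            .withProps(props)
            .build();
      }
    }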
@@ -362,7 +362,7 @@ public class HoodieDeltaStreamer implements Serializable {
 
     public boolean isAsyncClusteringEnabled() {
       return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getConfig()
-          .getOrDefault(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), false)));
+          .getOrDefault(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), false)));
     }
 
     public boolean isInlineClusteringEnabled() {
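The getter resolves the flag from the streamer's --hoodie-conf overrides held in `configs`. A hedged sketch of exercising it, assuming the public `configs` list on HoodieDeltaStreamer.Config that the tests below also populate:

    import org.apache.hudi.config.HoodieClusteringConfig;
    import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;

    public class AsyncClusteringFlagSketch {
      public static void main(String[] args) {
        HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
        // Same key=value shape the tests build with String.format.
        cfg.configs.add(String.format("%s=true",
            HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key()));
        // Parses "true" from the override; with no override present,
        // getOrDefault(..., false) keeps async clustering disabled.
        System.out.println(cfg.isAsyncClusteringEnabled()); // true
      }
    }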
@@ -128,7 +128,7 @@ public class HoodieMultiTableDeltaStreamer {
       Helpers.deepCopyConfigs(config, cfg);
       String overriddenTargetBasePath = tableProperties.getString(Constants.TARGET_BASE_PATH_PROP, "");
       cfg.targetBasePath = StringUtils.isNullOrEmpty(overriddenTargetBasePath) ? targetBasePath : overriddenTargetBasePath;
-      if (cfg.enableHiveSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), ""))) {
+      if (cfg.enableHiveSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE().key(), ""))) {
         throw new HoodieException("Hive sync table field not provided!");
       }
       populateSchemaProviderProps(cfg, tableProperties);
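Table-level properties must now name the hive table through the renamed HIVE_TABLE accessor, or the guard above throws. A hedged sketch of a per-table properties object that passes the check (the table name is a placeholder):

    import org.apache.hudi.DataSourceWriteOptions;
    import org.apache.hudi.common.config.TypedProperties;

    public class HiveTablePropsSketch {
      static TypedProperties tableProps() {
        TypedProperties props = new TypedProperties();
        // Renamed accessor (HIVE_TABLE_OPT_KEY before this commit); with hive
        // sync enabled, an empty value here fails fast in the guard above.
        props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "dummy_table");
        return props;
      }
    }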
@@ -115,9 +115,9 @@ public class HoodieIncrSource extends RowSource {
 
     // Do Incr pull. Set end instant if available
     DataFrameReader reader = sparkSession.read().format("org.apache.hudi")
-        .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
-        .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY().key(), instantEndpts.getLeft())
-        .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY().key(), instantEndpts.getRight());
+        .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
+        .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantEndpts.getLeft())
+        .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
 
     Dataset<Row> source = reader.load(srcPath);
 
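The same three renamed read options drive any incremental pull against a Hudi table. A minimal standalone sketch (the SparkSession, instant range, and path are illustrative):

    import org.apache.hudi.DataSourceReadOptions;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;

    public class IncrementalReadSketch {
      static Dataset<Row> readIncremental(SparkSession spark, String srcPath) {
        return spark.read().format("org.apache.hudi")
            // Renamed accessors; the incremental query type value is unchanged.
            .option(DataSourceReadOptions.QUERY_TYPE().key(),
                DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
            .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), "20210801000000") // illustrative instants
            .option(DataSourceReadOptions.END_INSTANTTIME().key(), "20210802000000")
            .load(srcPath);
      }
    }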
@@ -884,7 +884,7 @@ public class TestHoodieDeltaStreamer extends TestHoodieDeltaStreamerBase {
       configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP.key(), inlineClusterMaxCommit));
     }
     if (!StringUtils.isNullOrEmpty(asyncCluster)) {
-      configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), asyncCluster));
+      configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), asyncCluster));
     }
     if (!StringUtils.isNullOrEmpty(asyncClusterMaxCommit)) {
       configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_MAX_COMMIT_PROP.key(), asyncClusterMaxCommit));
@@ -116,11 +116,11 @@ public class TestHoodieDeltaStreamerBase extends UtilitiesTestBase {
     props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
 
     // Hive Configs
-    props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
-    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb1");
-    props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), "hive_trips");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
+    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
+    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1");
+    props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "hive_trips");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
         MultiPartKeysValueExtractor.class.getName());
     UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE);
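Only the accessor names change here; the property names written to the DFS props file stay the same. For reference, the equivalent entries in the saved file, assuming the key strings these accessors resolve to in current Hudi releases:

    hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://127.0.0.1:9999/
    hoodie.datasource.hive_sync.database=testdb1
    hoodie.datasource.hive_sync.table=hive_trips
    hoodie.datasource.hive_sync.partition_fields=datestr
    hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.MultiPartKeysValueExtractor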
@@ -212,11 +212,11 @@ public class TestHoodieDeltaStreamerBase extends UtilitiesTestBase {
 
   protected static void populateCommonHiveProps(TypedProperties props) {
     // Hive Configs
-    props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
-    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb2");
-    props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY().key(), "false");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
+    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
+    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb2");
+    props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION().key(), "false");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
         MultiPartKeysValueExtractor.class.getName());
   }
 
@@ -110,8 +110,8 @@ public class TestHoodieMultiTableDeltaStreamer extends TestHoodieDeltaStreamerBase {
     assertEquals(dfsBasePath + "/multi_table_dataset/uber_db/dummy_table_uber", executionContext.getConfig().targetBasePath);
     assertEquals("uber_db.dummy_table_uber", executionContext.getConfig().targetTableName);
     assertEquals("topic1", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.KAFKA_TOPIC_PROP));
-    assertEquals("_row_key", executionContext.getProperties().getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key()));
-    assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), executionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key()));
+    assertEquals("_row_key", executionContext.getProperties().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key()));
+    assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), executionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key()));
     assertEquals("uber_hive_dummy_table", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.HIVE_SYNC_TABLE_PROP));
   }
 
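The assertions read the renamed write options back out of per-table properties. A hedged sketch of setting them on the source side (the key generator below is Hudi's stock SimpleKeyGenerator, chosen only for illustration):

    import org.apache.hudi.DataSourceWriteOptions;
    import org.apache.hudi.common.config.TypedProperties;

    public class KeyGenPropsSketch {
      static TypedProperties keyGenProps() {
        TypedProperties props = new TypedProperties();
        props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        props.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(),
            "org.apache.hudi.keygen.SimpleKeyGenerator");
        return props;
      }
    }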
@@ -224,11 +224,11 @@ public class TestHoodieMultiTableDeltaStreamer extends TestHoodieDeltaStreamerBase {
     tableExecutionContexts.forEach(tableExecutionContext -> {
       switch (tableExecutionContext.getTableName()) {
         case "dummy_table_short_trip":
-          String tableLevelKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key());
+          String tableLevelKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key());
           assertEquals(TestHoodieDeltaStreamer.TestTableLevelGenerator.class.getName(), tableLevelKeyGeneratorClass);
           break;
         default:
-          String defaultKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key());
+          String defaultKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key());
           assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), defaultKeyGeneratorClass);
       }
     });