diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index e5120b2c5..e53203b14 100644
--- a/docker/demo/sparksql-incremental.commands
+++ b/docker/demo/sparksql-incremental.commands
@@ -27,8 +27,8 @@ import org.apache.hadoop.fs.FileSystem;
 val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
 val beginInstantTime = HoodieDataSourceHelpers.listCommitsSince(fs, "/user/hive/warehouse/stock_ticks_cow", "00000").get(0)
 val hoodieIncQueryDF = spark.read.format("org.apache.hudi").
-    option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
-    option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key(), beginInstantTime).
+    option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
+    option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), beginInstantTime).
     load("/user/hive/warehouse/stock_ticks_cow");
 hoodieIncQueryDF.registerTempTable("stock_ticks_cow_incr")
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_incr where symbol = 'GOOG'").show(100, false);
@@ -37,21 +37,21 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
   write.format("org.apache.hudi").
   option("hoodie.insert.shuffle.parallelism", "2").
   option("hoodie.upsert.shuffle.parallelism","2").
-  option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
-  option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
-  option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key(), "key").
-  option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "datestr").
-  option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key(), "ts").
+  option(DataSourceWriteOptions.TABLE_TYPE.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
+  option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
+  option(DataSourceWriteOptions.RECORDKEY_FIELD.key(), "key").
+  option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(), "datestr").
+  option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "ts").
   option(HoodieWriteConfig.TABLE_NAME.key(), "stock_ticks_derived_mor").
-  option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY.key(), "stock_ticks_derived_mor").
-  option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY.key(), "default").
-  option(DataSourceWriteOptions.HIVE_URL_OPT_KEY.key(), "jdbc:hive2://hiveserver:10000").
-  option(DataSourceWriteOptions.HIVE_USER_OPT_KEY.key(), "hive").
-  option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY.key(), "hive").
-  option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY.key(), "true").
-  option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY.key(), "datestr").
-  option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
-  option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key(), "true").
+  option(DataSourceWriteOptions.HIVE_TABLE.key(), "stock_ticks_derived_mor").
+  option(DataSourceWriteOptions.HIVE_DATABASE.key(), "default").
+  option(DataSourceWriteOptions.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
+  option(DataSourceWriteOptions.HIVE_USER.key(), "hive").
+  option(DataSourceWriteOptions.HIVE_PASS.key(), "hive").
+  option(DataSourceWriteOptions.HIVE_SYNC_ENABLED.key(), "true").
+  option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key(), "datestr").
+  option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
+  option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key(), "true").
   mode(SaveMode.Overwrite).
   save("/user/hive/warehouse/stock_ticks_derived_mor");
@@ -59,8 +59,8 @@ spark.sql("select count(*) from stock_ticks_derived_mor_ro").show(20, false)
 spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)

 val hoodieIncQueryBsDF = spark.read.format("org.apache.hudi").
-    option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
-    option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key(), "00000000000001").
+    option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
+    option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), "00000000000001").
     load("/user/hive/warehouse/stock_ticks_cow_bs");
 hoodieIncQueryBsDF.registerTempTable("stock_ticks_cow_bs_incr")
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs_incr where symbol = 'GOOG'").show(100, false);
@@ -69,21 +69,21 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
   write.format("org.apache.hudi").
   option("hoodie.insert.shuffle.parallelism", "2").
   option("hoodie.upsert.shuffle.parallelism","2").
-  option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
-  option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
-  option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key(), "key").
-  option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "datestr").
-  option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key(), "ts").
+  option(DataSourceWriteOptions.TABLE_TYPE.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
+  option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
+  option(DataSourceWriteOptions.RECORDKEY_FIELD.key(), "key").
+  option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(), "datestr").
+  option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "ts").
   option(HoodieWriteConfig.TABLE_NAME.key(), "stock_ticks_derived_mor_bs").
-  option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY.key(), "stock_ticks_derived_mor_bs").
-  option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY.key(), "default").
-  option(DataSourceWriteOptions.HIVE_URL_OPT_KEY.key(), "jdbc:hive2://hiveserver:10000").
-  option(DataSourceWriteOptions.HIVE_USER_OPT_KEY.key(), "hive").
-  option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY.key(), "hive").
-  option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY.key(), "true").
-  option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY.key(), "datestr").
-  option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
-  option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key(), "true").
+  option(DataSourceWriteOptions.HIVE_TABLE.key(), "stock_ticks_derived_mor_bs").
+  option(DataSourceWriteOptions.HIVE_DATABASE.key(), "default").
+  option(DataSourceWriteOptions.HIVE_URL.key(), "jdbc:hive2://hiveserver:10000").
+  option(DataSourceWriteOptions.HIVE_USER.key(), "hive").
+  option(DataSourceWriteOptions.HIVE_PASS.key(), "hive").
+  option(DataSourceWriteOptions.HIVE_SYNC_ENABLED.key(), "true").
+  option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key(), "datestr").
+  option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS.key(), classOf[MultiPartKeysValueExtractor].getCanonicalName).
+  option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key(), "true").
   mode(SaveMode.Overwrite).
   save("/user/hive/warehouse/stock_ticks_derived_mor_bs");
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
index da661ff24..fdc6d9da5 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
@@ -372,8 +372,8 @@ public class SparkMain {
     properties.setProperty(HoodieBootstrapConfig.FULL_BOOTSTRAP_INPUT_PROVIDER.key(), fullBootstrapInputProvider);
     properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key(), parallelism);
     properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR.key(), selectorClass);
-    properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), recordKeyCols);
-    properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), partitionFields);
+    properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), recordKeyCols);
+    properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), partitionFields);

     HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config();
     cfg.targetTableName = tableName;
+ " Default strategy just rejects the update"); - public static final ConfigProperty ASYNC_CLUSTERING_ENABLE_OPT_KEY = ConfigProperty + public static final ConfigProperty ASYNC_CLUSTERING_ENABLE = ConfigProperty .key("hoodie.clustering.async.enabled") .defaultValue("false") .sinceVersion("0.7.0") @@ -210,7 +210,7 @@ public class HoodieClusteringConfig extends HoodieConfig { } public Builder withAsyncClustering(Boolean asyncClustering) { - clusteringConfig.setValue(ASYNC_CLUSTERING_ENABLE_OPT_KEY, String.valueOf(asyncClustering)); + clusteringConfig.setValue(ASYNC_CLUSTERING_ENABLE, String.valueOf(asyncClustering)); return this; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 6fdc6dd88..4c7a3c5af 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -697,7 +697,7 @@ public class HoodieWriteConfig extends HoodieConfig { } public boolean isAsyncClusteringEnabled() { - return getBoolean(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY); + return getBoolean(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE); } public boolean isClusteringEnabled() { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java index bb73c1367..87ddddd58 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java @@ -32,9 +32,9 @@ public class ComplexAvroKeyGenerator extends BaseKeyGenerator { public ComplexAvroKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()) + this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()) .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()) + this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()) .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java index da678a24c..286e495c2 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java @@ -55,8 +55,8 @@ public class CustomAvroKeyGenerator extends BaseKeyGenerator { public CustomAvroKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList()); + this.recordKeyFields = 
Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList()); + this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList()); } @Override diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java index aac28f50e..3b20c8a8f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java @@ -35,7 +35,7 @@ public class GlobalAvroDeleteKeyGenerator extends BaseKeyGenerator { public GlobalAvroDeleteKeyGenerator(TypedProperties config) { super(config); - this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()).split(",")); + this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(",")); } @Override diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java index feb3820dd..b1e0ca7a5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java @@ -36,7 +36,7 @@ public class NonpartitionedAvroKeyGenerator extends BaseKeyGenerator { public NonpartitionedAvroKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()) + this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()) .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); this.partitionPathFields = EMPTY_PARTITION_FIELD_LIST; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java index 2dab3dcf8..2aa939207 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java @@ -29,8 +29,8 @@ import java.util.Collections; public class SimpleAvroKeyGenerator extends BaseKeyGenerator { public SimpleAvroKeyGenerator(TypedProperties props) { - this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()), - props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key())); + this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()), + props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())); } SimpleAvroKeyGenerator(TypedProperties props, String partitionPathField) { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java index 16b0c0f09..678591ec4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java +++ 
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java
index bb73c1367..87ddddd58 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/ComplexAvroKeyGenerator.java
@@ -32,9 +32,9 @@ public class ComplexAvroKeyGenerator extends BaseKeyGenerator {

   public ComplexAvroKeyGenerator(TypedProperties props) {
     super(props);
-    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key())
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key())
         .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
-    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key())
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())
         .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
   }
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java
index da678a24c..286e495c2 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/CustomAvroKeyGenerator.java
@@ -55,8 +55,8 @@ public class CustomAvroKeyGenerator extends BaseKeyGenerator {

   public CustomAvroKeyGenerator(TypedProperties props) {
     super(props);
-    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList());
-    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList());
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList());
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList());
   }

   @Override
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java
index aac28f50e..3b20c8a8f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/GlobalAvroDeleteKeyGenerator.java
@@ -35,7 +35,7 @@ public class GlobalAvroDeleteKeyGenerator extends BaseKeyGenerator {

   public GlobalAvroDeleteKeyGenerator(TypedProperties config) {
     super(config);
-    this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()).split(","));
+    this.recordKeyFields = Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(","));
   }

   @Override
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java
index feb3820dd..b1e0ca7a5 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/NonpartitionedAvroKeyGenerator.java
@@ -36,7 +36,7 @@ public class NonpartitionedAvroKeyGenerator extends BaseKeyGenerator {

   public NonpartitionedAvroKeyGenerator(TypedProperties props) {
     super(props);
-    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key())
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key())
         .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
     this.partitionPathFields = EMPTY_PARTITION_FIELD_LIST;
   }
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java
index 2dab3dcf8..2aa939207 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/SimpleAvroKeyGenerator.java
@@ -29,8 +29,8 @@ import java.util.Collections;
 public class SimpleAvroKeyGenerator extends BaseKeyGenerator {

   public SimpleAvroKeyGenerator(TypedProperties props) {
-    this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()),
-        props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()));
+    this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()),
+        props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
   }

   SimpleAvroKeyGenerator(TypedProperties props, String partitionPathField) {
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java
index 16b0c0f09..678591ec4 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java
@@ -88,8 +88,8 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
   }

   public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException {
-    this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()),
-        config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()));
+    this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()),
+        config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()));
   }

   TimestampBasedAvroKeyGenerator(TypedProperties config, String partitionPathField) throws IOException {
@@ -119,8 +119,8 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
       default:
         timeUnit = null;
     }
-    this.encodePartitionPath = config.getBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key(),
-        Boolean.parseBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY.defaultValue()));
+    this.encodePartitionPath = config.getBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.key(),
+        Boolean.parseBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.defaultValue()));
   }

   @Override
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestCreateAvroKeyGeneratorByTypeWithFactory.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestCreateAvroKeyGeneratorByTypeWithFactory.java
index 504831406..52ab09dd0 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestCreateAvroKeyGeneratorByTypeWithFactory.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestCreateAvroKeyGeneratorByTypeWithFactory.java
@@ -52,9 +52,9 @@ public class TestCreateAvroKeyGeneratorByTypeWithFactory {
   @BeforeEach
   public void init() {
     props = new TypedProperties();
-    props.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key");
-    props.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true");
-    props.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp");
+    props.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
+    props.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true");
+    props.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp");

     // for timestamp based key generator
     props.put("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING");
diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestHoodieAvroKeyGeneratorFactory.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestHoodieAvroKeyGeneratorFactory.java
index 8f32b79f9..2ae2e3bd4 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestHoodieAvroKeyGeneratorFactory.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/keygen/factory/TestHoodieAvroKeyGeneratorFactory.java
@@ -67,9 +67,9 @@ public class TestHoodieAvroKeyGeneratorFactory {

   private TypedProperties getCommonProps() {
     TypedProperties properties = new TypedProperties();
-    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key");
-    properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true");
-    properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp");
+    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
+    properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true");
+    properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp");
     return properties;
   }
 }
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
index ab2b6a96d..6794fc16b 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
@@ -37,9 +37,9 @@ public class ComplexKeyGenerator extends BuiltinKeyGenerator {

   public ComplexKeyGenerator(TypedProperties props) {
     super(props);
-    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key())
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key())
         .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
-    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key())
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())
         .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
     complexAvroKeyGenerator = new ComplexAvroKeyGenerator(props);
   }
diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
index 14a1c1677..2ad0ace88 100644
--- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/CustomKeyGenerator.java
@@ -53,8 +53,8 @@ public class CustomKeyGenerator extends BuiltinKeyGenerator {

   public CustomKeyGenerator(TypedProperties props) {
     super(props);
-    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList());
-    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()).split(",")).map(String::trim).collect(Collectors.toList());
+    this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList());
+    this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()).split(",")).map(String::trim).collect(Collectors.toList());
     customAvroKeyGenerator = new CustomAvroKeyGenerator(props);
   }
Arrays.asList(config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()).split(",")); globalAvroDeleteKeyGenerator = new GlobalAvroDeleteKeyGenerator(config); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java index 57dcf89a4..6a9dbafa7 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java @@ -39,7 +39,7 @@ public class NonpartitionedKeyGenerator extends BuiltinKeyGenerator { public NonpartitionedKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()) + this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()) .split(",")).map(String::trim).collect(Collectors.toList()); this.partitionPathFields = Collections.emptyList(); nonpartitionedAvroKeyGenerator = new NonpartitionedAvroKeyGenerator(props); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java index 0c4b77000..e82bf75cc 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java @@ -36,8 +36,8 @@ public class SimpleKeyGenerator extends BuiltinKeyGenerator { private final SimpleAvroKeyGenerator simpleAvroKeyGenerator; public SimpleKeyGenerator(TypedProperties props) { - this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()), - props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key())); + this(props, props.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()), + props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())); } SimpleKeyGenerator(TypedProperties props, String partitionPathField) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java index 6ab9cb6f4..b4bca9f96 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/TimestampBasedKeyGenerator.java @@ -41,8 +41,8 @@ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { private final TimestampBasedAvroKeyGenerator timestampBasedAvroKeyGenerator; public TimestampBasedKeyGenerator(TypedProperties config) throws IOException { - this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()), - config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key())); + this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD.key()), + config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key())); } TimestampBasedKeyGenerator(TypedProperties config, String partitionPathField) throws IOException { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java index d69828199..def41f9d1 100644 --- 
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientOnCopyOnWriteStorage.java @@ -128,7 +128,7 @@ import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.NULL_SCHE import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.common.testutils.Transformations.randomSelectAsHoodieKeys; import static org.apache.hudi.common.testutils.Transformations.recordsToRecordKeySet; -import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY; +import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE; import static org.apache.hudi.config.HoodieClusteringConfig.CLUSTERING_EXECUTION_STRATEGY_CLASS; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -969,7 +969,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase { final String testPartitionPath = "2016/09/26"; dataGen = new HoodieTestDataGenerator(new String[] {testPartitionPath}); Properties props = new Properties(); - props.setProperty(ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), "true"); + props.setProperty(ASYNC_CLUSTERING_ENABLE.key(), "true"); HoodieWriteConfig config = getSmallInsertWriteConfig(100, TRIP_EXAMPLE_SCHEMA, dataGen.getEstimatedFileSizeInBytes(150), true, props); SparkRDDWriteClient client = getHoodieWriteClient(config); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java index 1bfd40344..f4f6a6cef 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java @@ -39,11 +39,11 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps(boolean getComplexRecordKey) { TypedProperties properties = new TypedProperties(); if (getComplexRecordKey) { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key, pii_col"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key, pii_col"); } else { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); } - properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true"); + properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true"); return properties; } @@ -53,20 +53,20 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getPropertiesWithoutRecordKeyProp() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getWrongRecordKeyFieldProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_wrong_key"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); + 
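A quick JUnit-style sketch (hypothetical test class, literals taken from this patch) that pins down the compatibility guarantee, i.e. the renamed constants still map to the old key strings:

```java
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.junit.jupiter.api.Test;

public class TestRenamedKeysUnchanged {
  @Test
  public void renamedConstantsKeepTheirKeys() {
    // Only the Java identifiers changed; the persisted config keys did not.
    assertEquals("hoodie.clustering.async.enabled",
        HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key());
    assertEquals("hoodie.datasource.write.recordkey.field",
        KeyGeneratorOptions.RECORDKEY_FIELD.key());
    assertEquals("hoodie.datasource.write.partitionpath.field",
        KeyGeneratorOptions.PARTITIONPATH_FIELD.key());
  }
}
```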
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_wrong_key"); return properties; } private TypedProperties getProps() { TypedProperties properties = getCommonProps(true); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp,ts_ms"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp,ts_ms"); return properties; } @@ -105,8 +105,8 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities { @Test public void testSingleValueKeyGenerator() { TypedProperties properties = new TypedProperties(); - properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); - properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties); assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1); assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1); @@ -128,8 +128,8 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities { @Test public void testMultipleValueKeyGenerator() { TypedProperties properties = new TypedProperties(); - properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key,timestamp"); - properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "rider,driver"); + properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key,timestamp"); + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "rider,driver"); ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties); assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2); assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 2); @@ -154,8 +154,8 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities { @Test public void testMultipleValueKeyGeneratorNonPartitioned() { TypedProperties properties = new TypedProperties(); - properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key,timestamp"); - properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), ""); + properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key,timestamp"); + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), ""); ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties); assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2); assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java index 3e55304ca..1d23429b4 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java @@ -48,53 +48,53 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps(boolean getComplexRecordKey, boolean useKeyGeneratorClassName) { TypedProperties properties = new TypedProperties(); if (getComplexRecordKey) { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key, pii_col"); + 
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key, pii_col"); } else { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); } if (useKeyGeneratorClassName) { properties.put(HoodieWriteConfig.KEYGENERATOR_CLASS_PROP.key(), CustomKeyGenerator.class.getName()); } else { properties.put(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), KeyGeneratorType.CUSTOM.name()); } - properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true"); + properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true"); return properties; } private TypedProperties getPropertiesForSimpleKeyGen(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(false, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp:simple"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:simple"); return properties; } private TypedProperties getImproperPartitionFieldFormatProp(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(false, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getInvalidPartitionKeyTypeProps(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(false, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp:dummy"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:dummy"); return properties; } private TypedProperties getComplexRecordKeyWithSimplePartitionProps(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(true, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp:simple"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:simple"); return properties; } private TypedProperties getComplexRecordKeyAndPartitionPathProps(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(true, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp:simple,ts_ms:timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:simple,ts_ms:timestamp"); populateNecessaryPropsForTimestampBasedKeyGen(properties); return properties; } private TypedProperties getPropsWithoutRecordKeyFieldProps(boolean useKeyGeneratorClassName) { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp:simple"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:simple"); if (useKeyGeneratorClassName) { properties.put(HoodieWriteConfig.KEYGENERATOR_CLASS_PROP.key(), CustomKeyGenerator.class.getName()); } else { @@ -111,14 +111,14 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getPropertiesForTimestampBasedKeyGen(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(false, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "ts_ms:timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "ts_ms:timestamp"); 
populateNecessaryPropsForTimestampBasedKeyGen(properties); return properties; } private TypedProperties getPropertiesForNonPartitionedKeyGen(boolean useKeyGeneratorClassName) { TypedProperties properties = getCommonProps(false, useKeyGeneratorClassName); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), ""); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), ""); return properties; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java index dd3cea470..31457bc3c 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java @@ -34,29 +34,29 @@ public class TestGlobalDeleteKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps(boolean getComplexRecordKey) { TypedProperties properties = new TypedProperties(); if (getComplexRecordKey) { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key,pii_col"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key,pii_col"); } else { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); } - properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true"); + properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true"); return properties; } private TypedProperties getPropertiesWithoutRecordKeyProp() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getWrongRecordKeyFieldProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_wrong_key"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_wrong_key"); return properties; } private TypedProperties getProps() { TypedProperties properties = getCommonProps(true); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp,ts_ms"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp,ts_ms"); return properties; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java index 56d576e9d..99dfbdb62 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java @@ -37,11 +37,11 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps(boolean getComplexRecordKey) { TypedProperties properties = new TypedProperties(); if (getComplexRecordKey) { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key, pii_col"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key, pii_col"); } else { - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); + 
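The `field:type` partition syntax exercised above is specific to `CustomKeyGenerator`; a sketch of a mixed simple-plus-timestamp partition path. The `hoodie.deltastreamer.keygen.timebased.*` keys mirror what `populateNecessaryPropsForTimestampBasedKeyGen` is presumed to set, and the format values are illustrative:

```java
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.CustomKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;

public class CustomKeyGenSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
    // Each partition segment declares its own sub-generator: field:simple or field:timestamp.
    props.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp:simple,ts_ms:timestamp");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", "EPOCHMILLISECONDS"); // illustrative
    props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyyMMdd");       // illustrative
    CustomKeyGenerator keyGen = new CustomKeyGenerator(props);
  }
}
```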
properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); } - properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true"); + properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true"); return properties; } @@ -51,19 +51,19 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getPropertiesWithPartitionPathProp() { TypedProperties properties = getCommonProps(true); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp,ts_ms"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp,ts_ms"); return properties; } private TypedProperties getPropertiesWithoutRecordKeyProp() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getWrongRecordKeyFieldProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_wrong_key"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_wrong_key"); return properties; } @@ -78,7 +78,7 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities { NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(properties); GenericRecord record = getRecord(); Row row = KeyGeneratorTestUtilities.getRow(record); - Assertions.assertEquals(properties.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()), "timestamp,ts_ms"); + Assertions.assertEquals(properties.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()), "timestamp,ts_ms"); Assertions.assertEquals(keyGenerator.getPartitionPath(row), ""); } @@ -101,8 +101,8 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities { @Test public void testSingleValueKeyGeneratorNonPartitioned() { TypedProperties properties = new TypedProperties(); - properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "timestamp"); - properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), ""); + properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "timestamp"); + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), ""); NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(properties); assertEquals(keyGenerator.getRecordKeyFields().size(), 1); assertEquals(keyGenerator.getPartitionPathFields().size(), 0); @@ -118,8 +118,8 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities { @Test public void testMultipleValueKeyGeneratorNonPartitioned1() { TypedProperties properties = new TypedProperties(); - properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "timestamp,driver"); - properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), ""); + properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "timestamp,driver"); + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), ""); NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(properties); assertEquals(keyGenerator.getRecordKeyFields().size(), 2); assertEquals(keyGenerator.getPartitionPathFields().size(), 0); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java index 419e337ae..5fbbd8dac 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java @@ -39,8 +39,8 @@ import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH; public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getCommonProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); - properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); + properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true"); return properties; } @@ -50,40 +50,40 @@ public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities { private TypedProperties getPropertiesWithoutRecordKeyProp() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getWrongRecordKeyFieldProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_wrong_key"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_wrong_key"); return properties; } private TypedProperties getWrongPartitionPathFieldProps() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "_wrong_partition_path"); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "_wrong_partition_path"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key"); return properties; } private TypedProperties getComplexRecordKeyProp() { TypedProperties properties = new TypedProperties(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); - properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key,pii_col"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); + properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key,pii_col"); return properties; } private TypedProperties getProps() { TypedProperties properties = getCommonProps(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp"); return properties; } private TypedProperties getPropsWithNestedPartitionPathField() { TypedProperties properties = getCommonProps(); - properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "nested_col.prop1"); + properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "nested_col.prop1"); return properties; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java index a35442766..2a3841829 100644 --- 
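For the timestamp-based generator covered next, the renamed options combine with the existing `timebased` settings; a hedged sketch (the DATE_STRING type follows the factory tests earlier in this patch, the input/output formats are illustrative):

```java
import java.io.IOException;

import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.TimestampBasedKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;

public class TimestampKeyGenSketch {
  public static void main(String[] args) throws IOException {
    TypedProperties props = new TypedProperties();
    props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "field1");
    props.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "createTime");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd HH:mm:ss"); // illustrative
    props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd");         // illustrative
    TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(props); // ctor declares IOException
  }
}
```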
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
index a35442766..2a3841829 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java
@@ -64,9 +64,9 @@ public class TestTimestampBasedKeyGenerator {
     baseRow = genericRecordToRow(baseRecord);
     internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);

-    properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "field1");
-    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "createTime");
-    properties.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "false");
+    properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "field1");
+    properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "createTime");
+    properties.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "false");
   }

   private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone, String scalarType) {
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java
index f7e5c4399..0f54fd68d 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java
@@ -53,9 +53,9 @@ public class TestCreateKeyGeneratorByTypeWithFactory {
   @BeforeEach
   public void init() {
     props = new TypedProperties();
-    props.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key");
-    props.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true");
-    props.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp");
+    props.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
+    props.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true");
+    props.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp");

     // for timestamp based key generator
     props.put("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING");
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java
index 204cb5796..5b89c7ea8 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java
@@ -74,9 +74,9 @@ public class TestHoodieSparkKeyGeneratorFactory {

   private TypedProperties getCommonProps() {
     TypedProperties properties = new TypedProperties();
-    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key(), "_row_key");
-    properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(), "true");
-    properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "timestamp");
+    properties.put(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
+    properties.put(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true");
+    properties.put(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp");
     return properties;
   }
 }
diff --git a/hudi-common/src/main/java/org/apache/hudi/keygen/BaseKeyGenerator.java b/hudi-common/src/main/java/org/apache/hudi/keygen/BaseKeyGenerator.java
index 52c0309d6..654702ef5 100644
--- a/hudi-common/src/main/java/org/apache/hudi/keygen/BaseKeyGenerator.java
+++ b/hudi-common/src/main/java/org/apache/hudi/keygen/BaseKeyGenerator.java
@@ -35,10 +35,10 @@ public abstract class BaseKeyGenerator extends KeyGenerator {

   protected BaseKeyGenerator(TypedProperties config) {
     super(config);
-    this.encodePartitionPath = config.getBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key(),
-        Boolean.parseBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY.defaultValue()));
-    this.hiveStylePartitioning = config.getBoolean(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key(),
-        Boolean.parseBoolean(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.defaultValue()));
+    this.encodePartitionPath = config.getBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.key(),
+        Boolean.parseBoolean(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.defaultValue()));
+    this.hiveStylePartitioning = config.getBoolean(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(),
+        Boolean.parseBoolean(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.defaultValue()));
   }

   /**
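The two flags renamed in `BaseKeyGenerator` shape every generated partition path; a sketch of their intended effect (the commented output assumes a record whose `timestamp` field stringifies to `2021-07-01`):

```java
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;

public class PartitionStyleSketch {
  public static void main(String[] args) {
    TypedProperties props = new TypedProperties();
    props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD.key(), "_row_key");
    props.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD.key(), "timestamp");
    // hive_style_partitioning=true -> "timestamp=2021-07-01" rather than "2021-07-01";
    // partitionpath.urlencode=true would additionally URL-encode the value.
    props.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key(), "true");
    props.setProperty(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.key(), "false");
    SimpleAvroKeyGenerator keyGen = new SimpleAvroKeyGenerator(props);
  }
}
```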
" diff --git a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala index ebd7c9e86..3072f6f0a 100644 --- a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala +++ b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala @@ -18,8 +18,8 @@ package org.apache.hudi.examples.spark -import org.apache.hudi.DataSourceReadOptions.{BEGIN_INSTANTTIME_OPT_KEY, END_INSTANTTIME_OPT_KEY, QUERY_TYPE_INCREMENTAL_OPT_VAL, QUERY_TYPE_OPT_KEY} -import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD_OPT_KEY, PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY} +import org.apache.hudi.DataSourceReadOptions.{BEGIN_INSTANTTIME, END_INSTANTTIME, QUERY_TYPE_INCREMENTAL_OPT_VAL, QUERY_TYPE} +import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD} import org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs import org.apache.hudi.common.model.HoodieAvroPayload import org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME @@ -76,9 +76,9 @@ object HoodieDataSourceExample { val df = spark.read.json(spark.sparkContext.parallelize(inserts, 1)) df.write.format("org.apache.hudi"). options(getQuickstartWriteConfigs). - option(PRECOMBINE_FIELD_OPT_KEY.key, "ts"). - option(RECORDKEY_FIELD_OPT_KEY.key, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY.key, "partitionpath"). + option(PRECOMBINE_FIELD.key, "ts"). + option(RECORDKEY_FIELD.key, "uuid"). + option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TABLE_NAME.key, tableName). mode(Overwrite). save(tablePath) @@ -121,9 +121,9 @@ object HoodieDataSourceExample { val df = spark.read.json(spark.sparkContext.parallelize(updates, 1)) df.write.format("org.apache.hudi"). options(getQuickstartWriteConfigs). - option(PRECOMBINE_FIELD_OPT_KEY.key, "ts"). - option(RECORDKEY_FIELD_OPT_KEY.key, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY.key, "partitionpath"). + option(PRECOMBINE_FIELD.key, "ts"). + option(RECORDKEY_FIELD.key, "uuid"). + option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TABLE_NAME.key, tableName). mode(Append). save(tablePath) @@ -143,8 +143,8 @@ object HoodieDataSourceExample { val incViewDF = spark. read. format("org.apache.hudi"). - option(QUERY_TYPE_OPT_KEY.key, QUERY_TYPE_INCREMENTAL_OPT_VAL). - option(BEGIN_INSTANTTIME_OPT_KEY.key, beginTime). + option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL). + option(BEGIN_INSTANTTIME.key, beginTime). load(tablePath) incViewDF.createOrReplaceTempView("hudi_incr_table") spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0").show() @@ -163,9 +163,9 @@ object HoodieDataSourceExample { //incrementally query data val incViewDF = spark.read.format("org.apache.hudi"). - option(QUERY_TYPE_OPT_KEY.key, QUERY_TYPE_INCREMENTAL_OPT_VAL). - option(BEGIN_INSTANTTIME_OPT_KEY.key, beginTime). - option(END_INSTANTTIME_OPT_KEY.key, endTime). + option(QUERY_TYPE.key, QUERY_TYPE_INCREMENTAL_OPT_VAL). + option(BEGIN_INSTANTTIME.key, beginTime). + option(END_INSTANTTIME.key, endTime). 
load(tablePath) incViewDF.createOrReplaceTempView("hudi_incr_table") spark.sql("select `_hoodie_commit_time`, fare, begin_lon, begin_lat, ts from hudi_incr_table where fare > 20.0").show() diff --git a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala index a35f22075..ace086703 100644 --- a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala +++ b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieMorCompactionJob.scala @@ -19,7 +19,7 @@ package org.apache.hudi.examples.spark -import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD_OPT_KEY, PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY, TABLE_TYPE_OPT_KEY} +import org.apache.hudi.DataSourceWriteOptions.{PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD, TABLE_TYPE} import org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs import org.apache.hudi.client.SparkRDDWriteClient import org.apache.hudi.client.common.HoodieSparkEngineContext @@ -86,11 +86,11 @@ object HoodieMorCompactionJob { val df = spark.read.json(spark.sparkContext.parallelize(inserts.asScala, 1)) df.write.format("org.apache.hudi"). options(getQuickstartWriteConfigs). - option(PRECOMBINE_FIELD_OPT_KEY.key, "ts"). - option(RECORDKEY_FIELD_OPT_KEY.key, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY.key, "partitionpath"). + option(PRECOMBINE_FIELD.key, "ts"). + option(RECORDKEY_FIELD.key, "uuid"). + option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TABLE_NAME.key, tableName). - option(TABLE_TYPE_OPT_KEY.key, tableType). + option(TABLE_TYPE.key, tableType). mode(Overwrite). save(tablePath) } @@ -102,11 +102,11 @@ object HoodieMorCompactionJob { val df = spark.read.json(spark.sparkContext.parallelize(updates.asScala, 1)) df.write.format("org.apache.hudi"). options(getQuickstartWriteConfigs). - option(PRECOMBINE_FIELD_OPT_KEY.key, "ts"). - option(RECORDKEY_FIELD_OPT_KEY.key, "uuid"). - option(PARTITIONPATH_FIELD_OPT_KEY.key, "partitionpath"). + option(PRECOMBINE_FIELD.key, "ts"). + option(RECORDKEY_FIELD.key, "uuid"). + option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TABLE_NAME.key, tableName). - option(TABLE_TYPE_OPT_KEY.key, tableType). + option(TABLE_TYPE.key, tableType). mode(Append). save(tablePath) } diff --git a/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index 44aeb9036..f30e8b23b 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -247,7 +247,7 @@ public class FlinkOptions extends HoodieConfig { + "By default true (in favor of streaming progressing over data integrity)"); public static final ConfigOption RECORD_KEY_FIELD = ConfigOptions - .key(KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY.key()) + .key(KeyGeneratorOptions.RECORDKEY_FIELD.key()) .stringType() .defaultValue("uuid") .withDescription("Record key field. Value to be used as the `recordKey` component of `HoodieKey`.\n" @@ -255,20 +255,20 @@ public class FlinkOptions extends HoodieConfig { + "the dot notation eg: `a.b.c`"); public static final ConfigOption PARTITION_PATH_FIELD = ConfigOptions - .key(KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY.key()) + .key(KeyGeneratorOptions.PARTITIONPATH_FIELD.key()) .stringType() .defaultValue("") .withDescription("Partition path field. 
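// An illustrative consistency check (assumes hudi-flink and hudi-common on the classpath):
// the Flink options above are keyed off the renamed KeyGeneratorOptions, so Spark and Flink
// writers of the same table agree on the property names.
import org.apache.hudi.configuration.FlinkOptions
import org.apache.hudi.keygen.constant.KeyGeneratorOptions

assert(FlinkOptions.RECORD_KEY_FIELD.key() == KeyGeneratorOptions.RECORDKEY_FIELD.key())
assert(FlinkOptions.PARTITION_PATH_FIELD.key() == KeyGeneratorOptions.PARTITIONPATH_FIELD.key())
assert(FlinkOptions.URL_ENCODE_PARTITIONING.key() == KeyGeneratorOptions.URL_ENCODE_PARTITIONING.key())
assert(FlinkOptions.HIVE_STYLE_PARTITIONING.key() == KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key())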
Value to be used at the `partitionPath` component of `HoodieKey`.\n" + "Actual value obtained by invoking .toString(), default ''"); public static final ConfigOption URL_ENCODE_PARTITIONING = ConfigOptions - .key(KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key()) + .key(KeyGeneratorOptions.URL_ENCODE_PARTITIONING.key()) .booleanType() .defaultValue(false) .withDescription("Whether to encode the partition path url, default false"); public static final ConfigOption HIVE_STYLE_PARTITIONING = ConfigOptions - .key(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key()) + .key(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING.key()) .booleanType() .defaultValue(false) .withDescription("Whether to use Hive style partitioning.\n" diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java index 789c7d082..09e27c257 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java @@ -79,8 +79,8 @@ public class ValidateDatasetNode extends DagNode { log.debug("Listing all Micro batches to be validated :: " + fileStatus.getPath().toString()); } - String recordKeyField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key()); - String partitionPathField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key()); + String recordKeyField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key()); + String partitionPathField = context.getWriterContext().getProps().getString(DataSourceWriteOptions.PARTITIONPATH_FIELD().key()); // todo: fix hard coded fields from configs. // read input and resolve insert, updates, etc. Dataset inputDf = session.read().format("avro").load(inputPath); @@ -112,8 +112,8 @@ public class ValidateDatasetNode extends DagNode { } if (config.isValidateHive()) { - String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key()); - String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key()); + String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE().key()); + String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE().key()); log.warn("Validating hive table with db : " + database + " and table : " + tableName); Dataset cowDf = session.sql("SELECT * FROM " + database + "." 
+ tableName); Dataset trimmedCowDf = cowDf.drop(HoodieRecord.COMMIT_TIME_METADATA_FIELD).drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD).drop(HoodieRecord.RECORD_KEY_METADATA_FIELD) diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala index af3df6f70..d55bb9693 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkBulkInsertNode.scala @@ -53,11 +53,11 @@ class SparkBulkInsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] { val saveMode = if(curItrCount == 0) SaveMode.Overwrite else SaveMode.Append inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) - .option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key(), context.getHoodieTestSuiteWriter.getCfg.tableType) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key(), String.valueOf(config.enableRowWriting())) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key(), "deltastreamer.checkpoint.key") + .option(DataSourceWriteOptions.TABLE_NAME.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName) + .option(DataSourceWriteOptions.TABLE_TYPE.key(), context.getHoodieTestSuiteWriter.getCfg.tableType) + .option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.ENABLE_ROW_WRITER.key(), String.valueOf(config.enableRowWriting())) + .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key(), "deltastreamer.checkpoint.key") .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TABLE_NAME.key(), context.getHoodieTestSuiteWriter.getCfg.targetTableName) .mode(saveMode) diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala index a9abe44f4..e9720fd47 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala @@ -53,10 +53,10 @@ class SparkInsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] { context.getWriterContext.getSparkSession) inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) - .option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.tableType) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key, "deltastreamer.checkpoint.key") + .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) + .option(DataSourceWriteOptions.TABLE_TYPE.key, 
context.getHoodieTestSuiteWriter.getCfg.tableType) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .mode(SaveMode.Overwrite) diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala index 7f3b1d32d..3666b3856 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala @@ -53,10 +53,10 @@ class SparkUpsertNode(config1: Config) extends DagNode[RDD[WriteStatus]] { context.getWriterContext.getSparkSession) inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) - .option(DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, context.getHoodieTestSuiteWriter.getCfg.tableType) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY.key, "deltastreamer.checkpoint.key") + .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) + .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .mode(SaveMode.Append) diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/job/TestHoodieTestSuiteJob.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/job/TestHoodieTestSuiteJob.java index 554b1390c..56aa7af7f 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/job/TestHoodieTestSuiteJob.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/testsuite/job/TestHoodieTestSuiteJob.java @@ -165,11 +165,11 @@ public class TestHoodieTestSuiteJob extends UtilitiesTestBase { // Make path selection test suite specific props.setProperty("hoodie.deltastreamer.source.input.selector", DFSTestSuitePathSelector.class.getName()); // Hive Configs - props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/"); - props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb1"); - props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), "table1"); - props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr"); - props.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), TimestampBasedKeyGenerator.class.getName()); + props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/"); + 
props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1"); + props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "table1"); + props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr"); + props.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), TimestampBasedKeyGenerator.class.getName()); props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider"); props.setProperty("hoodie.write.lock.hivemetastore.database", "testdb1"); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java index 469f9c7b4..700af622e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -136,9 +136,9 @@ public class DataSourceUtils { public static Map getExtraMetadata(Map properties) { Map extraMetadataMap = new HashMap<>(); - if (properties.containsKey(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key())) { + if (properties.containsKey(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key())) { properties.entrySet().forEach(entry -> { - if (entry.getKey().startsWith(properties.get(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key()))) { + if (entry.getKey().startsWith(properties.get(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key()))) { extraMetadataMap.put(entry.getKey(), entry.getValue()); } }); @@ -169,13 +169,13 @@ public class DataSourceUtils { public static HoodieWriteConfig createHoodieConfig(String schemaStr, String basePath, String tblName, Map parameters) { - boolean asyncCompact = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY().key())); - boolean inlineCompact = !asyncCompact && parameters.get(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key()) + boolean asyncCompact = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key())); + boolean inlineCompact = !asyncCompact && parameters.get(DataSourceWriteOptions.TABLE_TYPE().key()) .equals(DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL()); - boolean asyncClusteringEnabled = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY().key())); - boolean inlineClusteringEnabled = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INLINE_CLUSTERING_ENABLE_OPT_KEY().key())); + boolean asyncClusteringEnabled = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key())); + boolean inlineClusteringEnabled = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INLINE_CLUSTERING_ENABLE().key())); // insert/bulk-insert combining to be true, if filtering for duplicates - boolean combineInserts = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY().key())); + boolean combineInserts = Boolean.parseBoolean(parameters.get(DataSourceWriteOptions.INSERT_DROP_DUPS().key())); HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder() .withPath(basePath).withAutoCommit(false).combineInput(combineInserts, true); if (schemaStr != null) { @@ -185,12 +185,12 @@ public class DataSourceUtils { return builder.forTable(tblName) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build()) 
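// Hedged sketch of the commit-metadata mechanism the test-suite nodes and getExtraMetadata
// rely on: any option whose name starts with the value configured under
// COMMIT_METADATA_KEYPREFIX is copied verbatim into the commit metadata. `df` and the
// checkpoint value are placeholders.
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.sql.SaveMode

val basePath = "/tmp/hudi/demo_table"
df.write.format("hudi")
  .option(HoodieWriteConfig.TABLE_NAME.key, "demo_table")
  .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "uuid")
  .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "ts")
  .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr")
  .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key")
  .option("deltastreamer.checkpoint.key", "batch-42") // ends up in the .commit file
  .mode(SaveMode.Append)
  .save(basePath)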
.withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY().key())) + .withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS().key())) .withInlineCompaction(inlineCompact).build()) .withClusteringConfig(HoodieClusteringConfig.newBuilder() .withInlineClustering(inlineClusteringEnabled) .withAsyncClustering(asyncClusteringEnabled).build()) - .withPayloadConfig(HoodiePayloadConfig.newBuilder().withPayloadOrderingField(parameters.get(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key())) + .withPayloadConfig(HoodiePayloadConfig.newBuilder().withPayloadOrderingField(parameters.get(DataSourceWriteOptions.PRECOMBINE_FIELD().key())) .build()) // override above with Hoodie configs specified as options. .withProps(parameters).build(); @@ -268,33 +268,33 @@ public class DataSourceUtils { } public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String basePath, String baseFileFormat) { - checkRequiredProperties(props, Collections.singletonList(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key())); + checkRequiredProperties(props, Collections.singletonList(DataSourceWriteOptions.HIVE_TABLE().key())); HiveSyncConfig hiveSyncConfig = new HiveSyncConfig(); hiveSyncConfig.basePath = basePath; hiveSyncConfig.usePreApacheInputFormat = - props.getBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY().key(), - Boolean.parseBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY().defaultValue())); - hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), - DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().defaultValue()); - hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key()); + props.getBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT().key(), + Boolean.parseBoolean(DataSourceWriteOptions.HIVE_USE_PRE_APACHE_INPUT_FORMAT().defaultValue())); + hiveSyncConfig.databaseName = props.getString(DataSourceWriteOptions.HIVE_DATABASE().key(), + DataSourceWriteOptions.HIVE_DATABASE().defaultValue()); + hiveSyncConfig.tableName = props.getString(DataSourceWriteOptions.HIVE_TABLE().key()); hiveSyncConfig.baseFileFormat = baseFileFormat; hiveSyncConfig.hiveUser = - props.getString(DataSourceWriteOptions.HIVE_USER_OPT_KEY().key(), DataSourceWriteOptions.HIVE_USER_OPT_KEY().defaultValue()); + props.getString(DataSourceWriteOptions.HIVE_USER().key(), DataSourceWriteOptions.HIVE_USER().defaultValue()); hiveSyncConfig.hivePass = - props.getString(DataSourceWriteOptions.HIVE_PASS_OPT_KEY().key(), DataSourceWriteOptions.HIVE_PASS_OPT_KEY().defaultValue()); + props.getString(DataSourceWriteOptions.HIVE_PASS().key(), DataSourceWriteOptions.HIVE_PASS().defaultValue()); hiveSyncConfig.jdbcUrl = - props.getString(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), DataSourceWriteOptions.HIVE_URL_OPT_KEY().defaultValue()); + props.getString(DataSourceWriteOptions.HIVE_URL().key(), DataSourceWriteOptions.HIVE_URL().defaultValue()); hiveSyncConfig.partitionFields = - props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), ",", new ArrayList<>()); + props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), ",", new ArrayList<>()); hiveSyncConfig.partitionValueExtractorClass = - props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + props.getString(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), 
SlashEncodedDayPartitionValueExtractor.class.getName()); - hiveSyncConfig.useJdbc = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_USE_JDBC_OPT_KEY().key(), - DataSourceWriteOptions.HIVE_USE_JDBC_OPT_KEY().defaultValue())); - hiveSyncConfig.autoCreateDatabase = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE_OPT_KEY().key(), - DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE_OPT_KEY().defaultValue())); - hiveSyncConfig.ignoreExceptions = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS_OPT_KEY().key(), - DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS_OPT_KEY().defaultValue())); + hiveSyncConfig.useJdbc = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_USE_JDBC().key(), + DataSourceWriteOptions.HIVE_USE_JDBC().defaultValue())); + hiveSyncConfig.autoCreateDatabase = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE().key(), + DataSourceWriteOptions.HIVE_AUTO_CREATE_DATABASE().defaultValue())); + hiveSyncConfig.ignoreExceptions = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().key(), + DataSourceWriteOptions.HIVE_IGNORE_EXCEPTIONS().defaultValue())); hiveSyncConfig.skipROSuffix = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX().key(), DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX().defaultValue())); hiveSyncConfig.supportTimestamp = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP().key(), diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/BulkInsertDataInternalWriterHelper.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/BulkInsertDataInternalWriterHelper.java index 29c7d862e..0a845f701 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/BulkInsertDataInternalWriterHelper.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/internal/BulkInsertDataInternalWriterHelper.java @@ -106,7 +106,7 @@ public class BulkInsertDataInternalWriterHelper { private Option getKeyGenerator(Properties properties) { TypedProperties typedProperties = new TypedProperties(); typedProperties.putAll(properties); - if (properties.get(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key()).equals(NonpartitionedKeyGenerator.class.getName())) { + if (properties.get(DataSourceWriteOptions.KEYGENERATOR_CLASS().key()).equals(NonpartitionedKeyGenerator.class.getName())) { return Option.empty(); // Do not instantiate NonPartitionKeyGen } else { try { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index fa1a1daa1..3c82c44c9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -17,7 +17,7 @@ package org.apache.hudi -import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE_OPT_KEY, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL} +import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL} import org.apache.hudi.common.config.ConfigProperty import org.apache.hudi.common.fs.ConsistencyGuardConfig import org.apache.hudi.common.model.{HoodieTableType, WriteOperationType} @@ -45,7 +45,7 @@ object 
DataSourceReadOptions { val QUERY_TYPE_SNAPSHOT_OPT_VAL = "snapshot" val QUERY_TYPE_READ_OPTIMIZED_OPT_VAL = "read_optimized" val QUERY_TYPE_INCREMENTAL_OPT_VAL = "incremental" - val QUERY_TYPE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val QUERY_TYPE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.query.type") .defaultValue(QUERY_TYPE_SNAPSHOT_OPT_VAL) .withAlternatives("hoodie.datasource.view.type") @@ -55,14 +55,14 @@ object DataSourceReadOptions { val REALTIME_SKIP_MERGE_OPT_VAL = "skip_merge" val REALTIME_PAYLOAD_COMBINE_OPT_VAL = "payload_combine" - val REALTIME_MERGE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val REALTIME_MERGE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.merge.type") .defaultValue(REALTIME_PAYLOAD_COMBINE_OPT_VAL) .withDocumentation("For Snapshot query on merge on read table, control whether we invoke the record " + s"payload implementation to merge (${REALTIME_PAYLOAD_COMBINE_OPT_VAL}) or skip merging altogether " + s"(${REALTIME_SKIP_MERGE_OPT_VAL})") - val READ_PATHS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val READ_PATHS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.paths") .noDefaultValue() .withDocumentation("Comma separated list of file paths to read within a Hudi table.") @@ -86,32 +86,32 @@ object DataSourceReadOptions { @Deprecated val DEFAULT_VIEW_TYPE_OPT_VAL = VIEW_TYPE_READ_OPTIMIZED_OPT_VAL - val BEGIN_INSTANTTIME_OPT_KEY: ConfigProperty[String] = ConfigProperty + val BEGIN_INSTANTTIME: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.begin.instanttime") .noDefaultValue() .withDocumentation("Instant time to start incrementally pulling data from. The instanttime here need not necessarily " + "correspond to an instant on the timeline. New data written with an instant_time > BEGIN_INSTANTTIME are fetched out. " + "For example: ‘20170901080000’ will get all new data written after Sep 1, 2017 08:00AM.") - val END_INSTANTTIME_OPT_KEY: ConfigProperty[String] = ConfigProperty + val END_INSTANTTIME: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.end.instanttime") .noDefaultValue() .withDocumentation("Instant time to limit incrementally fetched data to. " + "New data written with an instant_time <= END_INSTANTTIME are fetched out.") - val INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME_OPT_KEY: ConfigProperty[String] = ConfigProperty + val INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.schema.use.end.instanttime") .defaultValue("false") .withDocumentation("Uses the end instant schema when incrementally fetching data. Default: uses the latest instant schema.") - val PUSH_DOWN_INCR_FILTERS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val PUSH_DOWN_INCR_FILTERS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.incr.filters") .defaultValue("") .withDocumentation("For use-cases like DeltaStreamer which reads from Hoodie Incremental table and applies " + "opaque map functions, filters appearing late in the sequence of transformations cannot be automatically " + "pushed down. 
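// Hedged sketch of two of the renamed read options (the table path is a placeholder and
// `spark` is an existing SparkSession): REALTIME_MERGE picks between payload merging and
// skip_merge on MOR snapshot queries, while READ_PATHS reads an explicit list of paths
// without the usual 'path'.
import org.apache.hudi.DataSourceReadOptions

val skipMergeDF = spark.read.format("hudi")
  .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
  .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
  .load("/tmp/hudi/demo_table")

val explicitPathsDF = spark.read.format("hudi")
  .option(DataSourceReadOptions.READ_PATHS.key, "/tmp/hudi/demo_table/2021/07/04,/tmp/hudi/demo_table/2021/07/05")
  .load()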
This option allows setting filters directly on Hoodie Source.") - val INCR_PATH_GLOB_OPT_KEY: ConfigProperty[String] = ConfigProperty + val INCR_PATH_GLOB: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.read.incr.path.glob") .defaultValue("") .withDocumentation("For the use-cases like users only want to incremental pull from certain partitions " @@ -132,7 +132,7 @@ object DataSourceWriteOptions { val BOOTSTRAP_OPERATION_OPT_VAL = WriteOperationType.BOOTSTRAP.value val INSERT_OVERWRITE_OPERATION_OPT_VAL = WriteOperationType.INSERT_OVERWRITE.value val INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL = WriteOperationType.INSERT_OVERWRITE_TABLE.value - val OPERATION_OPT_KEY: ConfigProperty[String] = ConfigProperty + val OPERATION: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.operation") .defaultValue(UPSERT_OPERATION_OPT_VAL) .withDocumentation("Whether to do upsert, insert or bulkinsert for the write operation. " + @@ -141,14 +141,14 @@ object DataSourceWriteOptions { val COW_TABLE_TYPE_OPT_VAL = HoodieTableType.COPY_ON_WRITE.name val MOR_TABLE_TYPE_OPT_VAL = HoodieTableType.MERGE_ON_READ.name - val TABLE_TYPE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val TABLE_TYPE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.table.type") .defaultValue(COW_TABLE_TYPE_OPT_VAL) .withAlternatives("hoodie.datasource.write.storage.type") .withDocumentation("The table type for the underlying data, for this write. This can’t change between writes.") @Deprecated - val STORAGE_TYPE_OPT_KEY = "hoodie.datasource.write.storage.type" + val STORAGE_TYPE_OPT = "hoodie.datasource.write.storage.type" @Deprecated val COW_STORAGE_TYPE_OPT_VAL = HoodieTableType.COPY_ON_WRITE.name @Deprecated @@ -164,12 +164,12 @@ object DataSourceWriteOptions { */ def translateSqlOptions(optParams: Map[String, String]): Map[String, String] = { var translatedOptParams = optParams - // translate the api partitionBy of spark DataFrameWriter to PARTITIONPATH_FIELD_OPT_KEY + // translate the api partitionBy of spark DataFrameWriter to PARTITIONPATH_FIELD if (optParams.contains(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY)) { val partitionColumns = optParams.get(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY) .map(SparkDataSourceUtils.decodePartitioningColumns) .getOrElse(Nil) - val keyGeneratorClass = optParams.getOrElse(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key(), + val keyGeneratorClass = optParams.getOrElse(DataSourceWriteOptions.KEYGENERATOR_CLASS.key(), DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL) val partitionPathField = @@ -188,12 +188,12 @@ object DataSourceWriteOptions { case _ => partitionColumns.mkString(",") } - translatedOptParams = optParams ++ Map(PARTITIONPATH_FIELD_OPT_KEY.key -> partitionPathField) + translatedOptParams = optParams ++ Map(PARTITIONPATH_FIELD.key -> partitionPathField) } translatedOptParams } - val TABLE_NAME_OPT_KEY: ConfigProperty[String] = ConfigProperty + val TABLE_NAME: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.table.name") .noDefaultValue() .withDocumentation("Table name for the datasource write. Also used to register the table into meta stores.") @@ -203,13 +203,13 @@ object DataSourceWriteOptions { * key value, we will pick the one with the largest value for the precombine field, * determined by Object.compareTo(..) */ - val PRECOMBINE_FIELD_OPT_KEY = HoodieWriteConfig.PRECOMBINE_FIELD_PROP + val PRECOMBINE_FIELD = HoodieWriteConfig.PRECOMBINE_FIELD_PROP /** * Payload class used. 
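// Illustrative consequence of translateSqlOptions above (assumes a DataFrame `df` with a
// `datestr` column): Spark's native partitionBy(...) is translated into the renamed
// PARTITIONPATH_FIELD, so both writes below configure the same partition path field.
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.sql.SaveMode

df.write.format("hudi")
  .option(HoodieWriteConfig.TABLE_NAME.key, "demo_table")
  .partitionBy("datestr")
  .mode(SaveMode.Append)
  .save("/tmp/hudi/demo_table")

df.write.format("hudi")
  .option(HoodieWriteConfig.TABLE_NAME.key, "demo_table")
  .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr")
  .mode(SaveMode.Append)
  .save("/tmp/hudi/demo_table")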
Override this, if you like to roll your own merge logic, when upserting/inserting. * This will render any value set for `PRECOMBINE_FIELD_OPT_VAL` ineffective */ - val PAYLOAD_CLASS_OPT_KEY = HoodieWriteConfig.WRITE_PAYLOAD_CLASS + val PAYLOAD_CLASS = HoodieWriteConfig.WRITE_PAYLOAD_CLASS /** * Record key field. Value to be used as the `recordKey` component of `HoodieKey`. Actual value @@ -217,56 +217,56 @@ object DataSourceWriteOptions { * the dot notation eg: `a.b.c` * */ - val RECORDKEY_FIELD_OPT_KEY = KeyGeneratorOptions.RECORDKEY_FIELD_OPT_KEY + val RECORDKEY_FIELD = KeyGeneratorOptions.RECORDKEY_FIELD /** * Partition path field. Value to be used at the `partitionPath` component of `HoodieKey`. Actual * value obtained by invoking .toString() */ - val PARTITIONPATH_FIELD_OPT_KEY = KeyGeneratorOptions.PARTITIONPATH_FIELD_OPT_KEY + val PARTITIONPATH_FIELD = KeyGeneratorOptions.PARTITIONPATH_FIELD /** * Flag to indicate whether to use Hive style partitioning. * If set true, the names of partition folders follow <partition_column_name>=<partition_value> format. * By default false (the names of partition folders are only partition values) */ - val HIVE_STYLE_PARTITIONING_OPT_KEY = KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_OPT_KEY - val URL_ENCODE_PARTITIONING_OPT_KEY = KeyGeneratorOptions.URL_ENCODE_PARTITIONING_OPT_KEY + val HIVE_STYLE_PARTITIONING = KeyGeneratorOptions.HIVE_STYLE_PARTITIONING + val URL_ENCODE_PARTITIONING = KeyGeneratorOptions.URL_ENCODE_PARTITIONING /** * Key generator class, whose implementation will extract the key out of the incoming record * */ - val KEYGENERATOR_CLASS_OPT_KEY = HoodieWriteConfig.KEYGENERATOR_CLASS_PROP + val KEYGENERATOR_CLASS = HoodieWriteConfig.KEYGENERATOR_CLASS_PROP val DEFAULT_KEYGENERATOR_CLASS_OPT_VAL = classOf[SimpleKeyGenerator].getName /** * * By default, false (will be enabled as default in a future release) */ - val ENABLE_ROW_WRITER_OPT_KEY: ConfigProperty[String] = ConfigProperty + val ENABLE_ROW_WRITER: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.row.writer.enable") .defaultValue("false") .withDocumentation("When set to true, will perform write operations directly using the spark native " + "`Row` representation, avoiding any additional conversion costs.") - val COMMIT_METADATA_KEYPREFIX_OPT_KEY: ConfigProperty[String] = ConfigProperty + val COMMIT_METADATA_KEYPREFIX: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.commitmeta.key.prefix") .defaultValue("_") .withDocumentation("Option keys beginning with this prefix, are automatically added to the commit/deltacommit metadata. 
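// Hedged sketch of the renamed ENABLE_ROW_WRITER and KEYGENERATOR_CLASS (names are
// placeholders): bulk_insert with the row writer stays in Spark's native Row representation
// and skips the Avro conversion, as HoodieSparkSqlWriter's short-circuit later in this
// patch shows.
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.keygen.SimpleKeyGenerator
import org.apache.spark.sql.SaveMode

df.write.format("hudi")
  .option(HoodieWriteConfig.TABLE_NAME.key, "demo_table")
  .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
  .option(DataSourceWriteOptions.ENABLE_ROW_WRITER.key, "true")
  .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, classOf[SimpleKeyGenerator].getName)
  .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "uuid")
  .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr")
  .mode(SaveMode.Overwrite)
  .save("/tmp/hudi/demo_table")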
" + "This is useful to store checkpointing information, in a consistent way with the hudi timeline") - val INSERT_DROP_DUPS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val INSERT_DROP_DUPS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.insert.drop.duplicates") .defaultValue("false") .withDocumentation("If set to true, filters out all duplicate records from incoming dataframe, during insert operations.") - val STREAMING_RETRY_CNT_OPT_KEY: ConfigProperty[String] = ConfigProperty + val STREAMING_RETRY_CNT: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.streaming.retry.count") .defaultValue("3") .withDocumentation("Config to indicate how many times streaming job should retry for a failed micro batch.") - val STREAMING_RETRY_INTERVAL_MS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val STREAMING_RETRY_INTERVAL_MS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.streaming.retry.interval.ms") .defaultValue("2000") .withDocumentation(" Config to indicate how long (by millisecond) before a retry should issued for failed microbatch") @@ -275,7 +275,7 @@ object DataSourceWriteOptions { * * By default true (in favor of streaming progressing over data integrity) */ - val STREAMING_IGNORE_FAILED_BATCH_OPT_KEY: ConfigProperty[String] = ConfigProperty + val STREAMING_IGNORE_FAILED_BATCH: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.write.streaming.ignore.failed.batch") .defaultValue("true") .withDocumentation("Config to indicate whether to ignore any non exception error (e.g. writestatus error)" @@ -289,77 +289,77 @@ object DataSourceWriteOptions { // HIVE SYNC SPECIFIC CONFIGS // NOTE: DO NOT USE uppercase for the keys as they are internally lower-cased. Using upper-cases causes // unexpected issues with config getting reset - val HIVE_SYNC_ENABLED_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_SYNC_ENABLED: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.enable") .defaultValue("false") .withDocumentation("When set to true, register/sync the table to Apache Hive metastore") - val META_SYNC_ENABLED_OPT_KEY: ConfigProperty[String] = ConfigProperty + val META_SYNC_ENABLED: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.meta.sync.enable") .defaultValue("false") .withDocumentation("") - val HIVE_DATABASE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_DATABASE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.database") .defaultValue("default") .withDocumentation("database to sync to") - val HIVE_TABLE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_TABLE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.table") .defaultValue("unknown") .withDocumentation("table to sync to") - val HIVE_BASE_FILE_FORMAT_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_BASE_FILE_FORMAT: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.base_file_format") .defaultValue("PARQUET") .withDocumentation("Base file format for the sync.") - val HIVE_USER_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_USER: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.username") .defaultValue("hive") .withDocumentation("hive user name to use") - val HIVE_PASS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_PASS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.password") .defaultValue("hive") 
.withDocumentation("hive password to use") - val HIVE_URL_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_URL: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.jdbcurl") .defaultValue("jdbc:hive2://localhost:10000") .withDocumentation("Hive metastore url") - val HIVE_PARTITION_FIELDS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_PARTITION_FIELDS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.partition_fields") .defaultValue("") .withDocumentation("field in the table to use for determining hive partition columns.") - val HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_PARTITION_EXTRACTOR_CLASS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.partition_extractor_class") .defaultValue(classOf[SlashEncodedDayPartitionValueExtractor].getCanonicalName) .withDocumentation("") - val HIVE_ASSUME_DATE_PARTITION_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_ASSUME_DATE_PARTITION: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.assume_date_partitioning") .defaultValue("false") .withDocumentation("Assume partitioning is yyyy/mm/dd") - val HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_USE_PRE_APACHE_INPUT_FORMAT: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.use_pre_apache_input_format") .defaultValue("false") .withDocumentation("") - val HIVE_USE_JDBC_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_USE_JDBC: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.use_jdbc") .defaultValue("true") .withDocumentation("Use JDBC when hive synchronization is enabled") - val HIVE_AUTO_CREATE_DATABASE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_AUTO_CREATE_DATABASE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.auto_create_database") .defaultValue("true") .withDocumentation("Auto create hive database if does not exists") - val HIVE_IGNORE_EXCEPTIONS_OPT_KEY: ConfigProperty[String] = ConfigProperty + val HIVE_IGNORE_EXCEPTIONS: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.ignore_exceptions") .defaultValue("false") .withDocumentation("") @@ -402,18 +402,18 @@ object DataSourceWriteOptions { .withDocumentation("The number of partitions one batch when synchronous partitions to hive.") // Async Compaction - Enabled by default for MOR - val ASYNC_COMPACT_ENABLE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val ASYNC_COMPACT_ENABLE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.compaction.async.enable") .defaultValue("true") .withDocumentation("Controls whether async compaction should be turned on for MOR table writing.") - val INLINE_CLUSTERING_ENABLE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val INLINE_CLUSTERING_ENABLE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.clustering.inline.enable") .defaultValue("false") .sinceVersion("0.9.0") .withDocumentation("Enable inline clustering. 
Disabled by default.") - val ASYNC_CLUSTERING_ENABLE_OPT_KEY: ConfigProperty[String] = ConfigProperty + val ASYNC_CLUSTERING_ENABLE: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.clustering.async.enable") .defaultValue("false") .sinceVersion("0.9.0") @@ -432,8 +432,8 @@ object DataSourceOptionsHelper { // put all the configs with alternatives here val allConfigsWithAlternatives = List( - DataSourceReadOptions.QUERY_TYPE_OPT_KEY, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, + DataSourceReadOptions.QUERY_TYPE, + DataSourceWriteOptions.TABLE_TYPE, HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP, HoodieTableConfig.HOODIE_LOG_FILE_FORMAT_PROP ) @@ -461,7 +461,7 @@ object DataSourceOptionsHelper { if (allAlternatives.contains(opt) && !optParams.contains(allAlternatives(opt))) { log.warn(opt + " is deprecated and will be removed in a later release; Please use " + allAlternatives(opt)) if (opt == DataSourceReadOptions.VIEW_TYPE_OPT_KEY) { - // special handle for VIEW_TYPE_OPT_KEY, also need to translate its values + // special handle for VIEW_TYPE, also need to translate its values translatedOpt ++= Map(allAlternatives(opt) -> viewTypeValueMap(optParams(opt))) } else { translatedOpt ++= Map(allAlternatives(opt) -> optParams(opt)) @@ -476,13 +476,13 @@ object DataSourceOptionsHelper { def parametersWithReadDefaults(parameters: Map[String, String]): Map[String, String] = { // First check if the ConfigUtils.IS_QUERY_AS_RO_TABLE has set by HiveSyncTool, - // or else use query type from QUERY_TYPE_OPT_KEY. + // or else use query type from QUERY_TYPE. val queryType = parameters.get(ConfigUtils.IS_QUERY_AS_RO_TABLE) .map(is => if (is.toBoolean) QUERY_TYPE_READ_OPTIMIZED_OPT_VAL else QUERY_TYPE_SNAPSHOT_OPT_VAL) - .getOrElse(parameters.getOrElse(QUERY_TYPE_OPT_KEY.key, QUERY_TYPE_OPT_KEY.defaultValue())) + .getOrElse(parameters.getOrElse(QUERY_TYPE.key, QUERY_TYPE.defaultValue())) Map( - QUERY_TYPE_OPT_KEY.key -> queryType + QUERY_TYPE.key -> queryType ) ++ translateConfigurations(parameters) } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java index fda60d931..690f613ae 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java +++ b/hudi-spark-datasource/hudi-spark-common/src/test/java/org/apache/hudi/internal/HoodieBulkInsertInternalWriterTestBase.java @@ -72,9 +72,9 @@ public class HoodieBulkInsertInternalWriterTestBase extends HoodieClientTestHarn protected HoodieWriteConfig getWriteConfig(boolean populateMetaFields) { Properties properties = new Properties(); if (!populateMetaFields) { - properties.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), SimpleKeyGenerator.class.getName()); - properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); - properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), SimpleKeyGenerator.class.getName()); + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), SparkDatasetTestUtils.RECORD_KEY_FIELD_NAME); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), 
SparkDatasetTestUtils.PARTITION_PATH_FIELD_NAME); properties.setProperty(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), "false"); } return getConfigBuilder(basePath).withProperties(properties).build(); diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java index 5b501eb3e..c9639851c 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/HoodieDatasetBulkInsertHelper.java @@ -77,7 +77,7 @@ public class HoodieDatasetBulkInsertHelper { TypedProperties properties = new TypedProperties(); properties.putAll(config.getProps()); - String keyGeneratorClass = properties.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key()); + String keyGeneratorClass = properties.getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key()); BuiltinKeyGenerator keyGenerator = (BuiltinKeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, properties); StructType structTypeForUDF = rows.schema(); diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala index 04e6b01a4..a5098d6dc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -20,7 +20,7 @@ package org.apache.hudi import org.apache.hadoop.fs.Path import org.apache.hudi.DataSourceReadOptions._ import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord} -import org.apache.hudi.DataSourceWriteOptions.{BOOTSTRAP_OPERATION_OPT_VAL, OPERATION_OPT_KEY} +import org.apache.hudi.DataSourceWriteOptions.{BOOTSTRAP_OPERATION_OPT_VAL, OPERATION} import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.model.HoodieTableType.{COPY_ON_WRITE, MERGE_ON_READ} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} @@ -75,9 +75,9 @@ class DefaultSource extends RelationProvider val parameters = DataSourceOptionsHelper.parametersWithReadDefaults(optParams) val path = parameters.get("path") - val readPathsStr = parameters.get(DataSourceReadOptions.READ_PATHS_OPT_KEY.key) + val readPathsStr = parameters.get(DataSourceReadOptions.READ_PATHS.key) if (path.isEmpty && readPathsStr.isEmpty) { - throw new HoodieException(s"'path' or '$READ_PATHS_OPT_KEY' or both must be specified.") + throw new HoodieException(s"'path' or '$READ_PATHS' or both must be specified.") } val readPaths = readPathsStr.map(p => p.split(",").toSeq).getOrElse(Seq()) @@ -89,7 +89,7 @@ class DefaultSource extends RelationProvider val enableFileIndex = optParams.get(ENABLE_HOODIE_FILE_INDEX.key) .map(_.toBoolean).getOrElse(ENABLE_HOODIE_FILE_INDEX.defaultValue) val useHoodieFileIndex = enableFileIndex && path.isDefined && !path.get.contains("*") && - !parameters.contains(DataSourceReadOptions.READ_PATHS_OPT_KEY.key) + !parameters.contains(DataSourceReadOptions.READ_PATHS.key) val globPaths = if (useHoodieFileIndex) { None } else { @@ -106,7 +106,7 @@ class DefaultSource extends RelationProvider val metaClient = HoodieTableMetaClient.builder().setConf(fs.getConf).setBasePath(tablePath).build() val isBootstrappedTable = metaClient.getTableConfig.getBootstrapBasePath.isPresent val tableType = metaClient.getTableType - val 
queryType = parameters(QUERY_TYPE_OPT_KEY.key) + val queryType = parameters(QUERY_TYPE.key) log.info(s"Is bootstrapped table => $isBootstrappedTable, tableType is: $tableType, queryType is: $queryType") @@ -159,7 +159,7 @@ class DefaultSource extends RelationProvider val translatedOptions = DataSourceWriteOptions.translateSqlOptions(parameters) val dfWithoutMetaCols = df.drop(HoodieRecord.HOODIE_META_COLUMNS.asScala:_*) - if (translatedOptions(OPERATION_OPT_KEY.key).equals(BOOTSTRAP_OPERATION_OPT_VAL)) { + if (translatedOptions(OPERATION.key).equals(BOOTSTRAP_OPERATION_OPT_VAL)) { HoodieSparkSqlWriter.bootstrap(sqlContext, mode, translatedOptions, dfWithoutMetaCols) } else { HoodieSparkSqlWriter.write(sqlContext, mode, translatedOptions, dfWithoutMetaCols) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index 7253143f3..5543ea161 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -21,7 +21,7 @@ import java.util.Properties import scala.collection.JavaConverters._ import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE_OPT_KEY, QUERY_TYPE_SNAPSHOT_OPT_VAL} +import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_SNAPSHOT_OPT_VAL} import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.HoodieMetadataConfig import org.apache.hudi.common.fs.FSUtils @@ -77,7 +77,7 @@ case class HoodieFileIndex( @transient private val queryPath = new Path(options.getOrElse("path", "'path' option required")) - private val queryType = options(QUERY_TYPE_OPT_KEY.key()) + private val queryType = options(QUERY_TYPE.key()) private val tableType = metaClient.getTableType @@ -370,7 +370,7 @@ case class HoodieFileIndex( } else { // If partitionSeqs.length == partitionSchema.fields.length // Append partition name to the partition value if the - // HIVE_STYLE_PARTITIONING_OPT_KEY is disable. + // HIVE_STYLE_PARTITIONING is disabled. // e.g. 
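// Illustrative behaviour of the read-side defaulting used by DefaultSource and
// HoodieFileIndex above: parametersWithReadDefaults fills in the renamed QUERY_TYPE with
// its default ("snapshot") when the caller supplies nothing.
import org.apache.hudi.{DataSourceOptionsHelper, DataSourceReadOptions}

val resolved = DataSourceOptionsHelper.parametersWithReadDefaults(Map.empty)
assert(resolved(DataSourceReadOptions.QUERY_TYPE.key) == DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)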
convert "/xx/xx/2021/02" to "/xx/xx/year=2021/month=02" val partitionWithName = partitionFragments.zip(partitionSchema).map { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index c283e93ba..e132471f7 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -85,17 +85,17 @@ object HoodieSparkSqlWriter { case Some(ser) if ser.equals("org.apache.spark.serializer.KryoSerializer") => case _ => throw new HoodieException("hoodie only support org.apache.spark.serializer.KryoSerializer as spark.serializer") } - val tableType = HoodieTableType.valueOf(hoodieConfig.getString(TABLE_TYPE_OPT_KEY)) - var operation = WriteOperationType.fromValue(hoodieConfig.getString(OPERATION_OPT_KEY)) - // It does not make sense to allow upsert() operation if INSERT_DROP_DUPS_OPT_KEY is true - // Auto-correct the operation to "insert" if OPERATION_OPT_KEY is set to "upsert" wrongly + val tableType = HoodieTableType.valueOf(hoodieConfig.getString(TABLE_TYPE)) + var operation = WriteOperationType.fromValue(hoodieConfig.getString(OPERATION)) + // It does not make sense to allow upsert() operation if INSERT_DROP_DUPS is true + // Auto-correct the operation to "insert" if OPERATION is set to "upsert" wrongly // or not set (in which case it will be set as "upsert" by parametersWithWriteDefaults()) . - if (hoodieConfig.getBoolean(INSERT_DROP_DUPS_OPT_KEY) && + if (hoodieConfig.getBoolean(INSERT_DROP_DUPS) && operation == WriteOperationType.UPSERT) { log.warn(s"$UPSERT_OPERATION_OPT_VAL is not applicable " + - s"when $INSERT_DROP_DUPS_OPT_KEY is set to be true, " + - s"overriding the $OPERATION_OPT_KEY to be $INSERT_OPERATION_OPT_VAL") + s"when $INSERT_DROP_DUPS is set to be true, " + + s"overriding the $OPERATION to be $INSERT_OPERATION_OPT_VAL") operation = WriteOperationType.INSERT } @@ -119,7 +119,7 @@ object HoodieSparkSqlWriter { val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP) val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP) val partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator) - val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY) + val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) val populateMetaFields = parameters.getOrElse(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()).toBoolean val tableMetaClient = HoodieTableMetaClient.withPropertyBuilder() @@ -128,12 +128,12 @@ object HoodieSparkSqlWriter { .setRecordKeyFields(recordKeyFields) .setBaseFileFormat(baseFileFormat) .setArchiveLogFolder(archiveLogFolder) - .setPayloadClassName(hoodieConfig.getString(PAYLOAD_CLASS_OPT_KEY)) - .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null)) + .setPayloadClassName(hoodieConfig.getString(PAYLOAD_CLASS)) + .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD, null)) .setPartitionFields(partitionColumns) .setPopulateMetaFields(populateMetaFields) - .setRecordKeyFields(hoodieConfig.getString(RECORDKEY_FIELD_OPT_KEY)) - .setKeyGeneratorClassProp(hoodieConfig.getString(KEYGENERATOR_CLASS_OPT_KEY)) + 
.setRecordKeyFields(hoodieConfig.getString(RECORDKEY_FIELD)) + .setKeyGeneratorClassProp(hoodieConfig.getString(KEYGENERATOR_CLASS)) .initTable(sparkContext.hadoopConfiguration, path.get) tableConfig = tableMetaClient.getTableConfig } @@ -142,7 +142,7 @@ object HoodieSparkSqlWriter { // short-circuit if bulk_insert via row is enabled. // scalastyle:off - if (hoodieConfig.getBoolean(ENABLE_ROW_WRITER_OPT_KEY) && + if (hoodieConfig.getBoolean(ENABLE_ROW_WRITER) && operation == WriteOperationType.BULK_INSERT) { val (success, commitTime: common.util.Option[String]) = bulkInsertAsRow(sqlContext, parameters, df, tblName, basePath, path, instantTime, parameters.getOrElse(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), @@ -164,16 +164,16 @@ object HoodieSparkSqlWriter { // Convert to RDD[HoodieRecord] val genericRecords: RDD[GenericRecord] = HoodieSparkUtils.createRdd(df, schema, structName, nameSpace) - val shouldCombine = parameters(INSERT_DROP_DUPS_OPT_KEY.key()).toBoolean || operation.equals(WriteOperationType.UPSERT); + val shouldCombine = parameters(INSERT_DROP_DUPS.key()).toBoolean || operation.equals(WriteOperationType.UPSERT); val hoodieAllIncomingRecords = genericRecords.map(gr => { val hoodieRecord = if (shouldCombine) { - val orderingVal = HoodieAvroUtils.getNestedFieldVal(gr, hoodieConfig.getString(PRECOMBINE_FIELD_OPT_KEY), false) + val orderingVal = HoodieAvroUtils.getNestedFieldVal(gr, hoodieConfig.getString(PRECOMBINE_FIELD), false) .asInstanceOf[Comparable[_]] DataSourceUtils.createHoodieRecord(gr, orderingVal, keyGenerator.getKey(gr), - hoodieConfig.getString(PAYLOAD_CLASS_OPT_KEY)) + hoodieConfig.getString(PAYLOAD_CLASS)) } else { - DataSourceUtils.createHoodieRecord(gr, keyGenerator.getKey(gr), hoodieConfig.getString(PAYLOAD_CLASS_OPT_KEY)) + DataSourceUtils.createHoodieRecord(gr, keyGenerator.getKey(gr), hoodieConfig.getString(PAYLOAD_CLASS)) } hoodieRecord }).toJavaRDD() @@ -192,7 +192,7 @@ object HoodieSparkSqlWriter { } val hoodieRecords = - if (hoodieConfig.getBoolean(INSERT_DROP_DUPS_OPT_KEY)) { + if (hoodieConfig.getBoolean(INSERT_DROP_DUPS)) { DataSourceUtils.dropDuplicates(jsc, hoodieAllIncomingRecords, mapAsJavaMap(parameters)) } else { hoodieAllIncomingRecords @@ -256,7 +256,7 @@ object HoodieSparkSqlWriter { val path = parameters.getOrElse("path", throw new HoodieException("'path' must be set.")) val hoodieConfig = HoodieWriterUtils.convertMapToHoodieConfig(parameters) val tableName = hoodieConfig.getStringOrThrow(HoodieWriteConfig.TABLE_NAME, s"'${HoodieWriteConfig.TABLE_NAME.key}' must be set.") - val tableType = hoodieConfig.getStringOrDefault(TABLE_TYPE_OPT_KEY) + val tableType = hoodieConfig.getStringOrDefault(TABLE_TYPE) val bootstrapBasePath = hoodieConfig.getStringOrThrow(BOOTSTRAP_BASE_PATH_PROP, s"'${BOOTSTRAP_BASE_PATH_PROP.key}' is required for '${BOOTSTRAP_OPERATION_OPT_VAL}'" + " operation'") @@ -286,7 +286,7 @@ object HoodieSparkSqlWriter { if (!tableExists) { val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP) val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters) - val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY) + val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) val keyGenProp = hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_KEY_GENERATOR_CLASS) val populateMetaFields = parameters.getOrElse(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), 
HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()).toBoolean @@ -295,8 +295,8 @@ object HoodieSparkSqlWriter { .setTableName(tableName) .setRecordKeyFields(recordKeyFields) .setArchiveLogFolder(archiveLogFolder) - .setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS_OPT_KEY)) - .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null)) + .setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS)) + .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD, null)) .setBootstrapIndexClass(bootstrapIndexClass) .setBootstrapBasePath(bootstrapBasePath) .setPartitionFields(partitionColumns) @@ -334,7 +334,7 @@ object HoodieSparkSqlWriter { val schema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, nameSpace) sparkContext.getConf.registerAvroSchemas(schema) log.info(s"Registered avro schema : ${schema.toString(true)}") - if (parameters(INSERT_DROP_DUPS_OPT_KEY.key).toBoolean) { + if (parameters(INSERT_DROP_DUPS.key).toBoolean) { throw new HoodieException("Dropping duplicates with bulk_insert in row writer path is not supported yet") } val params = parameters.updated(HoodieWriteConfig.AVRO_SCHEMA.key, schema.toString) @@ -382,8 +382,8 @@ object HoodieSparkSqlWriter { + " To use row writer please switch to spark 2 or spark 3") } val hoodieConfig = HoodieWriterUtils.convertMapToHoodieConfig(params) - val hiveSyncEnabled = hoodieConfig.getStringOrDefault(HIVE_SYNC_ENABLED_OPT_KEY).toBoolean - val metaSyncEnabled = hoodieConfig.getStringOrDefault(META_SYNC_ENABLED_OPT_KEY).toBoolean + val hiveSyncEnabled = hoodieConfig.getStringOrDefault(HIVE_SYNC_ENABLED).toBoolean + val metaSyncEnabled = hoodieConfig.getStringOrDefault(META_SYNC_ENABLED).toBoolean val syncHiveSuccess = if (hiveSyncEnabled || metaSyncEnabled) { metaSync(sqlContext.sparkSession, hoodieConfig, basePath, df.schema) @@ -439,26 +439,26 @@ object HoodieSparkSqlWriter { private def buildSyncConfig(basePath: Path, hoodieConfig: HoodieConfig, sqlConf: SQLConf): HiveSyncConfig = { val hiveSyncConfig: HiveSyncConfig = new HiveSyncConfig() hiveSyncConfig.basePath = basePath.toString - hiveSyncConfig.baseFileFormat = hoodieConfig.getString(HIVE_BASE_FILE_FORMAT_OPT_KEY) + hiveSyncConfig.baseFileFormat = hoodieConfig.getString(HIVE_BASE_FILE_FORMAT) hiveSyncConfig.usePreApacheInputFormat = - hoodieConfig.getStringOrDefault(HIVE_USE_PRE_APACHE_INPUT_FORMAT_OPT_KEY).toBoolean - hiveSyncConfig.databaseName = hoodieConfig.getString(HIVE_DATABASE_OPT_KEY) - hiveSyncConfig.tableName = hoodieConfig.getString(HIVE_TABLE_OPT_KEY) - hiveSyncConfig.hiveUser = hoodieConfig.getString(HIVE_USER_OPT_KEY) - hiveSyncConfig.hivePass = hoodieConfig.getString(HIVE_PASS_OPT_KEY) - hiveSyncConfig.jdbcUrl = hoodieConfig.getString(HIVE_URL_OPT_KEY) + hoodieConfig.getStringOrDefault(HIVE_USE_PRE_APACHE_INPUT_FORMAT).toBoolean + hiveSyncConfig.databaseName = hoodieConfig.getString(HIVE_DATABASE) + hiveSyncConfig.tableName = hoodieConfig.getString(HIVE_TABLE) + hiveSyncConfig.hiveUser = hoodieConfig.getString(HIVE_USER) + hiveSyncConfig.hivePass = hoodieConfig.getString(HIVE_PASS) + hiveSyncConfig.jdbcUrl = hoodieConfig.getString(HIVE_URL) hiveSyncConfig.skipROSuffix = hoodieConfig.getStringOrDefault(HIVE_SKIP_RO_SUFFIX, DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX.defaultValue).toBoolean hiveSyncConfig.partitionFields = - ListBuffer(hoodieConfig.getString(HIVE_PARTITION_FIELDS_OPT_KEY).split(",").map(_.trim).filter(!_.isEmpty).toList: _*) - 
hiveSyncConfig.partitionValueExtractorClass = hoodieConfig.getString(HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY) - hiveSyncConfig.useJdbc = hoodieConfig.getBoolean(HIVE_USE_JDBC_OPT_KEY) + ListBuffer(hoodieConfig.getString(HIVE_PARTITION_FIELDS).split(",").map(_.trim).filter(!_.isEmpty).toList: _*) + hiveSyncConfig.partitionValueExtractorClass = hoodieConfig.getString(HIVE_PARTITION_EXTRACTOR_CLASS) + hiveSyncConfig.useJdbc = hoodieConfig.getBoolean(HIVE_USE_JDBC) hiveSyncConfig.useFileListingFromMetadata = hoodieConfig.getBoolean(HoodieMetadataConfig.METADATA_ENABLE_PROP) hiveSyncConfig.verifyMetadataFileListing = hoodieConfig.getBoolean(HoodieMetadataConfig.METADATA_VALIDATE_PROP) - hiveSyncConfig.ignoreExceptions = hoodieConfig.getStringOrDefault(HIVE_IGNORE_EXCEPTIONS_OPT_KEY).toBoolean + hiveSyncConfig.ignoreExceptions = hoodieConfig.getStringOrDefault(HIVE_IGNORE_EXCEPTIONS).toBoolean hiveSyncConfig.supportTimestamp = hoodieConfig.getStringOrDefault(HIVE_SUPPORT_TIMESTAMP).toBoolean - hiveSyncConfig.autoCreateDatabase = hoodieConfig.getStringOrDefault(HIVE_AUTO_CREATE_DATABASE_OPT_KEY).toBoolean - hiveSyncConfig.decodePartition = hoodieConfig.getStringOrDefault(URL_ENCODE_PARTITIONING_OPT_KEY).toBoolean + hiveSyncConfig.autoCreateDatabase = hoodieConfig.getStringOrDefault(HIVE_AUTO_CREATE_DATABASE).toBoolean + hiveSyncConfig.decodePartition = hoodieConfig.getStringOrDefault(URL_ENCODE_PARTITIONING).toBoolean hiveSyncConfig.batchSyncNum = hoodieConfig.getStringOrDefault(HIVE_BATCH_SYNC_PARTITION_NUM).toInt hiveSyncConfig.syncAsSparkDataSourceTable = hoodieConfig.getStringOrDefault(HIVE_SYNC_AS_DATA_SOURCE_TABLE).toBoolean @@ -472,8 +472,8 @@ object HoodieSparkSqlWriter { private def metaSync(spark: SparkSession, hoodieConfig: HoodieConfig, basePath: Path, schema: StructType): Boolean = { - val hiveSyncEnabled = hoodieConfig.getStringOrDefault(HIVE_SYNC_ENABLED_OPT_KEY).toBoolean - var metaSyncEnabled = hoodieConfig.getStringOrDefault(META_SYNC_ENABLED_OPT_KEY).toBoolean + val hiveSyncEnabled = hoodieConfig.getStringOrDefault(HIVE_SYNC_ENABLED).toBoolean + var metaSyncEnabled = hoodieConfig.getStringOrDefault(META_SYNC_ENABLED).toBoolean var syncClientToolClassSet = scala.collection.mutable.Set[String]() hoodieConfig.getString(META_SYNC_CLIENT_TOOL_CLASS).split(",").foreach(syncClass => syncClientToolClassSet += syncClass) @@ -488,7 +488,7 @@ object HoodieSparkSqlWriter { syncClientToolClassSet.foreach(impl => { val syncSuccess = impl.trim match { case "org.apache.hudi.hive.HiveSyncTool" => { - log.info("Syncing to Hive Metastore (URL: " + hoodieConfig.getString(HIVE_URL_OPT_KEY) + ")") + log.info("Syncing to Hive Metastore (URL: " + hoodieConfig.getString(HIVE_URL) + ")") syncHive(basePath, fs, hoodieConfig, spark.sessionState.conf) true } @@ -524,7 +524,7 @@ object HoodieSparkSqlWriter { if(writeResult.getWriteStatuses.rdd.filter(ws => ws.hasErrors).isEmpty()) { log.info("Proceeding to commit the write.") val metaMap = parameters.filter(kv => - kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX_OPT_KEY.key))) + kv._1.startsWith(parameters(COMMIT_METADATA_KEYPREFIX.key))) val commitSuccess = client.commit(tableInstantInfo.instantTime, writeResult.getWriteStatuses, common.util.Option.of(new util.HashMap[String, String](mapAsJavaMap(metaMap))), @@ -589,7 +589,7 @@ object HoodieSparkSqlWriter { parameters: Map[String, String], configuration: Configuration) : Boolean = { log.info(s"Config.inlineCompactionEnabled ? 
${client.getConfig.inlineCompactionEnabled}") if (asyncCompactionTriggerFnDefined && !client.getConfig.inlineCompactionEnabled - && parameters.get(ASYNC_COMPACT_ENABLE_OPT_KEY.key).exists(r => r.toBoolean)) { + && parameters.get(ASYNC_COMPACT_ENABLE.key).exists(r => r.toBoolean)) { tableConfig.getTableType == HoodieTableType.MERGE_ON_READ } else { false @@ -600,7 +600,7 @@ object HoodieSparkSqlWriter { parameters: Map[String, String]) : Boolean = { log.info(s"Config.asyncClusteringEnabled ? ${client.getConfig.isAsyncClusteringEnabled}") asyncClusteringTriggerFnDefined && client.getConfig.isAsyncClusteringEnabled && - parameters.get(ASYNC_CLUSTERING_ENABLE_OPT_KEY.key).exists(r => r.toBoolean) + parameters.get(ASYNC_CLUSTERING_ENABLE.key).exists(r => r.toBoolean) } private def getHoodieTableConfig(sparkContext: SparkContext, diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala index 8dfcbc4f1..b1f8eb57b 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieStreamingSink.scala @@ -48,9 +48,9 @@ class HoodieStreamingSink(sqlContext: SQLContext, private val log = LogManager.getLogger(classOf[HoodieStreamingSink]) - private val retryCnt = options(DataSourceWriteOptions.STREAMING_RETRY_CNT_OPT_KEY.key).toInt - private val retryIntervalMs = options(DataSourceWriteOptions.STREAMING_RETRY_INTERVAL_MS_OPT_KEY.key).toLong - private val ignoreFailedBatch = options(DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH_OPT_KEY.key).toBoolean + private val retryCnt = options(DataSourceWriteOptions.STREAMING_RETRY_CNT.key).toInt + private val retryIntervalMs = options(DataSourceWriteOptions.STREAMING_RETRY_INTERVAL_MS.key).toLong + private val ignoreFailedBatch = options(DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH.key).toBoolean private var isAsyncCompactorServiceShutdownAbnormally = false private var isAsyncClusteringServiceShutdownAbnormally = false @@ -113,7 +113,7 @@ class HoodieStreamingSink(sqlContext: SQLContext, log.error(s"Micro batch id=$batchId threw following exception: ", e) if (ignoreFailedBatch) { log.info(s"Ignore the exception and move on streaming as per " + - s"${DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH_OPT_KEY.key} configuration") + s"${DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH.key} configuration") Success((true, None, None)) } else { if (retryCnt > 1) log.info(s"Retrying the failed micro batch id=$batchId ...") @@ -127,7 +127,7 @@ class HoodieStreamingSink(sqlContext: SQLContext, })) if (ignoreFailedBatch) { log.info(s"Ignore the errors and move on streaming as per " + - s"${DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH_OPT_KEY.key} configuration") + s"${DataSourceWriteOptions.STREAMING_IGNORE_FAILED_BATCH.key} configuration") Success((true, None, None)) } else { if (retryCnt > 1) log.info(s"Retrying the failed micro batch id=$batchId ...") diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index 3056103a2..0fd299167 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -46,39 +46,39 @@ object 
HoodieWriterUtils { * @return */ def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = { - Map(OPERATION_OPT_KEY.key -> OPERATION_OPT_KEY.defaultValue, - TABLE_TYPE_OPT_KEY.key -> TABLE_TYPE_OPT_KEY.defaultValue, - PRECOMBINE_FIELD_OPT_KEY.key -> PRECOMBINE_FIELD_OPT_KEY.defaultValue, - PAYLOAD_CLASS_OPT_KEY.key -> PAYLOAD_CLASS_OPT_KEY.defaultValue, - RECORDKEY_FIELD_OPT_KEY.key -> RECORDKEY_FIELD_OPT_KEY.defaultValue, - PARTITIONPATH_FIELD_OPT_KEY.key -> PARTITIONPATH_FIELD_OPT_KEY.defaultValue, - KEYGENERATOR_CLASS_OPT_KEY.key -> DEFAULT_KEYGENERATOR_CLASS_OPT_VAL, + Map(OPERATION.key -> OPERATION.defaultValue, + TABLE_TYPE.key -> TABLE_TYPE.defaultValue, + PRECOMBINE_FIELD.key -> PRECOMBINE_FIELD.defaultValue, + PAYLOAD_CLASS.key -> PAYLOAD_CLASS.defaultValue, + RECORDKEY_FIELD.key -> RECORDKEY_FIELD.defaultValue, + PARTITIONPATH_FIELD.key -> PARTITIONPATH_FIELD.defaultValue, + KEYGENERATOR_CLASS.key -> DEFAULT_KEYGENERATOR_CLASS_OPT_VAL, METADATA_ENABLE_PROP.key -> METADATA_ENABLE_PROP.defaultValue.toString, METADATA_VALIDATE_PROP.key -> METADATA_VALIDATE_PROP.defaultValue.toString, - COMMIT_METADATA_KEYPREFIX_OPT_KEY.key -> COMMIT_METADATA_KEYPREFIX_OPT_KEY.defaultValue, - INSERT_DROP_DUPS_OPT_KEY.key -> INSERT_DROP_DUPS_OPT_KEY.defaultValue, - STREAMING_RETRY_CNT_OPT_KEY.key -> STREAMING_RETRY_CNT_OPT_KEY.defaultValue, - STREAMING_RETRY_INTERVAL_MS_OPT_KEY.key -> STREAMING_RETRY_INTERVAL_MS_OPT_KEY.defaultValue, - STREAMING_IGNORE_FAILED_BATCH_OPT_KEY.key -> STREAMING_IGNORE_FAILED_BATCH_OPT_KEY.defaultValue, + COMMIT_METADATA_KEYPREFIX.key -> COMMIT_METADATA_KEYPREFIX.defaultValue, + INSERT_DROP_DUPS.key -> INSERT_DROP_DUPS.defaultValue, + STREAMING_RETRY_CNT.key -> STREAMING_RETRY_CNT.defaultValue, + STREAMING_RETRY_INTERVAL_MS.key -> STREAMING_RETRY_INTERVAL_MS.defaultValue, + STREAMING_IGNORE_FAILED_BATCH.key -> STREAMING_IGNORE_FAILED_BATCH.defaultValue, META_SYNC_CLIENT_TOOL_CLASS.key -> META_SYNC_CLIENT_TOOL_CLASS.defaultValue, - HIVE_SYNC_ENABLED_OPT_KEY.key -> HIVE_SYNC_ENABLED_OPT_KEY.defaultValue, - META_SYNC_ENABLED_OPT_KEY.key -> META_SYNC_ENABLED_OPT_KEY.defaultValue, - HIVE_DATABASE_OPT_KEY.key -> HIVE_DATABASE_OPT_KEY.defaultValue, - HIVE_TABLE_OPT_KEY.key -> HIVE_TABLE_OPT_KEY.defaultValue, - HIVE_BASE_FILE_FORMAT_OPT_KEY.key -> HIVE_BASE_FILE_FORMAT_OPT_KEY.defaultValue, - HIVE_USER_OPT_KEY.key -> HIVE_USER_OPT_KEY.defaultValue, - HIVE_PASS_OPT_KEY.key -> HIVE_PASS_OPT_KEY.defaultValue, - HIVE_URL_OPT_KEY.key -> HIVE_URL_OPT_KEY.defaultValue, - HIVE_PARTITION_FIELDS_OPT_KEY.key -> HIVE_PARTITION_FIELDS_OPT_KEY.defaultValue, - HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key -> HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.defaultValue, - HIVE_STYLE_PARTITIONING_OPT_KEY.key -> HIVE_STYLE_PARTITIONING_OPT_KEY.defaultValue, - HIVE_USE_JDBC_OPT_KEY.key -> HIVE_USE_JDBC_OPT_KEY.defaultValue, + HIVE_SYNC_ENABLED.key -> HIVE_SYNC_ENABLED.defaultValue, + META_SYNC_ENABLED.key -> META_SYNC_ENABLED.defaultValue, + HIVE_DATABASE.key -> HIVE_DATABASE.defaultValue, + HIVE_TABLE.key -> HIVE_TABLE.defaultValue, + HIVE_BASE_FILE_FORMAT.key -> HIVE_BASE_FILE_FORMAT.defaultValue, + HIVE_USER.key -> HIVE_USER.defaultValue, + HIVE_PASS.key -> HIVE_PASS.defaultValue, + HIVE_URL.key -> HIVE_URL.defaultValue, + HIVE_PARTITION_FIELDS.key -> HIVE_PARTITION_FIELDS.defaultValue, + HIVE_PARTITION_EXTRACTOR_CLASS.key -> HIVE_PARTITION_EXTRACTOR_CLASS.defaultValue, + HIVE_STYLE_PARTITIONING.key -> HIVE_STYLE_PARTITIONING.defaultValue, + HIVE_USE_JDBC.key -> 
HIVE_USE_JDBC.defaultValue, HIVE_CREATE_MANAGED_TABLE.key() -> HIVE_CREATE_MANAGED_TABLE.defaultValue.toString, HIVE_SYNC_AS_DATA_SOURCE_TABLE.key() -> HIVE_SYNC_AS_DATA_SOURCE_TABLE.defaultValue(), - ASYNC_COMPACT_ENABLE_OPT_KEY.key -> ASYNC_COMPACT_ENABLE_OPT_KEY.defaultValue, - INLINE_CLUSTERING_ENABLE_OPT_KEY.key -> INLINE_CLUSTERING_ENABLE_OPT_KEY.defaultValue, - ASYNC_CLUSTERING_ENABLE_OPT_KEY.key -> ASYNC_CLUSTERING_ENABLE_OPT_KEY.defaultValue, - ENABLE_ROW_WRITER_OPT_KEY.key -> ENABLE_ROW_WRITER_OPT_KEY.defaultValue + ASYNC_COMPACT_ENABLE.key -> ASYNC_COMPACT_ENABLE.defaultValue, + INLINE_CLUSTERING_ENABLE.key -> INLINE_CLUSTERING_ENABLE.defaultValue, + ASYNC_CLUSTERING_ENABLE.key -> ASYNC_CLUSTERING_ENABLE.defaultValue, + ENABLE_ROW_WRITER.key -> ENABLE_ROW_WRITER.defaultValue ) ++ DataSourceOptionsHelper.translateConfigurations(parameters) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala index d7022f23b..958a15eeb 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala @@ -61,22 +61,22 @@ class IncrementalRelation(val sqlContext: SQLContext, if (commitTimeline.empty()) { throw new HoodieException("No instants to incrementally pull") } - if (!optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key)) { + if (!optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME.key)) { throw new HoodieException(s"Specify the begin instant time to pull from using " + - s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key}") + s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME.key}") } if (!metaClient.getTableConfig.populateMetaFields()) { throw new HoodieException("Incremental queries are not supported when meta fields are disabled") } - val useEndInstantSchema = optParams.getOrElse(DataSourceReadOptions.INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME_OPT_KEY.key, - DataSourceReadOptions.INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME_OPT_KEY.defaultValue).toBoolean + val useEndInstantSchema = optParams.getOrElse(DataSourceReadOptions.INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME.key, + DataSourceReadOptions.INCREMENTAL_READ_SCHEMA_USE_END_INSTANTTIME.defaultValue).toBoolean private val lastInstant = commitTimeline.lastInstant().get() private val commitsTimelineToReturn = commitTimeline.findInstantsInRange( - optParams(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key), - optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key(), lastInstant.getTimestamp)) + optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key), + optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key(), lastInstant.getTimestamp)) private val commitsToReturn = commitsTimelineToReturn.getInstants.iterator().toList // use schema from a file produced in the end/latest instant @@ -93,8 +93,8 @@ class IncrementalRelation(val sqlContext: SQLContext, StructType(skeletonSchema.fields ++ dataSchema.fields) } - private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS_OPT_KEY.key, - DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS_OPT_KEY.defaultValue).split(",").filter(!_.isEmpty) + private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.key, + DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.defaultValue).split(",").filter(!_.isEmpty) override def schema: 
StructType = usedSchema @@ -137,10 +137,10 @@ class IncrementalRelation(val sqlContext: SQLContext, } val pathGlobPattern = optParams.getOrElse( - DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.key, - DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.defaultValue) + DataSourceReadOptions.INCR_PATH_GLOB.key, + DataSourceReadOptions.INCR_PATH_GLOB.defaultValue) val (filteredRegularFullPaths, filteredMetaBootstrapFullPaths) = { - if(!pathGlobPattern.equals(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.defaultValue)) { + if(!pathGlobPattern.equals(DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)) { val globMatcher = new GlobPattern("*" + pathGlobPattern) (regularFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values, metaBootstrapFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values) @@ -163,7 +163,7 @@ class IncrementalRelation(val sqlContext: SQLContext, df = sqlContext.sparkSession.read .format("hudi") .schema(usedSchema) - .option(DataSourceReadOptions.READ_PATHS_OPT_KEY.key, filteredMetaBootstrapFullPaths.mkString(",")) + .option(DataSourceReadOptions.READ_PATHS.key, filteredMetaBootstrapFullPaths.mkString(",")) .load() } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index 0d72698a4..f1bc84751 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -55,9 +55,9 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, if (commitTimeline.empty()) { throw new HoodieException("No instants to incrementally pull") } - if (!optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key)) { + if (!optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME.key)) { throw new HoodieException(s"Specify the begin instant time to pull from using " + - s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key}") + s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME.key}") } if (!metaClient.getTableConfig.populateMetaFields()) { throw new HoodieException("Incremental queries are not supported when meta fields are disabled") @@ -65,12 +65,12 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, private val lastInstant = commitTimeline.lastInstant().get() private val mergeType = optParams.getOrElse( - DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, - DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.defaultValue) + DataSourceReadOptions.REALTIME_MERGE.key, + DataSourceReadOptions.REALTIME_MERGE.defaultValue) private val commitsTimelineToReturn = commitTimeline.findInstantsInRange( - optParams(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key), - optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, lastInstant.getTimestamp)) + optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key), + optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key, lastInstant.getTimestamp)) log.debug(s"${commitsTimelineToReturn.getInstants.iterator().toList.map(f => f.toString).mkString(",")}") private val commitsToReturn = commitsTimelineToReturn.getInstants.iterator().toList private val schemaUtil = new TableSchemaResolver(metaClient) @@ -182,10 +182,10 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, // Filter files based on user defined glob pattern val pathGlobPattern = optParams.getOrElse( - 
DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.key, - DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.defaultValue) + DataSourceReadOptions.INCR_PATH_GLOB.key, + DataSourceReadOptions.INCR_PATH_GLOB.defaultValue) val filteredFileGroup = if(!pathGlobPattern - .equals(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.defaultValue)) { + .equals(DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)) { val globMatcher = new GlobPattern("*" + pathGlobPattern) fileGroup.filter(f => { if (f.getLatestFileSlice.get().getBaseFile.isPresent) { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala index 7fffb7e8a..dd18c5a32 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala @@ -76,8 +76,8 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext, private lazy val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema) private val mergeType = optParams.getOrElse( - DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, - DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.defaultValue) + DataSourceReadOptions.REALTIME_MERGE.key, + DataSourceReadOptions.REALTIME_MERGE.defaultValue) private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf) private val preCombineField = { val preCombineFieldFromTableConfig = metaClient.getTableConfig.getPreCombineField @@ -234,8 +234,8 @@ object MergeOnReadSnapshotRelation { // .So we should encode the file path here. Otherwise, there is a FileNotException throw // out. // For example, If the "pt" is the partition path field, and "pt" = "2021/02/02", If - // we enable the URL_ENCODE_PARTITIONING_OPT_KEY and write data to hudi table.The data - // path in the table will just like "/basePath/2021%2F02%2F02/xxxx.parquet". When we read + // we enable the URL_ENCODE_PARTITIONING and write data to the hudi table. The data path + // in the table will look like "/basePath/2021%2F02%2F02/xxxx.parquet". When we read // data from the table, if there are no encode for the file path, // ParquetFileFormat#buildReaderWithPartitionValues will decode it to // "/basePath/2021/02/02/xxxx.parquet" witch will result to a FileNotException.
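The comment block above is the crux of why MergeOnReadSnapshotRelation re-encodes file paths. A minimal Scala sketch of that encode/decode round trip may help reviewers; it is illustrative only and not part of the patch, and the object name and "/basePath" are placeholder assumptions:

    import java.net.{URLDecoder, URLEncoder}

    // Illustrative only: reproduces the round trip described in the comment above.
    object UrlEncodedPartitionPathSketch {
      def main(args: Array[String]): Unit = {
        val partitionValue = "2021/02/02"
        // With URL_ENCODE_PARTITIONING enabled, the writer stores the whole value
        // as a single encoded directory level:
        val encoded = URLEncoder.encode(partitionValue, "UTF-8")
        println(s"/basePath/$encoded/xxxx.parquet")  // /basePath/2021%2F02%2F02/xxxx.parquet
        // A reader that decodes an unencoded path string (as
        // ParquetFileFormat#buildReaderWithPartitionValues does) ends up with three
        // directory levels that do not exist on disk, hence the FileNotFoundException:
        val decoded = URLDecoder.decode(encoded, "UTF-8")
        println(s"/basePath/$decoded/xxxx.parquet")  // /basePath/2021/02/02/xxxx.parquet
      }
    }

Encoding the paths once more before they reach Spark's datasource machinery keeps the decoded result identical to the layout actually written on disk.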
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala index 72dea1184..c49ffbd17 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala @@ -41,26 +41,26 @@ object HoodieOptionConfig { val SQL_KEY_TABLE_PRIMARY_KEY: HoodieOption[String] = buildConf() .withSqlKey("primaryKey") - .withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key) + .withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD.key) .withTableConfigKey(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS.key) .build() val SQL_KEY_TABLE_TYPE: HoodieOption[String] = buildConf() .withSqlKey("type") - .withHoodieKey(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key) + .withHoodieKey(DataSourceWriteOptions.TABLE_TYPE.key) .withTableConfigKey(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP.key) .defaultValue(SQL_VALUE_TABLE_TYPE_COW) .build() val SQL_KEY_PRECOMBINE_FIELD: HoodieOption[String] = buildConf() .withSqlKey("preCombineField") - .withHoodieKey(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key) + .withHoodieKey(DataSourceWriteOptions.PRECOMBINE_FIELD.key) .withTableConfigKey(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP.key) .build() val SQL_PAYLOAD_CLASS: HoodieOption[String] = buildConf() .withSqlKey("payloadClass") - .withHoodieKey(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY.key) + .withHoodieKey(DataSourceWriteOptions.PAYLOAD_CLASS.key) .withTableConfigKey(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP.key) .defaultValue(classOf[DefaultHoodieRecordPayload].getName) .build() @@ -151,7 +151,7 @@ object HoodieOptionConfig { */ def getPrimaryColumns(options: Map[String, String]): Array[String] = { val params = mappingSqlOptionToHoodieParam(options) - params.get(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key) + params.get(DataSourceWriteOptions.RECORDKEY_FIELD.key) .map(_.split(",").filter(_.length > 0)) .getOrElse(Array.empty) } @@ -163,13 +163,13 @@ object HoodieOptionConfig { */ def getTableType(options: Map[String, String]): String = { val params = mappingSqlOptionToHoodieParam(options) - params.getOrElse(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.defaultValue) + params.getOrElse(DataSourceWriteOptions.TABLE_TYPE.key, + DataSourceWriteOptions.TABLE_TYPE.defaultValue) } def getPreCombineField(options: Map[String, String]): Option[String] = { val params = mappingSqlOptionToHoodieParam(options) - params.get(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key) + params.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key) } def buildConf[T](): HoodieOptions[T] = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala index d2e72ccc5..74fe88bcc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/DeleteHoodieTableCommand.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hudi.command import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} -import 
org.apache.hudi.DataSourceWriteOptions.{HIVE_STYLE_PARTITIONING_OPT_KEY, HIVE_SUPPORT_TIMESTAMP, KEYGENERATOR_CLASS_OPT_KEY, OPERATION_OPT_KEY, PARTITIONPATH_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY} +import org.apache.hudi.DataSourceWriteOptions.{HIVE_STYLE_PARTITIONING, HIVE_SUPPORT_TIMESTAMP, KEYGENERATOR_CLASS, OPERATION, PARTITIONPATH_FIELD, RECORDKEY_FIELD} import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME import org.apache.spark.sql._ @@ -69,12 +69,12 @@ case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends Runnab withSparkConf(sparkSession, targetTable.storage.properties) { Map( "path" -> path, - KEYGENERATOR_CLASS_OPT_KEY.key -> classOf[SqlKeyGenerator].getCanonicalName, + KEYGENERATOR_CLASS.key -> classOf[SqlKeyGenerator].getCanonicalName, TABLE_NAME.key -> tableId.table, - OPERATION_OPT_KEY.key -> DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL, - PARTITIONPATH_FIELD_OPT_KEY.key -> targetTable.partitionColumnNames.mkString(","), + OPERATION.key -> DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL, + PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","), HIVE_SUPPORT_TIMESTAMP.key -> "true", - HIVE_STYLE_PARTITIONING_OPT_KEY.key -> "true", + HIVE_STYLE_PARTITIONING.key -> "true", HoodieWriteConfig.DELETE_PARALLELISM.key -> "200", SqlKeyGenerator.PARTITION_SCHEMA -> targetTable.partitionSchema.toDDL ) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala index 6ce307027..bbca17f8d 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala @@ -188,7 +188,7 @@ object InsertIntoHoodieTableCommand { } val parameters = HoodieOptionConfig.mappingSqlOptionToHoodieParam(table.storage.properties) - val tableType = parameters.getOrElse(TABLE_TYPE_OPT_KEY.key, TABLE_TYPE_OPT_KEY.defaultValue) + val tableType = parameters.getOrElse(TABLE_TYPE.key, TABLE_TYPE.defaultValue) val partitionFields = table.partitionColumnNames.mkString(",") val path = getTableLocation(table, sparkSession) @@ -205,8 +205,8 @@ object InsertIntoHoodieTableCommand { } val dropDuplicate = sparkSession.conf - .getOption(INSERT_DROP_DUPS_OPT_KEY.key) - .getOrElse(INSERT_DROP_DUPS_OPT_KEY.defaultValue) + .getOption(INSERT_DROP_DUPS.key) + .getOrElse(INSERT_DROP_DUPS.defaultValue) .toBoolean val operation = if (isOverwrite) { @@ -235,23 +235,23 @@ object InsertIntoHoodieTableCommand { withSparkConf(sparkSession, options) { Map( "path" -> path, - TABLE_TYPE_OPT_KEY.key -> tableType, + TABLE_TYPE.key -> tableType, TABLE_NAME.key -> table.identifier.table, - PRECOMBINE_FIELD_OPT_KEY.key -> tableSchema.fields.last.name, - OPERATION_OPT_KEY.key -> operation, - KEYGENERATOR_CLASS_OPT_KEY.key -> keyGenClass, - RECORDKEY_FIELD_OPT_KEY.key -> primaryColumns.mkString(","), - PARTITIONPATH_FIELD_OPT_KEY.key -> partitionFields, - PAYLOAD_CLASS_OPT_KEY.key -> payloadClassName, - META_SYNC_ENABLED_OPT_KEY.key -> enableHive.toString, - HIVE_USE_JDBC_OPT_KEY.key -> "false", - HIVE_DATABASE_OPT_KEY.key -> table.identifier.database.getOrElse("default"), - HIVE_TABLE_OPT_KEY.key -> table.identifier.table, + PRECOMBINE_FIELD.key -> tableSchema.fields.last.name, + 
OPERATION.key -> operation, + KEYGENERATOR_CLASS.key -> keyGenClass, + RECORDKEY_FIELD.key -> primaryColumns.mkString(","), + PARTITIONPATH_FIELD.key -> partitionFields, + PAYLOAD_CLASS.key -> payloadClassName, + META_SYNC_ENABLED.key -> enableHive.toString, + HIVE_USE_JDBC.key -> "false", + HIVE_DATABASE.key -> table.identifier.database.getOrElse("default"), + HIVE_TABLE.key -> table.identifier.table, HIVE_SUPPORT_TIMESTAMP.key -> "true", - HIVE_STYLE_PARTITIONING_OPT_KEY.key -> "true", - HIVE_PARTITION_FIELDS_OPT_KEY.key -> partitionFields, - HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - URL_ENCODE_PARTITIONING_OPT_KEY.key -> "true", + HIVE_STYLE_PARTITIONING.key -> "true", + HIVE_PARTITION_FIELDS.key -> partitionFields, + HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, + URL_ENCODE_PARTITIONING.key -> "true", HoodieWriteConfig.INSERT_PARALLELISM.key -> "200", HoodieWriteConfig.UPSERT_PARALLELISM.key -> "200", SqlKeyGenerator.PARTITION_SCHEMA -> table.partitionSchema.toDDL @@ -261,7 +261,7 @@ object InsertIntoHoodieTableCommand { } /** - * Validate the duplicate key for insert statement without enable the INSERT_DROP_DUPS_OPT_KEY + * Validate the duplicate key for insert statement without enabling the INSERT_DROP_DUPS * config. */ class ValidateDuplicateKeyPayload(record: GenericRecord, orderingVal: Comparable[_]) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala index d2c6905af..ab714c852 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala @@ -228,9 +228,9 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab // may be different from the target table, because the are transform logical in the update or // insert actions.
var writeParams = parameters + - (OPERATION_OPT_KEY.key -> UPSERT_OPERATION_OPT_VAL) + + (OPERATION.key -> UPSERT_OPERATION_OPT_VAL) + (HoodieWriteConfig.WRITE_SCHEMA_PROP.key -> getTableSchema.toString) + - (DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> targetTableType) + (DataSourceWriteOptions.TABLE_TYPE.key -> targetTableType) // Map of Condition -> Assignments val updateConditionToAssignments = @@ -275,7 +275,7 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab checkInsertAssignments(insertActions) var writeParams = parameters + - (OPERATION_OPT_KEY.key -> INSERT_OPERATION_OPT_VAL) + + (OPERATION.key -> INSERT_OPERATION_OPT_VAL) + (HoodieWriteConfig.WRITE_SCHEMA_PROP.key -> getTableSchema.toString) writeParams += (PAYLOAD_INSERT_CONDITION_AND_ASSIGNMENTS -> @@ -430,21 +430,21 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab withSparkConf(sparkSession, options) { Map( "path" -> path, - RECORDKEY_FIELD_OPT_KEY.key -> targetKey2SourceExpression.keySet.mkString(","), - KEYGENERATOR_CLASS_OPT_KEY.key -> classOf[SqlKeyGenerator].getCanonicalName, - PRECOMBINE_FIELD_OPT_KEY.key -> targetKey2SourceExpression.keySet.head, // set a default preCombine field + RECORDKEY_FIELD.key -> targetKey2SourceExpression.keySet.mkString(","), + KEYGENERATOR_CLASS.key -> classOf[SqlKeyGenerator].getCanonicalName, + PRECOMBINE_FIELD.key -> targetKey2SourceExpression.keySet.head, // set a default preCombine field TABLE_NAME.key -> targetTableName, - PARTITIONPATH_FIELD_OPT_KEY.key -> targetTable.partitionColumnNames.mkString(","), - PAYLOAD_CLASS_OPT_KEY.key -> classOf[ExpressionPayload].getCanonicalName, - META_SYNC_ENABLED_OPT_KEY.key -> enableHive.toString, - HIVE_USE_JDBC_OPT_KEY.key -> "false", - HIVE_DATABASE_OPT_KEY.key -> targetTableDb, - HIVE_TABLE_OPT_KEY.key -> targetTableName, + PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","), + PAYLOAD_CLASS.key -> classOf[ExpressionPayload].getCanonicalName, + META_SYNC_ENABLED.key -> enableHive.toString, + HIVE_USE_JDBC.key -> "false", + HIVE_DATABASE.key -> targetTableDb, + HIVE_TABLE.key -> targetTableName, HIVE_SUPPORT_TIMESTAMP.key -> "true", - HIVE_STYLE_PARTITIONING_OPT_KEY.key -> "true", - HIVE_PARTITION_FIELDS_OPT_KEY.key -> targetTable.partitionColumnNames.mkString(","), - HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - URL_ENCODE_PARTITIONING_OPT_KEY.key -> "true", // enable the url decode for sql. + HIVE_STYLE_PARTITIONING.key -> "true", + HIVE_PARTITION_FIELDS.key -> targetTable.partitionColumnNames.mkString(","), + HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, + URL_ENCODE_PARTITIONING.key -> "true", // enable the url decode for sql. 
HoodieWriteConfig.INSERT_PARALLELISM.key -> "200", // set the default parallelism to 200 for sql HoodieWriteConfig.UPSERT_PARALLELISM.key -> "200", HoodieWriteConfig.DELETE_PARALLELISM.key -> "200", diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala index d9169da9d..fe0a7e1e2 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala @@ -44,7 +44,7 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props) override def getPartitionPath(record: GenericRecord): String = { val partitionPath = super.getPartitionPath(record) if (partitionSchema.isDefined) { - // we can split the partitionPath here because we enable the URL_ENCODE_PARTITIONING_OPT_KEY + // we can split the partitionPath here because we enable the URL_ENCODE_PARTITIONING_OPT // by default for sql. val partitionFragments = partitionPath.split(KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR) assert(partitionFragments.size == partitionSchema.get.size) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala index 73addacee..e384c413b 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala @@ -97,21 +97,21 @@ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends RunnableCo withSparkConf(sparkSession, targetTable.storage.properties) { Map( "path" -> path, - RECORDKEY_FIELD_OPT_KEY.key -> primaryColumns.mkString(","), - KEYGENERATOR_CLASS_OPT_KEY.key -> classOf[SqlKeyGenerator].getCanonicalName, - PRECOMBINE_FIELD_OPT_KEY.key -> primaryColumns.head, //set the default preCombine field. + RECORDKEY_FIELD.key -> primaryColumns.mkString(","), + KEYGENERATOR_CLASS.key -> classOf[SqlKeyGenerator].getCanonicalName, + PRECOMBINE_FIELD.key -> primaryColumns.head, //set the default preCombine field. 
TABLE_NAME.key -> tableId.table, - OPERATION_OPT_KEY.key -> DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL, - PARTITIONPATH_FIELD_OPT_KEY.key -> targetTable.partitionColumnNames.mkString(","), - META_SYNC_ENABLED_OPT_KEY.key -> enableHive.toString, - HIVE_USE_JDBC_OPT_KEY.key -> "false", - HIVE_DATABASE_OPT_KEY.key -> tableId.database.getOrElse("default"), - HIVE_TABLE_OPT_KEY.key -> tableId.table, - HIVE_PARTITION_FIELDS_OPT_KEY.key -> targetTable.partitionColumnNames.mkString(","), - HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - URL_ENCODE_PARTITIONING_OPT_KEY.key -> "true", + OPERATION.key -> DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL, + PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","), + META_SYNC_ENABLED.key -> enableHive.toString, + HIVE_USE_JDBC.key -> "false", + HIVE_DATABASE.key -> tableId.database.getOrElse("default"), + HIVE_TABLE.key -> tableId.table, + HIVE_PARTITION_FIELDS.key -> targetTable.partitionColumnNames.mkString(","), + HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, + URL_ENCODE_PARTITIONING.key -> "true", HIVE_SUPPORT_TIMESTAMP.key -> "true", - HIVE_STYLE_PARTITIONING_OPT_KEY.key -> "true", + HIVE_STYLE_PARTITIONING.key -> "true", HoodieWriteConfig.UPSERT_PARALLELISM.key -> "200", SqlKeyGenerator.PARTITION_SCHEMA -> targetTable.partitionSchema.toDDL ) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala index ce3373ae7..a43416cb2 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala @@ -201,7 +201,7 @@ class ExpressionPayload(record: GenericRecord, } private def isMORTable(properties: Properties): Boolean = { - properties.getProperty(TABLE_TYPE_OPT_KEY.key, null) == MOR_TABLE_TYPE_OPT_VAL + properties.getProperty(TABLE_TYPE.key, null) == MOR_TABLE_TYPE_OPT_VAL } private def convertToRecord(values: Array[AnyRef], schema: Schema): IndexedRecord = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala index 58260592e..0482e7488 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala @@ -154,8 +154,8 @@ class HoodieStreamSource( } else { // Consume the data between (startCommitTime, endCommitTime] val incParams = parameters ++ Map( - DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key -> startCommitTime(startOffset), - DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key -> endOffset.commitTime + DataSourceReadOptions.BEGIN_INSTANTTIME.key -> startCommitTime(startOffset), + DataSourceReadOptions.END_INSTANTTIME.key -> endOffset.commitTime ) val rdd = tableType match { diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java index 966ffb0b8..06e745c89 100644 --- 
a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaApp.java @@ -140,23 +140,23 @@ public class HoodieJavaApp { // full list in HoodieWriteConfig & its package .option("hoodie.upsert.shuffle.parallelism", "2") // Hoodie Table Type - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key(), tableType) + .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType) // insert - .option(DataSourceWriteOptions.OPERATION_OPT_KEY().key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) + .option(DataSourceWriteOptions.OPERATION().key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) // This is the record key - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key") + .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key") // this is the partition to place it into - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition") // use to combine duplicate records in input/with disk val - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key(), "timestamp") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp") // Used by hive sync and queries .option(HoodieWriteConfig.TABLE_NAME.key(), tableName) // Add Key Extractor - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName() : SimpleKeyGenerator.class.getCanonicalName()) - .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY().key(), "false") - .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY().key(), "true") + .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "false") + .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true") // This will remove any existing data at path below, and create a .mode(SaveMode.Overwrite); @@ -175,16 +175,16 @@ public class HoodieJavaApp { Dataset inputDF2 = spark.read().json(jssc.parallelize(records2, 2)); writer = inputDF2.write().format("org.apache.hudi").option("hoodie.insert.shuffle.parallelism", "2") .option("hoodie.upsert.shuffle.parallelism", "2") - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key(), tableType) // Hoodie Table Type - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition") - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key(), "timestamp") - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType) // Hoodie Table Type + .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp") + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), nonPartitionedTable ? 
NonpartitionedKeyGenerator.class.getCanonicalName() : SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP.key(), "1") - .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY().key(), "false") - .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY().key(), "true") + .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "false") + .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true") .option(HoodieWriteConfig.TABLE_NAME.key(), tableName).mode(SaveMode.Append); updateHiveSyncConfig(writer); @@ -202,17 +202,17 @@ public class HoodieJavaApp { writer = inputDF3.write().format("org.apache.hudi").option("hoodie.insert.shuffle.parallelism", "2") .option("hoodie.upsert.shuffle.parallelism", "2") .option("hoodie.delete.shuffle.parallelism", "2") - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key(), tableType) // Hoodie Table Type - .option(DataSourceWriteOptions.OPERATION_OPT_KEY().key(), "delete") - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition") - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key(), "_row_key") - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType) // Hoodie Table Type + .option(DataSourceWriteOptions.OPERATION().key(), "delete") + .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "_row_key") + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName() : SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP.key(), "1") - .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY().key(), "false") - .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY().key(), "true") + .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "false") + .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true") .option(HoodieWriteConfig.TABLE_NAME.key(), tableName).mode(SaveMode.Append); updateHiveSyncConfig(writer); @@ -237,9 +237,9 @@ public class HoodieJavaApp { * Consume incrementally, only changes in commit 2 above. 
Currently only supported for COPY_ON_WRITE TABLE */ Dataset incQueryDF = spark.read().format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) // Only changes in write 2 above - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY().key(), commitInstantTime1) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), commitInstantTime1) // For incremental view, pass in the root/base path of dataset .load(tablePath); @@ -254,23 +254,23 @@ public class HoodieJavaApp { private DataFrameWriter updateHiveSyncConfig(DataFrameWriter writer) { if (enableHiveSync) { LOG.info("Enabling Hive sync to " + hiveJdbcUrl); - writer = writer.option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), hiveTable) - .option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), hiveDB) - .option(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), hiveJdbcUrl) - .option(DataSourceWriteOptions.HIVE_USER_OPT_KEY().key(), hiveUser) - .option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY().key(), hivePass) - .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY().key(), "true"); + writer = writer.option(DataSourceWriteOptions.HIVE_TABLE().key(), hiveTable) + .option(DataSourceWriteOptions.HIVE_DATABASE().key(), hiveDB) + .option(DataSourceWriteOptions.HIVE_URL().key(), hiveJdbcUrl) + .option(DataSourceWriteOptions.HIVE_USER().key(), hiveUser) + .option(DataSourceWriteOptions.HIVE_PASS().key(), hivePass) + .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED().key(), "true"); if (nonPartitionedTable) { writer = writer - .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), NonPartitionedExtractor.class.getCanonicalName()) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), ""); + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), ""); } else if (useMultiPartitionKeys) { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "year,month,day").option( - DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "year,month,day").option( + DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), MultiPartKeysValueExtractor.class.getCanonicalName()); } else { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "dateStr"); + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr"); } } return writer; diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaGenerateApp.java b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaGenerateApp.java index e907cd7cc..43e0b20d3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaGenerateApp.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaGenerateApp.java @@ -124,23 +124,23 @@ public class HoodieJavaGenerateApp { private DataFrameWriter updateHiveSyncConfig(DataFrameWriter writer) { if (enableHiveSync) { LOG.info("Enabling Hive sync to " + hiveJdbcUrl); - writer = writer.option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), hiveTable) - .option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), hiveDB) - .option(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), hiveJdbcUrl) - 
.option(DataSourceWriteOptions.HIVE_USER_OPT_KEY().key(), hiveUser) - .option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY().key(), hivePass) - .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY().key(), "true"); + writer = writer.option(DataSourceWriteOptions.HIVE_TABLE().key(), hiveTable) + .option(DataSourceWriteOptions.HIVE_DATABASE().key(), hiveDB) + .option(DataSourceWriteOptions.HIVE_URL().key(), hiveJdbcUrl) + .option(DataSourceWriteOptions.HIVE_USER().key(), hiveUser) + .option(DataSourceWriteOptions.HIVE_PASS().key(), hivePass) + .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED().key(), "true"); if (nonPartitionedTable) { writer = writer - .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), NonPartitionedExtractor.class.getCanonicalName()) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), ""); + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), ""); } else if (useMultiPartitionKeys) { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "year,month,day").option( - DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "year,month,day").option( + DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), MultiPartKeysValueExtractor.class.getCanonicalName()); } else { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "dateStr"); + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr"); } } return writer; @@ -165,19 +165,19 @@ public class HoodieJavaGenerateApp { // full list in HoodieWriteConfig & its package .option("hoodie.upsert.shuffle.parallelism", "2") // Hoodie Table Type - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key(), tableType) + .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType) // insert - .option(DataSourceWriteOptions.OPERATION_OPT_KEY().key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) + .option(DataSourceWriteOptions.OPERATION().key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) // This is the record key - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key") + .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key") // this is the partition to place it into - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition") // use to combine duplicate records in input/with disk val - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key(), "timestamp") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp") // Used by hive sync and queries .option(HoodieWriteConfig.TABLE_NAME.key(), tableName) // Add Key Extractor - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), nonPartitionedTable ? 
NonpartitionedKeyGenerator.class.getCanonicalName() : SimpleKeyGenerator.class.getCanonicalName()) .mode(commitType); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java index 75fa91eba..dfaa887cb 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java @@ -332,9 +332,9 @@ public class HoodieJavaStreamingApp { * Consume incrementally, only changes in commit 2 above. Currently only supported for COPY_ON_WRITE TABLE */ Dataset hoodieIncViewDF = spark.read().format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) // Only changes in write 2 above - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY().key(), commitInstantTime1) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), commitInstantTime1) // For incremental view, pass in the root/base path of dataset .load(tablePath); @@ -355,14 +355,14 @@ public class HoodieJavaStreamingApp { DataStreamWriter writer = streamingInput.writeStream().format("org.apache.hudi") .option("hoodie.insert.shuffle.parallelism", "2").option("hoodie.upsert.shuffle.parallelism", "2") .option("hoodie.delete.shuffle.parallelism", "2") - .option(DataSourceWriteOptions.OPERATION_OPT_KEY().key(), operationType) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY().key(), tableType) - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition") - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY().key(), "timestamp") + .option(DataSourceWriteOptions.OPERATION().key(), operationType) + .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType) + .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp") .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP.key(), "1") - .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY().key(), "true") - .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY().key(), "true") + .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "true") + .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true") .option(HoodieWriteConfig.TABLE_NAME.key(), tableName).option("checkpointLocation", checkpointLocation) .outputMode(OutputMode.Append()); @@ -380,18 +380,18 @@ public class HoodieJavaStreamingApp { private DataStreamWriter updateHiveSyncConfig(DataStreamWriter writer) { if (enableHiveSync) { LOG.info("Enabling Hive sync to " + hiveJdbcUrl); - writer = writer.option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), hiveTable) - .option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), hiveDB) - .option(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), hiveJdbcUrl) - .option(DataSourceWriteOptions.HIVE_USER_OPT_KEY().key(), hiveUser) - .option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY().key(), hivePass) - .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY().key(), "true"); + writer = writer.option(DataSourceWriteOptions.HIVE_TABLE().key(), hiveTable) + .option(DataSourceWriteOptions.HIVE_DATABASE().key(), hiveDB) + 
.option(DataSourceWriteOptions.HIVE_URL().key(), hiveJdbcUrl) + .option(DataSourceWriteOptions.HIVE_USER().key(), hiveUser) + .option(DataSourceWriteOptions.HIVE_PASS().key(), hivePass) + .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED().key(), "true"); if (useMultiPartitionKeys) { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "year,month,day").option( - DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(), + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "year,month,day").option( + DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), MultiPartKeysValueExtractor.class.getCanonicalName()); } else { - writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "dateStr"); + writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr"); } } return writer; diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java index 3e04b2418..2e5bb53c9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestHoodieDatasetBulkInsertHelper.java @@ -209,18 +209,18 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase { private Map getProps(boolean setAll, boolean setKeyGen, boolean setRecordKey, boolean setPartitionPath) { Map props = new HashMap<>(); if (setAll) { - props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), "org.apache.hudi.keygen.SimpleKeyGenerator"); - props.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key"); - props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition"); + props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), "org.apache.hudi.keygen.SimpleKeyGenerator"); + props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key"); + props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition"); } else { if (setKeyGen) { - props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key(), "org.apache.hudi.keygen.SimpleKeyGenerator"); + props.put(DataSourceWriteOptions.KEYGENERATOR_CLASS().key(), "org.apache.hudi.keygen.SimpleKeyGenerator"); } if (setRecordKey) { - props.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key"); + props.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key"); } if (setPartitionPath) { - props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "partition"); + props.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition"); } } return props; diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java index 34cb811d3..df18bc1b3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java @@ -550,8 +550,8 @@ public class TestBootstrap extends HoodieClientTestBase { HoodieWriteConfig.Builder builder = getConfigBuilder(schemaStr, IndexType.BLOOM) .withExternalSchemaTrasformation(true); TypedProperties properties = new TypedProperties(); - 
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java index 34cb811d3..df18bc1b3 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java @@ -550,8 +550,8 @@ public class TestBootstrap extends HoodieClientTestBase { HoodieWriteConfig.Builder builder = getConfigBuilder(schemaStr, IndexType.BLOOM) .withExternalSchemaTrasformation(true); TypedProperties properties = new TypedProperties(); - properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY().key(), "_row_key"); - properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY().key(), "datestr"); + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key"); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "datestr"); builder = builder.withProps(properties); return builder; } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala index 1fbc13c1e..3036d5031 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala @@ -55,9 +55,9 @@ class TestDataSourceDefaults { private def getKeyConfig(recordKeyFieldName: String, partitionPathField: String, hiveStylePartitioning: String): TypedProperties = { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, recordKeyFieldName) - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, partitionPathField) - props.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key, hiveStylePartitioning) + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, recordKeyFieldName) + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, partitionPathField) + props.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key, hiveStylePartitioning) props } @@ -75,7 +75,7 @@ class TestDataSourceDefaults { // partition path field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") new SimpleKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -86,7 +86,7 @@ class TestDataSourceDefaults { // partition path field not specified using Row try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") val keyGen = new SimpleKeyGenerator(props) keyGen.getRecordKey(baseRow) fail("Should have errored out") @@ -98,7 +98,7 @@ class TestDataSourceDefaults { // recordkey field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key(), "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(), "partitionField") new SimpleKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -109,7 +109,7 @@ class TestDataSourceDefaults { // recordkey field not specified using Row try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") val keyGen = new SimpleKeyGenerator(props) keyGen.getPartitionPath(baseRow) fail("Should have errored out") @@ -181,8 +181,8 @@ class TestDataSourceDefaults { try { baseRecord.put("field1", "") val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "name") +
props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "name") new SimpleKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -193,8 +193,8 @@ class TestDataSourceDefaults { // if record key is empty, throw error. Using Row try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "name") keyGen = new SimpleKeyGenerator(props) baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType) keyGen.getRecordKey(baseRow) @@ -208,8 +208,8 @@ class TestDataSourceDefaults { try { baseRecord.put("field1", null) val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "name") new SimpleKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -220,8 +220,8 @@ class TestDataSourceDefaults { // if record key is null, throw error. Using Row try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "name") keyGen = new SimpleKeyGenerator(props) baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType) keyGen.getRecordKey(baseRow) @@ -239,8 +239,8 @@ class TestDataSourceDefaults { } class UserDefinedKeyGenerator(props: TypedProperties) extends KeyGenerator(props) with SparkKeyGeneratorInterface { - val recordKeyProp: String = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key) - val partitionPathProp: String = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key) + val recordKeyProp: String = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD.key) + val partitionPathProp: String = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD.key) val STRUCT_NAME: String = "hoodieRowTopLevelField" val NAMESPACE: String = "hoodieRow" var converterFn: Function1[Any, Any] = _ @@ -279,7 +279,7 @@ class TestDataSourceDefaults { // partition path field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") new ComplexKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -290,7 +290,7 @@ class TestDataSourceDefaults { // partition path field not specified using Row try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1") val keyGen = new ComplexKeyGenerator(props) keyGen.getRecordKey(baseRow) fail("Should have errored out") @@ -302,7 +302,7 @@ class TestDataSourceDefaults { // recordkey field not specified try { val props = new 
TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") new ComplexKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -313,7 +313,7 @@ class TestDataSourceDefaults { // recordkey field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") val keyGen = new ComplexKeyGenerator(props) keyGen.getPartitionPath(baseRow) fail("Should have errored out") @@ -395,8 +395,8 @@ class TestDataSourceDefaults { baseRecord.put("name", "") baseRecord.put("field1", null) val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1,name") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "field1,name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1,name") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "field1,name") new ComplexKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -409,8 +409,8 @@ class TestDataSourceDefaults { baseRecord.put("name", "") baseRecord.put("field1", null) val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1,name") - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "field1,name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1,name") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "field1,name") keyGen = new ComplexKeyGenerator(props) baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType) keyGen.getRecordKey(baseRow) @@ -453,7 +453,7 @@ class TestDataSourceDefaults { // top level, partition value not included val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1,name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1,name") keyGen = new GlobalDeleteKeyGenerator(props) val hk2 = keyGen.getKey(baseRecord) assertEquals("field1:field1,name:name1", hk2.getRecordKey) @@ -487,7 +487,7 @@ class TestDataSourceDefaults { // recordkey field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") new GlobalDeleteKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } catch { @@ -498,7 +498,7 @@ class TestDataSourceDefaults { // recordkey field not specified try { val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partitionField") + props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partitionField") val keyGen = new GlobalDeleteKeyGenerator(props) keyGen.getRecordKey(baseRow) fail("Should have errored out") @@ -532,7 +532,7 @@ class TestDataSourceDefaults { baseRecord.put("name", "") baseRecord.put("field1", null) val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1,name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1,name") new GlobalDeleteKeyGenerator(props).getKey(baseRecord) fail("Should have errored out") } 
catch { @@ -546,7 +546,7 @@ class TestDataSourceDefaults { baseRecord.put("field1", null) baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType) val props = new TypedProperties() - props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "field1,name") + props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD.key, "field1,name") val keyGen = new GlobalDeleteKeyGenerator(props) keyGen.getRecordKey(baseRow) fail("Should have errored out") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala index c1f8bd132..74da49137 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala @@ -47,14 +47,14 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val commonOpts = Map( "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test" ) var queryOpts = Map( - DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key -> DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL + DataSourceReadOptions.QUERY_TYPE.key -> DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL ) @BeforeEach override def setUp() { @@ -75,8 +75,8 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2)) inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode) .mode(SaveMode.Overwrite) .save(basePath) metaClient = HoodieTableMetaClient.reload(metaClient) @@ -94,8 +94,8 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2)) inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, keyGenerator) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, keyGenerator) .option(Config.TIMESTAMP_TYPE_FIELD_PROP, TimestampType.DATE_STRING.name()) .option(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "yyyy/MM/dd") .option(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "yyyy-MM-dd") @@ -115,9 +115,9 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2)) inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, 
DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, keyGenerator) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "partition:simple") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, keyGenerator) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partition:simple") .mode(SaveMode.Overwrite) .save(basePath) metaClient = HoodieTableMetaClient.reload(metaClient) @@ -134,8 +134,8 @@ class TestHoodieFileIndex extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2)) inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode) .mode(SaveMode.Overwrite) .save(basePath) metaClient = HoodieTableMetaClient.reload(metaClient) @@ -172,12 +172,12 @@ class TestHoodieFileIndex extends HoodieClientTestBase { inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(RECORDKEY_FIELD_OPT_KEY.key, "id") - .option(PRECOMBINE_FIELD_OPT_KEY.key, "version") - .option(PARTITIONPATH_FIELD_OPT_KEY.key, "dt,hh") - .option(KEYGENERATOR_CLASS_OPT_KEY.key, classOf[ComplexKeyGenerator].getName) - .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, "false") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(RECORDKEY_FIELD.key, "id") + .option(PRECOMBINE_FIELD.key, "version") + .option(PARTITIONPATH_FIELD.key, "dt,hh") + .option(KEYGENERATOR_CLASS.key, classOf[ComplexKeyGenerator].getName) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, "false") .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, useMetaFileList) .mode(SaveMode.Overwrite) .save(basePath) @@ -209,12 +209,12 @@ class TestHoodieFileIndex extends HoodieClientTestBase { s"2021/03/0${i % 2 + 1}", "10")).toDF("id", "name", "price", "version", "dt", "hh") inputDF2.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(RECORDKEY_FIELD_OPT_KEY.key, "id") - .option(PRECOMBINE_FIELD_OPT_KEY.key, "version") - .option(PARTITIONPATH_FIELD_OPT_KEY.key, "dt,hh") - .option(KEYGENERATOR_CLASS_OPT_KEY.key, classOf[ComplexKeyGenerator].getName) - .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, "false") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(RECORDKEY_FIELD.key, "id") + .option(PRECOMBINE_FIELD.key, "version") + .option(PARTITIONPATH_FIELD.key, "dt,hh") + .option(KEYGENERATOR_CLASS.key, classOf[ComplexKeyGenerator].getName) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, "false") .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key(), useMetaFileList) .mode(SaveMode.Overwrite) .save(basePath)
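The TestHoodieFileIndex hunks above pair the renamed keys with a multi-level partition path and ComplexKeyGenerator; a minimal sketch of such a write, assuming a DataFrame `df` with `id`, `version`, `dt` and `hh` columns (table name and path are placeholders):

import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.keygen.ComplexKeyGenerator
import org.apache.spark.sql.SaveMode

df.write.format("hudi").
  option(HoodieWriteConfig.TABLE_NAME.key, "file_index_demo").
  option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL).
  option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "id").
  option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "version").
  option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "dt,hh").
  option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, classOf[ComplexKeyGenerator].getName).
  // with values like "2021/03/01" in dt, keeping URL encoding off preserves the nested directory layout
  option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, "false").
  mode(SaveMode.Overwrite).
  save("/tmp/hudi/file_index_demo")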
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala index be95d7737..422af2086 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/HoodieSparkSqlWriterSuite.scala @@ -57,7 +57,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val originals = HoodieWriterUtils.parametersWithWriteDefaults(Map.empty) val rhsKey = "hoodie.right.hand.side.key" val rhsVal = "hoodie.right.hand.side.val" - val modifier = Map(OPERATION_OPT_KEY.key -> INSERT_OPERATION_OPT_VAL, TABLE_TYPE_OPT_KEY.key -> MOR_TABLE_TYPE_OPT_VAL, rhsKey -> rhsVal) + val modifier = Map(OPERATION.key -> INSERT_OPERATION_OPT_VAL, TABLE_TYPE.key -> MOR_TABLE_TYPE_OPT_VAL, rhsKey -> rhsVal) val modified = HoodieWriterUtils.parametersWithWriteDefaults(modifier) val matcher = (k: String, v: String) => modified(k) should be(v) @@ -111,7 +111,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { assert(tableAlreadyExistException.getMessage.contains("hoodie table with name " + hoodieFooTableName + " already exist")) //on same path try append with delete operation and different("hoodie_bar_tbl") table name which should throw an exception - val deleteTableParams = barTableParams ++ Map(OPERATION_OPT_KEY.key -> "delete") + val deleteTableParams = barTableParams ++ Map(OPERATION.key -> "delete") val deleteCmdException = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, deleteTableParams, dataFrame2)) assert(deleteCmdException.getMessage.contains("hoodie table with name " + hoodieFooTableName + " already exist")) } finally { @@ -154,15 +154,15 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { //create a new table val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, + DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, "hoodie.bulkinsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key -> "true", + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.ENABLE_ROW_WRITER.key -> "true", HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key() -> String.valueOf(populateMetaFields), - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", HoodieWriteConfig.BULKINSERT_SORT_MODE.key() -> sortMode.name(), - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -218,14 +218,14 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { //create a new table val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, -
DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, + DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, "hoodie.bulkinsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key -> "true", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.ENABLE_ROW_WRITER.key -> "true", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", HoodieWriteConfig.BULKINSERT_SORT_MODE.key() -> BulkInsertSortMode.NONE.name(), - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -256,14 +256,14 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { //create a new table val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, + DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, "hoodie.bulkinsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key -> "true", - INSERT_DROP_DUPS_OPT_KEY.key -> "true", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.ENABLE_ROW_WRITER.key -> "true", + INSERT_DROP_DUPS.key -> "true", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -296,11 +296,11 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, "hoodie.bulkinsert.shuffle.parallelism" -> "1", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY.key -> "false", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.INSERT_DROP_DUPS.key -> "false", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + 
DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -310,7 +310,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val recordsSeq = convertRowListToSeq(records) val df = spark.createDataFrame(sc.parallelize(recordsSeq), structType) // write to Hudi - HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableParams - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key, df) + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableParams - DataSourceWriteOptions.PRECOMBINE_FIELD.key, df) // collect all parition paths to issue read of parquet files val partitions = Seq(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, @@ -347,11 +347,11 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, "hoodie.bulkinsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.ENABLE_ROW_WRITER_OPT_KEY.key -> "true", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.ENABLE_ROW_WRITER.key -> "true", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) val partitions = Seq(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, @@ -409,13 +409,13 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, HoodieWriteConfig.BASE_FILE_FORMAT.key -> baseFileFormat, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> tableType, + DataSourceWriteOptions.TABLE_TYPE.key -> tableType, HoodieWriteConfig.INSERT_PARALLELISM.key -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key() -> String.valueOf(populateMetaFields), - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> classOf[SimpleKeyGenerator].getCanonicalName) + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> classOf[SimpleKeyGenerator].getCanonicalName) val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -494,11 +494,11 @@ class HoodieSparkSqlWriterSuite extends 
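The bulk-insert row-writer path exercised above uses the same renamed constants at a plain datasource call site; a minimal sketch, assuming an input DataFrame `df` with `_row_key`, `partition` and `timestamp` columns (table name and path are placeholders):

import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.spark.sql.SaveMode

df.write.format("hudi").
  option(HoodieWriteConfig.TABLE_NAME.key, "row_writer_demo").
  option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL).
  // the row writer keeps data as Spark Rows end to end instead of converting to Avro records
  option(DataSourceWriteOptions.ENABLE_ROW_WRITER.key, "true").
  option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "_row_key").
  option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "partition").
  option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "timestamp").
  mode(SaveMode.Overwrite).
  save("/tmp/hudi/row_writer_demo")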
@@ -494,11 +494,11 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val fooTableModifier = Map("path" -> path.toAbsolutePath.toString, HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key -> srcPath.toAbsolutePath.toString, HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> tableType, + DataSourceWriteOptions.TABLE_TYPE.key -> tableType, HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key -> "4", - DataSourceWriteOptions.OPERATION_OPT_KEY.key -> DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL, - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", + DataSourceWriteOptions.OPERATION.key -> DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL, + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key -> classOf[NonpartitionedKeyGenerator].getCanonicalName) val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) @@ -538,10 +538,10 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { HoodieWriteConfig.TABLE_NAME.key -> hoodieFooTableName, "hoodie.insert.shuffle.parallelism" -> "1", "hoodie.upsert.shuffle.parallelism" -> "1", - DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> tableType, - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + DataSourceWriteOptions.TABLE_TYPE.key -> tableType, + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") val fooTableParams = HoodieWriterUtils.parametersWithWriteDefaults(fooTableModifier) // generate the inserts @@ -618,8 +618,8 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val basePath = "/tmp/hoodie_test" val params = Map( "path" -> basePath, - DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key -> "test_hoodie", - DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY.key -> "partition", + DataSourceWriteOptions.TABLE_NAME.key -> "test_hoodie", + DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key -> "partition", DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX.key -> "true", DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> "true" ) @@ -645,8 +645,8 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val basePath = "/tmp/hoodie_test" val params = Map( "path" -> basePath, - DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key -> "test_hoodie", - DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY.key -> "partition" + DataSourceWriteOptions.TABLE_NAME.key -> "test_hoodie", + DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key -> "partition" ) val parameters = HoodieWriterUtils.parametersWithWriteDefaults(params) val hoodieConfig = HoodieWriterUtils.convertMapToHoodieConfig(parameters) @@ -687,11 +687,11 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { val bootStrapPath = java.nio.file.Files.createTempDirectory("hoodie_test_bootstrap") val basePath = path.toAbsolutePath.toString val baseBootStrapPath = bootStrapPath.toAbsolutePath.toString - val options = Map(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key -> tableType, - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "col3", -
DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "keyid", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "", - DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator", + val options = Map(DataSourceWriteOptions.TABLE_TYPE.key -> tableType, + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "col3", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "keyid", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "", + DataSourceWriteOptions.KEYGENERATOR_CLASS.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator", HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test") try { val df = spark.range(0, 1000).toDF("keyid") @@ -701,20 +701,20 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { df.write.format("hudi") .options(options) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, "insert") + .option(DataSourceWriteOptions.OPERATION.key, "insert") .option("hoodie.insert.shuffle.parallelism", "4") .mode(SaveMode.Overwrite).save(basePath) df.write.format("hudi") .options(options) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, "insert_overwrite_table") + .option(DataSourceWriteOptions.OPERATION.key, "insert_overwrite_table") .option("hoodie.insert.shuffle.parallelism", "4") .mode(SaveMode.Append).save(basePath) val currentCommits = spark.read.format("hudi").load(basePath).select("_hoodie_commit_time").take(1).map(_.getString(0)) - val incrementalKeyIdNum = spark.read.format("hudi").option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "0000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, currentCommits(0)) + val incrementalKeyIdNum = spark.read.format("hudi").option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "0000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, currentCommits(0)) .load(basePath).select("keyid").orderBy("keyid").count assert(incrementalKeyIdNum == 1000) @@ -725,20 +725,20 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { .options(options) .option(HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key, baseBootStrapPath) .option(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key, classOf[NonpartitionedKeyGenerator].getCanonicalName) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) .option(HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key, "4") .mode(SaveMode.Overwrite).save(basePath) df.write.format("hudi") .options(options) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, "insert_overwrite_table") + .option(DataSourceWriteOptions.OPERATION.key, "insert_overwrite_table") .option("hoodie.insert.shuffle.parallelism", "4") .mode(SaveMode.Append).save(basePath) val currentCommitsBootstrap = spark.read.format("hudi").load(basePath).select("_hoodie_commit_time").take(1).map(_.getString(0)) - val incrementalKeyIdNumBootstrap = spark.read.format("hudi").option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "0000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, currentCommitsBootstrap(0)) + val incrementalKeyIdNumBootstrap = 
spark.read.format("hudi").option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "0000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, currentCommitsBootstrap(0)) .load(basePath).select("keyid").orderBy("keyid").count assert(incrementalKeyIdNumBootstrap == 1000) } finally { @@ -761,12 +761,12 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { .withColumn("age", expr("keyid + 1000")) df.write.format("hudi") - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, tableType) - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key, "col3") - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "keyid") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "") - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, "insert") + .option(DataSourceWriteOptions.TABLE_TYPE.key, tableType) + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "col3") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "keyid") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "") + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") + .option(DataSourceWriteOptions.OPERATION.key, "insert") .option("hoodie.insert.shuffle.parallelism", "1") .option("hoodie.metadata.enable", "true") .option(HoodieWriteConfig.TABLE_NAME.key, "hoodie_test") @@ -776,12 +776,12 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers { .withColumn("col3", expr("keyid")) .withColumn("age", expr("keyid + 2000")) df_update.write.format("hudi") - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, tableType) - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key, "col3") - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key, "keyid") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "") - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, "upsert") + .option(DataSourceWriteOptions.TABLE_TYPE.key, tableType) + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "col3") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "keyid") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "") + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") + .option(DataSourceWriteOptions.OPERATION.key, "upsert") .option("hoodie.upsert.shuffle.parallelism", "1") .option("hoodie.metadata.enable", "true") .option(HoodieWriteConfig.TABLE_NAME.key, "hoodie_test") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala index 99905e0ec..ee3aba677 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala @@ -55,9 +55,9 @@ class TestCOWDataSource extends HoodieClientTestBase { "hoodie.upsert.shuffle.parallelism" -> "4", "hoodie.bulkinsert.shuffle.parallelism" -> "2", "hoodie.delete.shuffle.parallelism" -> "1", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - 
DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test" ) @@ -84,7 +84,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2)) inputDF.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Overwrite) .save(basePath) @@ -112,7 +112,7 @@ class TestCOWDataSource extends HoodieClientTestBase { .option("hoodie.keep.max.commits", "2") .option("hoodie.cleaner.commits.retained", "0") .option("hoodie.datasource.write.row.writer.enable", "true") - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL) .mode(if (i == 0) SaveMode.Overwrite else SaveMode.Append) .save(basePath) } @@ -149,7 +149,7 @@ class TestCOWDataSource extends HoodieClientTestBase { inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -167,7 +167,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled) .mode(SaveMode.Overwrite) .save(basePath) @@ -222,9 +222,9 @@ class TestCOWDataSource extends HoodieClientTestBase { // we have 2 commits, try pulling the first commit (which is not the latest) val firstCommit = HoodieDataSourceHelpers.listCommitsSince(fs, basePath, "000").get(0) val hoodieIncViewDF1 = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, firstCommit) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommit) .load(basePath) assertEquals(100, hoodieIncViewDF1.count()) // 100 initial inserts must be pulled var countsPerCommit = hoodieIncViewDF1.groupBy("_hoodie_commit_time").count().collect() @@ -233,9 +233,9 @@ class TestCOWDataSource extends HoodieClientTestBase { // Test incremental query has no instant in range val emptyIncDF = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - 
.option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, "001") + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, "001") .load(basePath) assertEquals(0, emptyIncDF.count()) @@ -250,8 +250,8 @@ class TestCOWDataSource extends HoodieClientTestBase { // pull the latest commit val hoodieIncViewDF2 = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime2) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime2) .load(basePath) assertEquals(uniqueKeyCnt, hoodieIncViewDF2.count()) // 100 records must be pulled @@ -261,16 +261,16 @@ class TestCOWDataSource extends HoodieClientTestBase { // pull the latest commit within certain partitions val hoodieIncViewDF3 = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime2) - .option(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.key, "/2016/*/*/*") + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime2) + .option(DataSourceReadOptions.INCR_PATH_GLOB.key, "/2016/*/*/*") .load(basePath) assertEquals(hoodieIncViewDF2.filter(col("_hoodie_partition_path").contains("2016")).count(), hoodieIncViewDF3.count()) val timeTravelDF = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, firstCommit) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommit) .load(basePath) assertEquals(100, timeTravelDF.count()) // 100 initial inserts must be pulled } @@ -280,7 +280,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -288,7 +288,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -306,7 +306,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = 
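The incremental pulls above read naturally with the shortened keys; a minimal sketch, assuming a SparkSession `spark`, a Hudi table at `basePath`, and placeholder instant times:

import org.apache.hudi.DataSourceReadOptions

val incDF = spark.read.format("hudi").
  option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
  // commits strictly after BEGIN_INSTANTTIME and up to END_INSTANTTIME are returned
  option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000").
  option(DataSourceReadOptions.END_INSTANTTIME.key, "20210615000000").
  load(basePath)
// each row carries the _hoodie_commit_time of the commit that produced it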
@@ -280,7 +280,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -288,7 +288,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -306,7 +306,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -314,7 +314,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL) .mode(SaveMode.Overwrite) .save(basePath) @@ -333,7 +333,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -342,7 +342,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -351,7 +351,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF3 = spark.read.json(spark.sparkContext.parallelize(records3, 2)) inputDF3.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -389,7 +389,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Append) .save(basePath) @@ -398,7 +398,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL) .mode(SaveMode.Overwrite) .save(basePath) @@ -448,7 +448,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key,
DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .mode(SaveMode.Overwrite) .save(basePath) val hoodieROViewDF1 = spark.read.format("org.apache.hudi") @@ -460,7 +460,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) inputDF2.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY.key, "true") + .option(DataSourceWriteOptions.INSERT_DROP_DUPS.key, "true") .mode(SaveMode.Append) .save(basePath) val hoodieROViewDF2 = spark.read.format("org.apache.hudi") @@ -468,8 +468,8 @@ class TestCOWDataSource extends HoodieClientTestBase { assertEquals(hoodieROViewDF2.count(), totalUniqueKeyToGenerate) val hoodieIncViewDF2 = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime1) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime1) .load(basePath) assertEquals(hoodieIncViewDF2.count(), insert2NewKeyCnt) } @@ -511,7 +511,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .option(HoodieWriteConfig.HOODIE_AUTO_COMMIT_PROP.key, "true") .mode(SaveMode.Overwrite) .save(basePath) @@ -525,7 +525,7 @@ class TestCOWDataSource extends HoodieClientTestBase { inputDF.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, keyGenerator) + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, keyGenerator) .mode(SaveMode.Overwrite) } @@ -670,13 +670,13 @@ class TestCOWDataSource extends HoodieClientTestBase { @CsvSource(Array("true,false", "true,true", "false,true", "false,false")) def testQueryCOWWithBasePathAndFileIndex(partitionEncode: Boolean, isMetadataEnabled: Boolean): Unit = { val N = 20 - // Test query with partition prune if URL_ENCODE_PARTITIONING_OPT_KEY has enable + // Test query with partition pruning when URL_ENCODE_PARTITIONING is enabled val records1 = dataGen.generateInsertsContainsAllPartitions("000", N) val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2)) inputDF1.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode) .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled) .mode(SaveMode.Overwrite) .save(basePath) @@ -704,15 +704,15 @@ class TestCOWDataSource extends HoodieClientTestBase { val inputDF2 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records2), 2)) inputDF2.write.format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) -
.option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode) .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled) .mode(SaveMode.Append) .save(basePath) // Incremental query without "*" in path val hoodieIncViewDF1 = spark.read.format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime1) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime1) .load(basePath) assertEquals(N + 1, hoodieIncViewDF1.count()) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala index b20124c65..983b4b114 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestDataSourceForBootstrap.scala @@ -47,9 +47,9 @@ class TestDataSourceForBootstrap { HoodieWriteConfig.BULKINSERT_PARALLELISM.key -> "4", HoodieWriteConfig.FINALIZE_WRITE_PARALLELISM.key -> "4", HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key -> "4", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test" ) var basePath: String = _ @@ -117,9 +117,9 @@ class TestDataSourceForBootstrap { updateDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS.key, "org.apache.hudi.keygen.NonpartitionedKeyGenerator") .mode(SaveMode.Append) .save(basePath) @@ -168,11 +168,11 @@ class TestDataSourceForBootstrap { updateDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") // Required because 
source data is hive style partitioned - .option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key, "true") + .option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key, "true") .mode(SaveMode.Append) .save(basePath) @@ -221,9 +221,9 @@ class TestDataSourceForBootstrap { updateDf1.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .mode(SaveMode.Append) .save(basePath) @@ -241,9 +241,9 @@ class TestDataSourceForBootstrap { updateDF2.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .mode(SaveMode.Append) .save(basePath) @@ -282,7 +282,7 @@ class TestDataSourceForBootstrap { // Read bootstrapped table and verify count val hoodieROViewDF1 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath + "/*") assertEquals(numRecords, hoodieROViewDF1.count()) @@ -295,9 +295,9 @@ class TestDataSourceForBootstrap { updateDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .option(HoodieCompactionConfig.INLINE_COMPACT_PROP.key, "true") .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP.key, "1") .mode(SaveMode.Append) @@ -309,14 +309,14 @@ class TestDataSourceForBootstrap { // Read table after upsert and verify count. Since we have inline compaction enabled the RO view will have // the updated rows. 
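The read that follows selects the read-optimized view through the renamed QUERY_TYPE key; the same query in isolation, as a minimal sketch (`spark` and `basePath` as in the surrounding tests):

import org.apache.hudi.DataSourceReadOptions

val roDF = spark.read.format("hudi").
  option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL).
  load(basePath)
// on a MERGE_ON_READ table this scans only compacted base files, so updates still
// sitting in log files remain invisible until a compaction folds them in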
val hoodieROViewDF2 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath + "/*") assertEquals(numRecords, hoodieROViewDF2.count()) assertEquals(numRecordsUpdate, hoodieROViewDF2.filter(s"timestamp == $updateTimestamp").count()) // Test query without "*" for MOR READ_OPTIMIZED val hoodieROViewDFWithBasePath = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath) assertEquals(numRecords, hoodieROViewDFWithBasePath.count()) @@ -347,13 +347,13 @@ class TestDataSourceForBootstrap { // Read bootstrapped table and verify count val hoodieROViewDF1 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath + "/*") assertEquals(numRecords, hoodieROViewDF1.count()) // Read bootstrapped table without "*" val hoodieROViewDFWithBasePath = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath) assertEquals(numRecords, hoodieROViewDFWithBasePath.count()) @@ -363,16 +363,16 @@ class TestDataSourceForBootstrap { updateDf1.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .mode(SaveMode.Append) .save(basePath) // Read table after upsert and verify the value assertEquals(1, HoodieDataSourceHelpers.listCommitsSince(fs, basePath, commitInstantTime1).size()) val hoodieROViewDF2 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath + "/*") hoodieROViewDF2.collect() @@ -386,9 +386,9 @@ class TestDataSourceForBootstrap { updateDF2.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .mode(SaveMode.Append) .save(basePath) @@ -398,7 +398,7 @@ class TestDataSourceForBootstrap { // Read table after upsert and verify count. Since we have inline compaction off the RO view will have // no updated rows. 
val hoodieROViewDF3 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL) .load(basePath + "/*") assertEquals(numRecords, hoodieROViewDF3.count()) @@ -428,8 +428,8 @@ class TestDataSourceForBootstrap { bootstrapDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .option(HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key, srcPath) .option(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key, classOf[SimpleKeyGenerator].getName) .option(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR.key, classOf[FullRecordBootstrapModeSelector].getName) @@ -455,9 +455,9 @@ class TestDataSourceForBootstrap { updateDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, "datestr") + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "datestr") .mode(SaveMode.Append) .save(basePath) @@ -478,9 +478,9 @@ class TestDataSourceForBootstrap { bootstrapDF.write .format("hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, tableType) - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key, partitionColumns.getOrElse("")) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.BOOTSTRAP_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.TABLE_TYPE.key, tableType) + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, partitionColumns.getOrElse("")) .option(HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key, srcPath) .option(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key, classOf[SimpleKeyGenerator].getName) .mode(SaveMode.Overwrite) @@ -496,9 +496,9 @@ class TestDataSourceForBootstrap { // incrementally pull only changes in the bootstrap commit, which would pull all the initial records written // during bootstrap val hoodieIncViewDF1 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000") - .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, bootstrapCommitInstantTime) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000") + .option(DataSourceReadOptions.END_INSTANTTIME.key, bootstrapCommitInstantTime) .load(basePath) assertEquals(numRecords, hoodieIncViewDF1.count()) @@ -509,8 +509,8 @@ class TestDataSourceForBootstrap { // incrementally pull only changes after bootstrap commit, which would pull only the updated records in the // later commits val hoodieIncViewDF2 = 
spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, bootstrapCommitInstantTime) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, bootstrapCommitInstantTime) .load(basePath); assertEquals(numRecordsUpdate, hoodieIncViewDF2.count()) @@ -522,9 +522,9 @@ class TestDataSourceForBootstrap { val relativePartitionPath = if (isHiveStylePartitioned) "/datestr=2020-04-02/*" else "/2020-04-02/*" // pull the update commits within certain partitions val hoodieIncViewDF3 = spark.read.format("hudi") - .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, bootstrapCommitInstantTime) - .option(DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY.key, relativePartitionPath) + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, bootstrapCommitInstantTime) + .option(DataSourceReadOptions.INCR_PATH_GLOB.key, relativePartitionPath) .load(basePath) assertEquals(hoodieIncViewDF2.filter(col("_hoodie_partition_path").contains("2020-04-02")).count(), diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestEmptyCommit.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestEmptyCommit.scala index 2f9dfc62c..b54e58d77 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestEmptyCommit.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestEmptyCommit.scala @@ -31,9 +31,9 @@ class TestEmptyCommit extends HoodieClientTestBase { val commonOpts = Map( "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4", - DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key", - DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition", - DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test" ) @@ -57,7 +57,7 @@ class TestEmptyCommit extends HoodieClientTestBase { val inputDF1 = spark.read.json(spark.sparkContext.parallelize(Seq.empty[String], 1)) inputDF1.write.format("org.apache.hudi") .options(commonOpts) - .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) .option(HoodieWriteConfig.ALLOW_EMPTY_COMMIT.key(), allowEmptyCommit.toString) .mode(SaveMode.Overwrite) .save(basePath) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala index dc299d115..c0909c3f0 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala @@ -20,7 +20,7 @@ package org.apache.hudi.functional import 
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
index dc299d115..c0909c3f0 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
@@ -20,7 +20,7 @@ package org.apache.hudi.functional
 import org.apache.hadoop.fs.Path
 
 import scala.collection.JavaConverters._
-import org.apache.hudi.DataSourceWriteOptions.{KEYGENERATOR_CLASS_OPT_KEY, PARTITIONPATH_FIELD_OPT_KEY, PAYLOAD_CLASS_OPT_KEY, PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY}
+import org.apache.hudi.DataSourceWriteOptions.{KEYGENERATOR_CLASS, PARTITIONPATH_FIELD, PAYLOAD_CLASS, PRECOMBINE_FIELD, RECORDKEY_FIELD}
 import org.apache.hudi.common.config.HoodieMetadataConfig
 import org.apache.hudi.common.fs.FSUtils
 import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieTableType}
@@ -53,9 +53,9 @@ class TestMORDataSource extends HoodieClientTestBase {
   val commonOpts = Map(
     "hoodie.insert.shuffle.parallelism" -> "4",
     "hoodie.upsert.shuffle.parallelism" -> "4",
-    DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key",
-    DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition",
-    DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp",
+    DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
+    DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
+    DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp",
     HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test"
   )
@@ -87,8 +87,8 @@ class TestMORDataSource extends HoodieClientTestBase {
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled)
       .mode(SaveMode.Overwrite)
       .save(basePath)
@@ -97,7 +97,7 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // Read RO View
     val hudiRODF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
       .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled)
       .load(basePath + "/*/*/*")
@@ -118,7 +118,7 @@ class TestMORDataSource extends HoodieClientTestBase {
     // Read Snapshot query
     val updateCommitTime = HoodieDataSourceHelpers.latestCommit(fs, basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled)
       .load(basePath + "/*/*/*")
@@ -151,13 +151,13 @@ class TestMORDataSource extends HoodieClientTestBase {
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       .mode(SaveMode.Overwrite)
       .save(basePath)
     assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
     val hudiSnapshotDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(100, hudiSnapshotDF1.count()) // still 100, since we only updated
@@ -171,7 +171,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(100, hudiSnapshotDF2.count()) // still 100, since we only updated
     val commit1Time = hudiSnapshotDF1.select("_hoodie_commit_time").head().get(0).toString
@@ -183,9 +183,9 @@ class TestMORDataSource extends HoodieClientTestBase {
     // incremental view
     // base file only
     val hudiIncDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, commit1Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, commit1Time)
       .load(basePath)
     assertEquals(100, hudiIncDF1.count())
     assertEquals(1, hudiIncDF1.select("_hoodie_commit_time").distinct().count())
@@ -193,9 +193,9 @@ class TestMORDataSource extends HoodieClientTestBase {
     hudiIncDF1.show(1)
     // log file only
     val hudiIncDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit1Time)
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, commit2Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit1Time)
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, commit2Time)
       .load(basePath)
     assertEquals(100, hudiIncDF2.count())
     assertEquals(1, hudiIncDF2.select("_hoodie_commit_time").distinct().count())
@@ -204,9 +204,9 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // base file + log file
     val hudiIncDF3 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, commit2Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, commit2Time)
       .load(basePath)
     assertEquals(100, hudiIncDF3.count())
     // log file being load
@@ -215,16 +215,16 @@ class TestMORDataSource extends HoodieClientTestBase {
     // Test incremental query has no instant in range
     val emptyIncDF = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, "001")
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, "001")
       .load(basePath)
     assertEquals(0, emptyIncDF.count())
 
     // Unmerge
     val hudiSnapshotSkipMergeDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
-      .option(DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(200, hudiSnapshotSkipMergeDF2.count())
     assertEquals(100, hudiSnapshotSkipMergeDF2.select("_hoodie_record_key").distinct().count())
@@ -232,7 +232,7 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // Test Read Optimized Query on MOR table
     val hudiRODF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(100, hudiRODF2.count())
@@ -246,7 +246,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF3 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     // still 100, because we only updated the existing records
     assertEquals(100, hudiSnapshotDF3.count())
@@ -259,17 +259,17 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // incremental query from commit2Time
     val hudiIncDF4 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit2Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit2Time)
       .load(basePath)
     assertEquals(50, hudiIncDF4.count())
 
     // skip merge incremental view
     // including commit 2 and commit 3
     val hudiIncDF4SkipMerge = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
       .load(basePath)
     assertEquals(200, hudiIncDF4SkipMerge.count())
@@ -286,7 +286,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF4 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     // 200, because we insert 100 records to a new partition
     assertEquals(200, hudiSnapshotDF4.count())
@@ -295,8 +295,8 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // Incremental query, 50 from log file, 100 from base file of the new partition.
     val hudiIncDF5 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit2Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit2Time)
       .load(basePath)
     assertEquals(150, hudiIncDF5.count())
@@ -312,7 +312,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .save(basePath)
     val commit5Time = HoodieDataSourceHelpers.latestCommit(fs, basePath)
     val hudiSnapshotDF5 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(200, hudiSnapshotDF5.count())
@@ -327,13 +327,13 @@ class TestMORDataSource extends HoodieClientTestBase {
       .save(basePath)
     val commit6Time = HoodieDataSourceHelpers.latestCommit(fs, basePath)
     val hudiSnapshotDF6 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/2020/01/10/*")
     assertEquals(102, hudiSnapshotDF6.count())
     val hudiIncDF6 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit5Time)
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, commit6Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit5Time)
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, commit6Time)
       .load(basePath)
     // compaction updated 150 rows + inserted 2 new row
     assertEquals(152, hudiIncDF6.count())
@@ -349,13 +349,13 @@ class TestMORDataSource extends HoodieClientTestBase {
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       .mode(SaveMode.Overwrite)
       .save(basePath)
     assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
     val hudiSnapshotDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(100, hudiSnapshotDF1.count()) // still 100, since we only updated
@@ -369,7 +369,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(50, hudiSnapshotDF2.count()) // 50 records were deleted
     assertEquals(hudiSnapshotDF2.select("_hoodie_commit_time").distinct().count(), 1)
@@ -380,15 +380,15 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     // unmerge query, skip the delete records
     val hudiSnapshotDF2Unmerge = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
-      .option(DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(100, hudiSnapshotDF2Unmerge.count())
 
     // incremental query, read 50 delete records from log file and get 0 count.
     val hudiIncDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit2Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit2Time)
       .load(basePath)
     assertEquals(0, hudiIncDF1.count())
@@ -402,7 +402,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF3 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     assertEquals(0, hudiSnapshotDF3.count()) // 100 records were deleted, 0 record to load
   }
@@ -417,12 +417,12 @@ class TestMORDataSource extends HoodieClientTestBase {
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
      .mode(SaveMode.Overwrite)
       .save(basePath)
     val hudiSnapshotDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
 
     val commit1Time = hudiSnapshotDF1.select("_hoodie_commit_time").head().get(0).toString
@@ -445,20 +445,20 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
     val hudiIncDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
       .load(basePath)
     val hudiIncDF1Skipmerge = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.REALTIME_MERGE_OPT_KEY.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
       .load(basePath)
     val hudiIncDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commit1Time)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit1Time)
       .load(basePath)
 
     // filter first commit and only read log records
@@ -498,12 +498,12 @@ class TestMORDataSource extends HoodieClientTestBase {
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       .mode(SaveMode.Overwrite)
       .save(basePath)
     val hudiSnapshotDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
 
     assertEquals(100, hudiSnapshotDF1.count())
@@ -515,7 +515,7 @@ class TestMORDataSource extends HoodieClientTestBase {
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
 
     assertEquals(100, hudiSnapshotDF2.count())
@@ -553,12 +553,12 @@ class TestMORDataSource extends HoodieClientTestBase {
     df.write.format("org.apache.hudi")
       .options(commonOpts)
       // use DefaultHoodieRecordPayload here
-      .option(PAYLOAD_CLASS_OPT_KEY.key, classOf[DefaultHoodieRecordPayload].getCanonicalName)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
-      .option(RECORDKEY_FIELD_OPT_KEY.key, "id")
-      .option(PRECOMBINE_FIELD_OPT_KEY.key, "version")
-      .option(PARTITIONPATH_FIELD_OPT_KEY.key, "")
-      .option(KEYGENERATOR_CLASS_OPT_KEY.key, classOf[NonpartitionedKeyGenerator].getName)
+      .option(PAYLOAD_CLASS.key, classOf[DefaultHoodieRecordPayload].getCanonicalName)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(RECORDKEY_FIELD.key, "id")
+      .option(PRECOMBINE_FIELD.key, "version")
+      .option(PARTITIONPATH_FIELD.key, "")
+      .option(KEYGENERATOR_CLASS.key, classOf[NonpartitionedKeyGenerator].getName)
       .mode(SaveMode.Append)
       .save(basePath)
   }
@@ -595,14 +595,14 @@ class TestMORDataSource extends HoodieClientTestBase {
   @CsvSource(Array("true,false", "true,true", "false,true", "false,false"))
   def testQueryMORWithBasePathAndFileIndex(partitionEncode: Boolean, isMetadataEnabled: Boolean): Unit = {
     val N = 20
-    // Test query with partition prune if URL_ENCODE_PARTITIONING_OPT_KEY has enable
+    // Test query with partition prune if URL_ENCODE_PARTITIONING has enable
     val records1 = dataGen.generateInsertsContainsAllPartitions("000", N)
     val inputDF1 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2))
     inputDF1.write.format("hudi")
       .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
-      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode)
       .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled)
       .mode(SaveMode.Overwrite)
       .save(basePath)
@@ -630,16 +630,16 @@ class TestMORDataSource extends HoodieClientTestBase {
     val inputDF2 = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records2), 2))
     inputDF2.write.format("hudi")
       .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
-      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode)
       .option(HoodieMetadataConfig.METADATA_ENABLE_PROP.key, isMetadataEnabled)
       .mode(SaveMode.Append)
       .save(basePath)
 
     // Incremental query without "*" in path
     val hoodieIncViewDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime1)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime1)
       .load(basePath)
     assertEquals(N + 1, hoodieIncViewDF1.count())
   }
@@ -656,10 +656,10 @@ class TestMORDataSource extends HoodieClientTestBase {
 
     inputDF1.write.format("hudi")
       .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
-      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY.key, partitionEncode)
-      .option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY.key, hiveStylePartition)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.URL_ENCODE_PARTITIONING.key, partitionEncode)
+      .option(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING.key, hiveStylePartition)
       .mode(SaveMode.Overwrite)
       .save(basePath)
@@ -714,8 +714,8 @@ class TestMORDataSource extends HoodieClientTestBase {
     val inputDF = spark.read.json(spark.sparkContext.parallelize(recordsToStrings(records1), 2))
     inputDF.write.format("hudi")
       .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       // Use InMemoryIndex to generate log only mor table.
       .option(HoodieIndexConfig.INDEX_TYPE_PROP.key, IndexType.INMEMORY.toString)
       .mode(SaveMode.Overwrite)
@@ -732,8 +732,8 @@ class TestMORDataSource extends HoodieClientTestBase {
     val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
     inputDF1.write.format("org.apache.hudi")
       .options(commonOpts)
-      .option(DataSourceWriteOptions.OPERATION_OPT_KEY.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
-      .option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
+      .option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.TABLE_TYPE.key(), DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
       // option for clustering
       .option("hoodie.parquet.small.file.limit", "0")
       .option("hoodie.clustering.inline", "true")
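Note: the MOR hunks above exercise all three query types plus the skip-merge knob. A
compact sketch of how the renamed read options select them (the base path is illustrative,
and an active SparkSession `spark` is assumed):

    import org.apache.hudi.DataSourceReadOptions

    val base = "/tmp/hudi/mor_table" // hypothetical MOR table
    // Snapshot: base files merged with log files.
    val snapshotDF = spark.read.format("hudi")
      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
      .load(base)
    // Read optimized: base files only, log files are skipped entirely.
    val roDF = spark.read.format("hudi")
      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
      .load(base)
    // Snapshot without merging log records: REALTIME_MERGE replaces REALTIME_MERGE_OPT_KEY.
    val unmergedDF = spark.read.format("hudi")
      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
      .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
      .load(base)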
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStreamingSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStreamingSource.scala
index e1eeda38e..f6aa5509b 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStreamingSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStreamingSource.scala
@@ -18,7 +18,7 @@ package org.apache.hudi.functional
 
 import org.apache.hudi.DataSourceWriteOptions
-import org.apache.hudi.DataSourceWriteOptions.{PRECOMBINE_FIELD_OPT_KEY, RECORDKEY_FIELD_OPT_KEY}
+import org.apache.hudi.DataSourceWriteOptions.{PRECOMBINE_FIELD, RECORDKEY_FIELD}
 import org.apache.hudi.common.model.HoodieTableType.{COPY_ON_WRITE, MERGE_ON_READ}
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.config.HoodieWriteConfig.{DELETE_PARALLELISM, INSERT_PARALLELISM, TABLE_NAME, UPSERT_PARALLELISM}
@@ -29,8 +29,8 @@ class TestStreamingSource extends StreamTest {
   import testImplicits._
 
   private val commonOptions = Map(
-    RECORDKEY_FIELD_OPT_KEY.key -> "id",
-    PRECOMBINE_FIELD_OPT_KEY.key -> "ts",
+    RECORDKEY_FIELD.key -> "id",
+    PRECOMBINE_FIELD.key -> "ts",
     INSERT_PARALLELISM.key -> "4",
     UPSERT_PARALLELISM.key -> "4",
     DELETE_PARALLELISM.key -> "4"
@@ -47,7 +47,7 @@ class TestStreamingSource extends StreamTest {
       HoodieTableMetaClient.withPropertyBuilder()
        .setTableType(COPY_ON_WRITE)
         .setTableName(getTableName(tablePath))
-        .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY.defaultValue)
+        .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS.defaultValue)
         .initTable(spark.sessionState.newHadoopConf(), tablePath)
 
       addData(tablePath, Seq(("1", "a1", "10", "000")))
@@ -97,7 +97,7 @@ class TestStreamingSource extends StreamTest {
       HoodieTableMetaClient.withPropertyBuilder()
         .setTableType(MERGE_ON_READ)
         .setTableName(getTableName(tablePath))
-        .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY.defaultValue)
+        .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS.defaultValue)
         .initTable(spark.sessionState.newHadoopConf(), tablePath)
 
       addData(tablePath, Seq(("1", "a1", "10", "000")))
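Note: TestStreamingSource above seeds new tables with the payload class default. A sketch
of the same initialization outside the test harness (the table name and path are
illustrative; a plain Hadoop Configuration stands in for the session one):

    import org.apache.hadoop.conf.Configuration
    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.common.model.HoodieTableType.COPY_ON_WRITE
    import org.apache.hudi.common.table.HoodieTableMetaClient

    // PAYLOAD_CLASS replaces PAYLOAD_CLASS_OPT_KEY; its defaultValue is unchanged by the rename.
    HoodieTableMetaClient.withPropertyBuilder()
      .setTableType(COPY_ON_WRITE)
      .setTableName("example_table") // hypothetical name
      .setPayloadClassName(DataSourceWriteOptions.PAYLOAD_CLASS.defaultValue)
      .initTable(new Configuration(), "/tmp/hudi/example_table") // hypothetical path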
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStructuredStreaming.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStructuredStreaming.scala
index 483ed92c0..a8bd9bc73 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStructuredStreaming.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestStructuredStreaming.scala
@@ -47,9 +47,9 @@ class TestStructuredStreaming extends HoodieClientTestBase {
   val commonOpts = Map(
     "hoodie.insert.shuffle.parallelism" -> "4",
     "hoodie.upsert.shuffle.parallelism" -> "4",
-    DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY.key -> "_row_key",
-    DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY.key -> "partition",
-    DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY.key -> "timestamp",
+    DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
+    DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
+    DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp",
     HoodieWriteConfig.TABLE_NAME.key -> "hoodie_test"
   )
@@ -137,9 +137,9 @@ class TestStructuredStreaming extends HoodieClientTestBase {
     // we have 2 commits, try pulling the first commit (which is not the latest)
     val firstCommit = HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").get(0)
     val hoodieIncViewDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, firstCommit)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommit)
       .load(destPath)
     assertEquals(100, hoodieIncViewDF1.count()) // 100 initial inserts must be pulled
@@ -149,8 +149,8 @@ class TestStructuredStreaming extends HoodieClientTestBase {
 
     // pull the latest commit
     val hoodieIncViewDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, commitInstantTime1)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commitInstantTime1)
      .load(destPath)
 
     assertEquals(uniqueKeyCnt, hoodieIncViewDF2.count()) // 100 records must be pulled
@@ -195,8 +195,8 @@ class TestStructuredStreaming extends HoodieClientTestBase {
       clusteringNumCommit: String, fileMaxRecordNum: Int):Map[String, String] = {
     commonOpts + (HoodieClusteringConfig.INLINE_CLUSTERING_PROP.key -> isInlineClustering,
       HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP.key -> clusteringNumCommit,
-      DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key -> isAsyncClustering,
-      DataSourceWriteOptions.ASYNC_COMPACT_ENABLE_OPT_KEY.key -> isAsyncCompaction,
+      DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE.key -> isAsyncClustering,
+      DataSourceWriteOptions.ASYNC_COMPACT_ENABLE.key -> isAsyncCompaction,
       HoodieClusteringConfig.ASYNC_CLUSTERING_MAX_COMMIT_PROP.key -> clusteringNumCommit,
       HoodieStorageConfig.PARQUET_FILE_MAX_BYTES.key -> dataGen.getEstimatedFileSizeInBytes(fileMaxRecordNum).toString
     )
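Note: the TestStructuredStreaming hunk above toggles clustering and compaction through
write options. A sketch of assembling those options with the renamed constants (all the
values below are placeholders, not recommendations):

    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.config.HoodieClusteringConfig

    // ASYNC_CLUSTERING_ENABLE / ASYNC_COMPACT_ENABLE replace the *_OPT_KEY spellings.
    val clusteringOpts: Map[String, String] = Map(
      HoodieClusteringConfig.INLINE_CLUSTERING_PROP.key -> "false",
      DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE.key -> "true",
      DataSourceWriteOptions.ASYNC_COMPACT_ENABLE.key -> "false"
    )
    // clusteringOpts can then be passed to a writer via .options(clusteringOpts).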
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala
index 403d93e11..bd8558710 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala
@@ -495,9 +495,9 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
     )
     // Test incremental query
     val hudiIncDF1 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, "000")
-      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY.key, firstCommitTime)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, firstCommitTime)
       .load(targetBasePath)
     hudiIncDF1.createOrReplaceTempView("inc1")
     checkAnswer(s"select id, name, price, _ts from inc1")(
@@ -520,8 +520,8 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
     )
     // Test incremental query
     val hudiIncDF2 = spark.read.format("org.apache.hudi")
-      .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
-      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY.key, secondCommitTime)
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, secondCommitTime)
       .load(targetBasePath)
     hudiIncDF2.createOrReplaceTempView("inc2")
     checkAnswer(s"select id, name, price, _ts from inc2 order by id")(
diff --git a/hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java b/hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java
index eea49e667..04162562e 100644
--- a/hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java
+++ b/hudi-spark-datasource/hudi-spark2/src/test/java/org/apache/hudi/internal/TestHoodieDataSourceInternalWriter.java
@@ -126,14 +126,14 @@ public class TestHoodieDataSourceInternalWriter extends
     String commitExtraMetaPrefix = "commit_extra_meta_";
     Map<String, String> extraMeta = new HashMap<>();
-    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key(), commitExtraMetaPrefix);
+    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix);
     extraMeta.put(commitExtraMetaPrefix + "a", "valA");
     extraMeta.put(commitExtraMetaPrefix + "b", "valB");
     extraMeta.put("commit_extra_c", "valC"); // should not be part of commit extra metadata
 
     Map<String, String> expectedMetadata = new HashMap<>();
     expectedMetadata.putAll(extraMeta);
-    expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key());
+    expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key());
     expectedMetadata.remove("commit_extra_c");
 
     testDataSourceWriterInternal(extraMeta, expectedMetadata, true);
@@ -143,7 +143,7 @@ public class TestHoodieDataSourceInternalWriter extends
   public void testDataSourceWriterEmptyExtraCommitMetadata() throws Exception {
     String commitExtraMetaPrefix = "commit_extra_meta_";
     Map<String, String> extraMeta = new HashMap<>();
-    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key(), commitExtraMetaPrefix);
+    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix);
     extraMeta.put("keyA", "valA");
     extraMeta.put("keyB", "valB");
     extraMeta.put("commit_extra_c", "valC");
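Note: the internal-writer tests above drive commit metadata through an option prefix. A
sketch of tagging a write with extra commit metadata via the renamed constant (the prefix
and values are illustrative):

    import org.apache.hudi.DataSourceWriteOptions

    // Only keys that start with the configured prefix are expected to land in the
    // commit metadata; COMMIT_METADATA_KEYPREFIX replaces COMMIT_METADATA_KEYPREFIX_OPT_KEY.
    val commitMetaOpts: Map[String, String] = Map(
      DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key -> "deployment_",
      "deployment_job_id" -> "job-42", // matches the prefix, so it is carried along
      "unrelated_key" -> "ignored"     // does not match, so it is not commit metadata
    )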
diff --git a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java
index ae4980461..176b67bbe 100644
--- a/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java
+++ b/hudi-spark-datasource/hudi-spark3/src/test/java/org/apache/hudi/spark3/internal/TestHoodieDataSourceInternalBatchWrite.java
@@ -128,14 +128,14 @@ public class TestHoodieDataSourceInternalBatchWrite extends
   public void testDataSourceWriterExtraCommitMetadata() throws Exception {
     String commitExtraMetaPrefix = "commit_extra_meta_";
     Map<String, String> extraMeta = new HashMap<>();
-    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key(), commitExtraMetaPrefix);
+    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix);
     extraMeta.put(commitExtraMetaPrefix + "a", "valA");
     extraMeta.put(commitExtraMetaPrefix + "b", "valB");
     extraMeta.put("commit_extra_c", "valC"); // should not be part of commit extra metadata
 
     Map<String, String> expectedMetadata = new HashMap<>();
     expectedMetadata.putAll(extraMeta);
-    expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key());
+    expectedMetadata.remove(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key());
     expectedMetadata.remove("commit_extra_c");
 
     testDataSourceWriterInternal(extraMeta, expectedMetadata, true);
@@ -145,7 +145,7 @@ public class TestHoodieDataSourceInternalBatchWrite extends
   public void testDataSourceWriterEmptyExtraCommitMetadata() throws Exception {
     String commitExtraMetaPrefix = "commit_extra_meta_";
     Map<String, String> extraMeta = new HashMap<>();
-    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX_OPT_KEY().key(), commitExtraMetaPrefix);
+    extraMeta.put(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX().key(), commitExtraMetaPrefix);
     extraMeta.put("keyA", "valA");
     extraMeta.put("keyB", "valB");
     extraMeta.put("commit_extra_c", "valC");
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java
index 7b2523c59..55f849c18 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java
@@ -117,8 +117,8 @@ public class BootstrapExecutor implements Serializable {
     this.bootstrapBasePath = properties.getString(HoodieTableConfig.HOODIE_BOOTSTRAP_BASE_PATH_PROP.key());
 
     // Add more defaults if full bootstrap requested
-    this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY().key(),
-        DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY().defaultValue());
+    this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS().key(),
+        DataSourceWriteOptions.PAYLOAD_CLASS().defaultValue());
     this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jssc);
     HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder().withPath(cfg.targetBasePath)
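Note: BootstrapExecutor above seeds a missing payload class from the ConfigProperty
default. The same key/defaultValue pairing is reachable from Scala; a sketch (the
Properties object is a stand-in for DeltaStreamer's typed properties):

    import java.util.Properties
    import org.apache.hudi.DataSourceWriteOptions

    val props = new Properties()
    // The renamed constant still carries both the config key and its default value.
    props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS.key,
      DataSourceWriteOptions.PAYLOAD_CLASS.defaultValue)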
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
index e56082c11..fead1b364 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
@@ -98,7 +98,7 @@ import java.util.stream.Collectors;
 import scala.collection.JavaConversions;
 
 import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP;
-import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY;
+import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE;
 import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY;
 import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_RESET_KEY;
 import static org.apache.hudi.config.HoodieClusteringConfig.INLINE_CLUSTERING_PROP;
@@ -687,7 +687,7 @@ public class DeltaSync implements Serializable {
     ValidationUtils.checkArgument(config.inlineClusteringEnabled() == cfg.isInlineClusteringEnabled(),
         String.format("%s should be set to %s", INLINE_CLUSTERING_PROP.key(), cfg.isInlineClusteringEnabled()));
     ValidationUtils.checkArgument(config.isAsyncClusteringEnabled() == cfg.isAsyncClusteringEnabled(),
-        String.format("%s should be set to %s", ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), cfg.isAsyncClusteringEnabled()));
+        String.format("%s should be set to %s", ASYNC_CLUSTERING_ENABLE.key(), cfg.isAsyncClusteringEnabled()));
     ValidationUtils.checkArgument(!config.shouldAutoCommit(),
         String.format("%s should be set to %s", HOODIE_AUTO_COMMIT_PROP.key(), autoCommit));
     ValidationUtils.checkArgument(config.shouldCombineBeforeInsert() == cfg.filterDupes,
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
index 9734a1d5c..0d4259b1a 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
@@ -362,7 +362,7 @@ public class HoodieDeltaStreamer implements Serializable {
 
     public boolean isAsyncClusteringEnabled() {
       return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getConfig()
-          .getOrDefault(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), false)));
+          .getOrDefault(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), false)));
     }
 
     public boolean isInlineClusteringEnabled() {
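Note: DeltaSync above keeps its error strings aligned with the renamed constant, so the
message always names the live config key rather than a stale spelling. A sketch of that
self-describing check (the boolean values are illustrative stand-ins):

    import org.apache.hudi.common.util.ValidationUtils
    import org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE

    val configured = true // stand-in for config.isAsyncClusteringEnabled()
    val requested = true  // stand-in for cfg.isAsyncClusteringEnabled()
    // If the two disagree, the exception message names the exact config key to fix.
    ValidationUtils.checkArgument(configured == requested,
      s"${ASYNC_CLUSTERING_ENABLE.key()} should be set to $requested")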
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java
index ddf03cb49..a7bf35353 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java
@@ -128,7 +128,7 @@ public class HoodieMultiTableDeltaStreamer {
       Helpers.deepCopyConfigs(config, cfg);
       String overriddenTargetBasePath = tableProperties.getString(Constants.TARGET_BASE_PATH_PROP, "");
       cfg.targetBasePath = StringUtils.isNullOrEmpty(overriddenTargetBasePath) ? targetBasePath : overriddenTargetBasePath;
-      if (cfg.enableHiveSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), ""))) {
+      if (cfg.enableHiveSync && StringUtils.isNullOrEmpty(tableProperties.getString(DataSourceWriteOptions.HIVE_TABLE().key(), ""))) {
         throw new HoodieException("Hive sync table field not provided!");
       }
       populateSchemaProviderProps(cfg, tableProperties);
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
index 7821f87fd..dd841f427 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java
@@ -115,9 +115,9 @@ public class HoodieIncrSource extends RowSource {
 
     // Do Incr pull. Set end instant if available
     DataFrameReader reader = sparkSession.read().format("org.apache.hudi")
-        .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
-        .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY().key(), instantEndpts.getLeft())
-        .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY().key(), instantEndpts.getRight())
+        .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
+        .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantEndpts.getLeft())
+        .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
 
     Dataset<Row> source = reader.load(srcPath);
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
index 0ae0aeb8b..0d721f5fe 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
@@ -884,7 +884,7 @@ public class TestHoodieDeltaStreamer extends TestHoodieDeltaStreamerBase {
         configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP.key(), inlineClusterMaxCommit));
       }
       if (!StringUtils.isNullOrEmpty(asyncCluster)) {
-        configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), asyncCluster));
+        configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), asyncCluster));
       }
       if (!StringUtils.isNullOrEmpty(asyncClusterMaxCommit)) {
         configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_MAX_COMMIT_PROP.key(), asyncClusterMaxCommit));
      }
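Note: the DeltaStreamer test hunks above and below set Hive sync through properties. A
sketch of the same wiring with the renamed accessors (the host, database, and table names
are placeholders):

    import java.util.Properties
    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.hive.MultiPartKeysValueExtractor

    val hiveProps = new Properties()
    // HIVE_URL, HIVE_DATABASE, HIVE_TABLE, etc. replace the *_OPT_KEY spellings.
    hiveProps.setProperty(DataSourceWriteOptions.HIVE_URL.key, "jdbc:hive2://localhost:10000/")
    hiveProps.setProperty(DataSourceWriteOptions.HIVE_DATABASE.key, "example_db")
    hiveProps.setProperty(DataSourceWriteOptions.HIVE_TABLE.key, "example_table")
    hiveProps.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key, "datestr")
    hiveProps.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS.key,
      classOf[MultiPartKeysValueExtractor].getName)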
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
index 0ae0aeb8b..0d721f5fe 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java
@@ -884,7 +884,7 @@ public class TestHoodieDeltaStreamer extends TestHoodieDeltaStreamerBase {
       configs.add(String.format("%s=%s", HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP.key(), inlineClusterMaxCommit));
     }
     if (!StringUtils.isNullOrEmpty(asyncCluster)) {
-      configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY.key(), asyncCluster));
+      configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), asyncCluster));
     }
     if (!StringUtils.isNullOrEmpty(asyncClusterMaxCommit)) {
       configs.add(String.format("%s=%s", HoodieClusteringConfig.ASYNC_CLUSTERING_MAX_COMMIT_PROP.key(), asyncClusterMaxCommit));
     }
diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerBase.java
index ae477dc9a..95f729120 100644
--- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerBase.java
+++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerBase.java
@@ -116,11 +116,11 @@ public class TestHoodieDeltaStreamerBase extends UtilitiesTestBase {
     props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc");
 
     // Hive Configs
-    props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
-    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb1");
-    props.setProperty(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY().key(), "hive_trips");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
+    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
+    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb1");
+    props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "hive_trips");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
         MultiPartKeysValueExtractor.class.getName());
     UtilitiesTestBase.Helpers.savePropsToDFS(props, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE);
@@ -212,11 +212,11 @@ public class TestHoodieDeltaStreamerBase extends UtilitiesTestBase {
 
   protected static void populateCommonHiveProps(TypedProperties props) {
     // Hive Configs
-    props.setProperty(DataSourceWriteOptions.HIVE_URL_OPT_KEY().key(), "jdbc:hive2://127.0.0.1:9999/");
-    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY().key(), "testdb2");
-    props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION_OPT_KEY().key(), "false");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY().key(), "datestr");
-    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY().key(),
+    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://127.0.0.1:9999/");
+    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "testdb2");
+    props.setProperty(DataSourceWriteOptions.HIVE_ASSUME_DATE_PARTITION().key(), "false");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
+    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
         MultiPartKeysValueExtractor.class.getName());
   }
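Note (reviewer sketch, not part of the patch): the test fixtures above set Hive sync properties through the renamed write-option constants. A self-contained sketch of building the same kind of TypedProperties bundle; the JDBC endpoint, database, and table values here are placeholders, and the TypedProperties import path is assumed to be the hudi-common one used at this point in the codebase:

import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.hive.MultiPartKeysValueExtractor;

public class HiveSyncPropsSketch {
  public static TypedProperties hiveSyncProps() {
    TypedProperties props = new TypedProperties();
    // Same renamed constants as the test fixtures; only the values differ.
    props.setProperty(DataSourceWriteOptions.HIVE_URL().key(), "jdbc:hive2://localhost:10000/");
    props.setProperty(DataSourceWriteOptions.HIVE_DATABASE().key(), "demo_db");
    props.setProperty(DataSourceWriteOptions.HIVE_TABLE().key(), "demo_table");
    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "datestr");
    props.setProperty(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
        MultiPartKeysValueExtractor.class.getName());
    return props;
  }
}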
assertEquals("uber_hive_dummy_table", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.HIVE_SYNC_TABLE_PROP)); } @@ -224,11 +224,11 @@ public class TestHoodieMultiTableDeltaStreamer extends TestHoodieDeltaStreamerBa tableExecutionContexts.forEach(tableExecutionContext -> { switch (tableExecutionContext.getTableName()) { case "dummy_table_short_trip": - String tableLevelKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key()); + String tableLevelKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key()); assertEquals(TestHoodieDeltaStreamer.TestTableLevelGenerator.class.getName(), tableLevelKeyGeneratorClass); break; default: - String defaultKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY().key()); + String defaultKeyGeneratorClass = tableExecutionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS().key()); assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), defaultKeyGeneratorClass); } });