From 2467c137e465927f84df88d1985481a1d47954da Mon Sep 17 00:00:00 2001 From: Raymond Xu <2701446+xushiyan@users.noreply.github.com> Date: Thu, 6 Jan 2022 23:26:35 -0800 Subject: [PATCH] [HUDI-3100] Add config for hive conditional sync (#4440) --- .../src/main/java/org/apache/hudi/DataSourceUtils.java | 2 ++ .../main/scala/org/apache/hudi/DataSourceOptions.scala | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java index 1e076b196..89b73a02e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -307,6 +307,8 @@ public class DataSourceUtils { DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE().defaultValue())); hiveSyncConfig.supportTimestamp = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP_TYPE().key(), DataSourceWriteOptions.HIVE_SUPPORT_TIMESTAMP_TYPE().defaultValue())); + hiveSyncConfig.isConditionalSync = Boolean.valueOf(props.getString(DataSourceWriteOptions.HIVE_CONDITIONAL_SYNC().key(), + DataSourceWriteOptions.HIVE_CONDITIONAL_SYNC().defaultValue())); hiveSyncConfig.bucketSpec = props.getBoolean(DataSourceWriteOptions.HIVE_SYNC_BUCKET_SYNC().key(), (boolean) DataSourceWriteOptions.HIVE_SYNC_BUCKET_SYNC().defaultValue()) ? 
HiveSyncConfig.getBucketSpec(props.getString(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key()), diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index dbcc847fc..2b18dfdd7 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -495,6 +495,16 @@ object DataSourceWriteOptions { .withDocumentation("‘INT64’ with original type TIMESTAMP_MICROS is converted to hive ‘timestamp’ type. " + "Disabled by default for backward compatibility.") + /** + * Flag to indicate whether to use conditional syncing in HiveSync. + * If set true, the Hive sync procedure will only run if partition or schema changes are detected. + * By default false. + */ + val HIVE_CONDITIONAL_SYNC: ConfigProperty[String] = ConfigProperty + .key("hoodie.datasource.hive_sync.conditional_sync") + .defaultValue("false") + .withDocumentation("Enables conditional hive sync, where partition or schema change must exist to perform sync to hive.") + val HIVE_TABLE_PROPERTIES: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.table_properties") .noDefaultValue()