diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java index bacb4960b..6d4236b04 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieConfig.java @@ -172,6 +172,11 @@ public class HoodieConfig implements Serializable { .orElseGet(() -> Boolean.parseBoolean(configProperty.defaultValue().toString())); } + public boolean getBooleanOrDefault(ConfigProperty configProperty, boolean defaultVal) { + Option rawValue = getRawValue(configProperty); + return rawValue.map(v -> Boolean.parseBoolean(v.toString())).orElse(defaultVal); + } + public Long getLong(ConfigProperty configProperty) { Option rawValue = getRawValue(configProperty); return rawValue.map(v -> Long.parseLong(v.toString())).orElse(null); diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index cd16e17ab..16f52f33b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -426,12 +426,8 @@ object DataSourceWriteOptions { @Deprecated val METASTORE_URIS: ConfigProperty[String] = HiveSyncConfigHolder.METASTORE_URIS @Deprecated - val hivePartitionFieldsInferFunc: JavaFunction[HoodieConfig, Option[String]] = HoodieSyncConfig.PARTITION_FIELDS_INFERENCE_FUNCTION - @Deprecated val HIVE_PARTITION_FIELDS: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_PARTITION_FIELDS @Deprecated - val hivePartitionExtractorInferFunc: JavaFunction[HoodieConfig, Option[String]] = HoodieSyncConfig.PARTITION_EXTRACTOR_CLASS_FUNCTION - @Deprecated val HIVE_PARTITION_EXTRACTOR_CLASS: ConfigProperty[String] = 
HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS @Deprecated val HIVE_ASSUME_DATE_PARTITION: ConfigProperty[String] = HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java index 429bb93aa..093ecdfef 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/HoodieSyncConfig.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -34,8 +35,8 @@ import org.apache.hadoop.fs.FileSystem; import java.util.List; import java.util.Properties; -import java.util.function.Function; +import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS; import static org.apache.hudi.common.table.HoodieTableConfig.DATABASE_NAME; import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_TABLE_NAME_KEY; import static org.apache.hudi.common.table.HoodieTableConfig.HOODIE_WRITE_TABLE_NAME_KEY; @@ -72,57 +73,51 @@ public class HoodieSyncConfig extends HoodieConfig { public static final ConfigProperty META_SYNC_BASE_FILE_FORMAT = ConfigProperty .key("hoodie.datasource.hive_sync.base_file_format") .defaultValue("PARQUET") + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(HoodieTableConfig.BASE_FILE_FORMAT))) .withDocumentation("Base file format for the sync."); - // If partition fields are not explicitly provided, obtain from the KeyGeneration 
Configs - public static final Function> PARTITION_FIELDS_INFERENCE_FUNCTION = cfg -> { - if (cfg.contains(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME)) { - return Option.of(cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME)); - } else { - return Option.empty(); - } - }; public static final ConfigProperty META_SYNC_PARTITION_FIELDS = ConfigProperty .key("hoodie.datasource.hive_sync.partition_fields") .defaultValue("") - .withInferFunction(PARTITION_FIELDS_INFERENCE_FUNCTION) + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME))) .withDocumentation("Field in the table to use for determining hive partition columns."); - // If partition value extraction class is not explicitly provided, configure based on the partition fields. - public static final Function> PARTITION_EXTRACTOR_CLASS_FUNCTION = cfg -> { - if (!cfg.contains(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME)) { - return Option.of("org.apache.hudi.hive.NonPartitionedExtractor"); - } else { - int numOfPartFields = cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME).split(",").length; - if (numOfPartFields == 1 - && cfg.contains(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE) - && cfg.getString(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) { - return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor"); - } else { - return Option.of("org.apache.hudi.hive.MultiPartKeysValueExtractor"); - } - } - }; public static final ConfigProperty META_SYNC_PARTITION_EXTRACTOR_CLASS = ConfigProperty .key("hoodie.datasource.hive_sync.partition_extractor_class") .defaultValue("org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor") - .withInferFunction(PARTITION_EXTRACTOR_CLASS_FUNCTION) + .withInferFunction(cfg -> { + if (cfg.contains(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME)) { + int numOfPartFields = cfg.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME).split(",").length; + if (numOfPartFields == 1 + 
&& cfg.contains(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE) + && cfg.getString(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE).equals("true")) { + return Option.of("org.apache.hudi.hive.HiveStylePartitionValueExtractor"); + } else { + return Option.of("org.apache.hudi.hive.MultiPartKeysValueExtractor"); + } + } else { + return Option.of("org.apache.hudi.hive.NonPartitionedExtractor"); + } + }) .withDocumentation("Class which implements PartitionValueExtractor to extract the partition values, " + "default 'SlashEncodedDayPartitionValueExtractor'."); public static final ConfigProperty META_SYNC_ASSUME_DATE_PARTITION = ConfigProperty .key("hoodie.datasource.hive_sync.assume_date_partitioning") - .defaultValue("false") - .withDocumentation("Assume partitioning is yyyy/mm/dd"); + .defaultValue(HoodieMetadataConfig.ASSUME_DATE_PARTITIONING.defaultValue()) + .withInferFunction(cfg -> Option.ofNullable(cfg.getString(HoodieMetadataConfig.ASSUME_DATE_PARTITIONING))) + .withDocumentation("Assume partitioning is yyyy/MM/dd"); public static final ConfigProperty META_SYNC_DECODE_PARTITION = ConfigProperty .key("hoodie.meta.sync.decode_partition") - .defaultValue(false) // TODO infer from url encode option - .withDocumentation(""); + .defaultValue(false) + .withInferFunction(cfg -> Option.ofNullable(cfg.getBoolean(HoodieTableConfig.URL_ENCODE_PARTITIONING))) + .withDocumentation("If true, meta sync will url-decode the partition path, as it is deemed as url-encoded. 
Defaults to false."); public static final ConfigProperty META_SYNC_USE_FILE_LISTING_FROM_METADATA = ConfigProperty .key("hoodie.meta.sync.metadata_file_listing") - .defaultValue(HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS) + .defaultValue(DEFAULT_METADATA_ENABLE_FOR_READERS) + .withInferFunction(cfg -> Option.of(cfg.getBooleanOrDefault(HoodieMetadataConfig.ENABLE, DEFAULT_METADATA_ENABLE_FOR_READERS))) + .withDocumentation("Enable the internal metadata table for file listing for syncing with metastores"); public static final ConfigProperty META_SYNC_CONDITIONAL_SYNC = ConfigProperty diff --git a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java index 1f6c05cd1..eb9d44b9e 100644 --- a/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java +++ b/hudi-sync/hudi-sync-common/src/test/java/org/apache/hudi/sync/common/TestHoodieSyncConfig.java @@ -19,16 +19,26 @@ package org.apache.hudi.sync.common; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hadoop.conf.Configuration; import org.junit.jupiter.api.Test; import java.util.Properties; +import static org.apache.hudi.common.config.HoodieMetadataConfig.DEFAULT_METADATA_ENABLE_FOR_READERS; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_ASSUME_DATE_PARTITION; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DATABASE_NAME; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_DECODE_PARTITION; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS; +import static 
org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_PARTITION_FIELDS; import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_TABLE_NAME; +import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_USE_FILE_LISTING_FROM_METADATA; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; class TestHoodieSyncConfig { @@ -52,4 +62,70 @@ class TestHoodieSyncConfig { assertEquals("default", config3.getString(META_SYNC_DATABASE_NAME)); assertEquals("unknown", config3.getString(META_SYNC_TABLE_NAME)); } + + @Test + void testInferBaseFileFormat() { + Properties props1 = new Properties(); + props1.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), "ORC"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertEquals("ORC", config1.getStringOrDefault(META_SYNC_BASE_FILE_FORMAT)); + + HoodieSyncConfig config2 = new HoodieSyncConfig(new Properties(), new Configuration()); + assertEquals("PARQUET", config2.getStringOrDefault(META_SYNC_BASE_FILE_FORMAT)); + } + + @Test + void testInferPartitionFields() { + Properties props1 = new Properties(); + props1.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "foo,bar"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertEquals("foo,bar", config1.getStringOrDefault(META_SYNC_PARTITION_FIELDS)); + } + + @Test + void testInferPartitionExtractorClass() { + Properties props1 = new Properties(); + props1.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "foo,bar"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertEquals("org.apache.hudi.hive.MultiPartKeysValueExtractor", + config1.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS)); + + Properties props2 = new Properties(); + props2.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "foo"); + 
props2.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key(), "true"); + HoodieSyncConfig config2 = new HoodieSyncConfig(props2, new Configuration()); + assertEquals("org.apache.hudi.hive.HiveStylePartitionValueExtractor", + config2.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS)); + + HoodieSyncConfig config3 = new HoodieSyncConfig(new Properties(), new Configuration()); + assertEquals("org.apache.hudi.hive.NonPartitionedExtractor", + config3.getStringOrDefault(META_SYNC_PARTITION_EXTRACTOR_CLASS)); + } + + @Test + void testInferAssumeDatePartition() { + Properties props1 = new Properties(); + props1.setProperty(HoodieMetadataConfig.ASSUME_DATE_PARTITIONING.key(), "true"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertEquals("true", config1.getString(META_SYNC_ASSUME_DATE_PARTITION)); + } + + @Test + void testInferDecodePartition() { + Properties props1 = new Properties(); + props1.setProperty(HoodieTableConfig.URL_ENCODE_PARTITIONING.key(), "true"); + HoodieSyncConfig config1 = new HoodieSyncConfig(props1, new Configuration()); + assertTrue(config1.getBoolean(META_SYNC_DECODE_PARTITION)); + } + + @Test + void testInferUseFileListingFromMetadata() { + HoodieSyncConfig config1 = new HoodieSyncConfig(new Properties(), new Configuration()); + assertEquals(DEFAULT_METADATA_ENABLE_FOR_READERS, config1.getBoolean(META_SYNC_USE_FILE_LISTING_FROM_METADATA)); + + Properties props2 = new Properties(); + props2.setProperty(HoodieMetadataConfig.ENABLE.key(), "true"); + HoodieSyncConfig config2 = new HoodieSyncConfig(props2, new Configuration()); + assertTrue(config2.getBoolean(META_SYNC_USE_FILE_LISTING_FROM_METADATA)); + } }