[HUDI-2242] Add configuration inference logic for few options (#3359)
Co-authored-by: Wenning Ding <wenningd@amazon.com>
@@ -719,8 +719,8 @@ object HoodieSparkSqlWriter {
 
   private def mergeParamsAndGetHoodieConfig(optParams: Map[String, String],
       tableConfig: HoodieTableConfig): (Map[String, String], HoodieConfig) = {
-    val mergedParams = mutable.Map.empty ++
-      DataSourceWriteOptions.translateSqlOptions(HoodieWriterUtils.parametersWithWriteDefaults(optParams))
+    val translatedOptions = DataSourceWriteOptions.translateSqlOptions(optParams)
+    val mergedParams = mutable.Map.empty ++ HoodieWriterUtils.parametersWithWriteDefaults(translatedOptions)
     if (!mergedParams.contains(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key)
       && mergedParams.contains(KEYGENERATOR_CLASS_NAME.key)) {
       mergedParams(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key) = mergedParams(KEYGENERATOR_CLASS_NAME.key)
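Note: the hunk above swaps the order of SQL-option translation and default filling, so the defaulting and inference inside parametersWithWriteDefaults now run over already-translated datasource keys. A minimal, self-contained sketch of why the order matters; the "primaryKey" alias and the inference rule here are illustrative assumptions, not Hudi's code:

    // Sketch only: shows the effect of translating options before (vs. after)
    // applying defaults and inference. Names below are made up for illustration.
    object OptionOrderingSketch {
      val RecordKey = "hoodie.datasource.write.recordkey.field"
      val KeyGen = "hoodie.datasource.write.keygenerator.class"

      // Resolve a SQL-style alias into the datasource key it stands for.
      def translateSqlOptions(opts: Map[String, String]): Map[String, String] =
        opts.get("primaryKey").fold(opts)(v => opts - "primaryKey" + (RecordKey -> v))

      // Fill defaults, then infer the key generator from the record key's shape.
      def parametersWithWriteDefaults(opts: Map[String, String]): Map[String, String] = {
        val withKey = if (opts.contains(RecordKey)) opts else opts + (RecordKey -> "uuid")
        val keyGen =
          if (withKey(RecordKey).contains(",")) "ComplexKeyGenerator" else "SimpleKeyGenerator"
        withKey + (KeyGen -> keyGen)
      }

      def main(args: Array[String]): Unit = {
        val user = Map("primaryKey" -> "id,ts")
        // Old order: defaults and inference ran before the alias was resolved,
        // never saw the composite key, and picked the wrong generator.
        val before = translateSqlOptions(parametersWithWriteDefaults(user))
        // New order: translate first, so inference sees "id,ts".
        val after = parametersWithWriteDefaults(translateSqlOptions(user))
        println(before(KeyGen)) // SimpleKeyGenerator
        println(after(KeyGen))  // ComplexKeyGenerator
      }
    }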
@@ -48,41 +48,44 @@ object HoodieWriterUtils {
    */
   def parametersWithWriteDefaults(parameters: Map[String, String]): Map[String, String] = {
     val globalProps = DFSPropertiesConfiguration.getGlobalProps.asScala
-    Map(OPERATION.key -> OPERATION.defaultValue,
-      TABLE_TYPE.key -> TABLE_TYPE.defaultValue,
-      PRECOMBINE_FIELD.key -> PRECOMBINE_FIELD.defaultValue,
-      PAYLOAD_CLASS_NAME.key -> PAYLOAD_CLASS_NAME.defaultValue,
-      RECORDKEY_FIELD.key -> RECORDKEY_FIELD.defaultValue,
-      PARTITIONPATH_FIELD.key -> PARTITIONPATH_FIELD.defaultValue,
-      KEYGENERATOR_CLASS_NAME.key -> DEFAULT_KEYGENERATOR_CLASS_OPT_VAL,
-      ENABLE.key -> ENABLE.defaultValue.toString,
-      COMMIT_METADATA_KEYPREFIX.key -> COMMIT_METADATA_KEYPREFIX.defaultValue,
-      INSERT_DROP_DUPS.key -> INSERT_DROP_DUPS.defaultValue,
-      STREAMING_RETRY_CNT.key -> STREAMING_RETRY_CNT.defaultValue,
-      STREAMING_RETRY_INTERVAL_MS.key -> STREAMING_RETRY_INTERVAL_MS.defaultValue,
-      STREAMING_IGNORE_FAILED_BATCH.key -> STREAMING_IGNORE_FAILED_BATCH.defaultValue,
-      META_SYNC_CLIENT_TOOL_CLASS_NAME.key -> META_SYNC_CLIENT_TOOL_CLASS_NAME.defaultValue,
-      HIVE_SYNC_ENABLED.key -> HIVE_SYNC_ENABLED.defaultValue,
-      META_SYNC_ENABLED.key -> META_SYNC_ENABLED.defaultValue,
-      HIVE_DATABASE.key -> HIVE_DATABASE.defaultValue,
-      HIVE_TABLE.key -> HIVE_TABLE.defaultValue,
-      HIVE_BASE_FILE_FORMAT.key -> HIVE_BASE_FILE_FORMAT.defaultValue,
-      HIVE_USER.key -> HIVE_USER.defaultValue,
-      HIVE_PASS.key -> HIVE_PASS.defaultValue,
-      HIVE_URL.key -> HIVE_URL.defaultValue,
-      HIVE_PARTITION_FIELDS.key -> HIVE_PARTITION_FIELDS.defaultValue,
-      HIVE_PARTITION_EXTRACTOR_CLASS.key -> HIVE_PARTITION_EXTRACTOR_CLASS.defaultValue,
-      HIVE_STYLE_PARTITIONING.key -> HIVE_STYLE_PARTITIONING.defaultValue,
-      HIVE_USE_JDBC.key -> HIVE_USE_JDBC.defaultValue,
-      HIVE_CREATE_MANAGED_TABLE.key() -> HIVE_CREATE_MANAGED_TABLE.defaultValue.toString,
-      HIVE_SYNC_AS_DATA_SOURCE_TABLE.key() -> HIVE_SYNC_AS_DATA_SOURCE_TABLE.defaultValue(),
-      ASYNC_COMPACT_ENABLE.key -> ASYNC_COMPACT_ENABLE.defaultValue,
-      INLINE_CLUSTERING_ENABLE.key -> INLINE_CLUSTERING_ENABLE.defaultValue,
-      ASYNC_CLUSTERING_ENABLE.key -> ASYNC_CLUSTERING_ENABLE.defaultValue,
-      ENABLE_ROW_WRITER.key -> ENABLE_ROW_WRITER.defaultValue,
-      RECONCILE_SCHEMA.key -> RECONCILE_SCHEMA.defaultValue.toString,
-      DROP_PARTITION_COLUMNS.key -> DROP_PARTITION_COLUMNS.defaultValue
-    ) ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters)
+    val props = new Properties()
+    props.putAll(parameters)
+    val hoodieConfig: HoodieConfig = new HoodieConfig(props)
+    hoodieConfig.setDefaultValue(OPERATION)
+    hoodieConfig.setDefaultValue(TABLE_TYPE)
+    hoodieConfig.setDefaultValue(PRECOMBINE_FIELD)
+    hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME)
+    hoodieConfig.setDefaultValue(RECORDKEY_FIELD)
+    hoodieConfig.setDefaultValue(PARTITIONPATH_FIELD)
+    hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME)
+    hoodieConfig.setDefaultValue(ENABLE)
+    hoodieConfig.setDefaultValue(COMMIT_METADATA_KEYPREFIX)
+    hoodieConfig.setDefaultValue(INSERT_DROP_DUPS)
+    hoodieConfig.setDefaultValue(STREAMING_RETRY_CNT)
+    hoodieConfig.setDefaultValue(STREAMING_RETRY_INTERVAL_MS)
+    hoodieConfig.setDefaultValue(STREAMING_IGNORE_FAILED_BATCH)
+    hoodieConfig.setDefaultValue(META_SYNC_CLIENT_TOOL_CLASS_NAME)
+    hoodieConfig.setDefaultValue(HIVE_SYNC_ENABLED)
+    hoodieConfig.setDefaultValue(META_SYNC_ENABLED)
+    hoodieConfig.setDefaultValue(HIVE_DATABASE)
+    hoodieConfig.setDefaultValue(HIVE_TABLE)
+    hoodieConfig.setDefaultValue(HIVE_BASE_FILE_FORMAT)
+    hoodieConfig.setDefaultValue(HIVE_USER)
+    hoodieConfig.setDefaultValue(HIVE_PASS)
+    hoodieConfig.setDefaultValue(HIVE_URL)
+    hoodieConfig.setDefaultValue(HIVE_PARTITION_FIELDS)
+    hoodieConfig.setDefaultValue(HIVE_PARTITION_EXTRACTOR_CLASS)
+    hoodieConfig.setDefaultValue(HIVE_STYLE_PARTITIONING)
+    hoodieConfig.setDefaultValue(HIVE_USE_JDBC)
+    hoodieConfig.setDefaultValue(HIVE_CREATE_MANAGED_TABLE)
+    hoodieConfig.setDefaultValue(HIVE_SYNC_AS_DATA_SOURCE_TABLE)
+    hoodieConfig.setDefaultValue(ASYNC_COMPACT_ENABLE)
+    hoodieConfig.setDefaultValue(INLINE_CLUSTERING_ENABLE)
+    hoodieConfig.setDefaultValue(ASYNC_CLUSTERING_ENABLE)
+    hoodieConfig.setDefaultValue(ENABLE_ROW_WRITER)
+    hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
+    hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
+    Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters)
   }
 
   def toProperties(params: Map[String, String]): TypedProperties = {
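Note: the rewrite replaces one literal Map of defaults with per-option HoodieConfig.setDefaultValue calls over the user's Properties. The point of that pattern is set-if-absent semantics: a user-supplied key is never clobbered, and a ConfigProperty can contribute an inferred value rather than a static default, which is what this PR's title refers to. A toy reproduction of the set-if-absent pattern with plain java.util.Properties, assuming setDefaultValue never overwrites an existing key (a sketch, not Hudi's implementation):

    import java.util.Properties

    object SetDefaultSketch {
      // Fill a key only when the caller has not already supplied it.
      def setDefaultValue(props: Properties, key: String, default: String): Unit =
        if (!props.containsKey(key)) props.setProperty(key, default)

      def main(args: Array[String]): Unit = {
        val props = new Properties()
        props.setProperty("hoodie.datasource.write.operation", "insert") // user's choice
        setDefaultValue(props, "hoodie.datasource.write.operation", "upsert")
        setDefaultValue(props, "hoodie.datasource.write.table.type", "COPY_ON_WRITE")
        println(props.getProperty("hoodie.datasource.write.operation"))  // insert (kept)
        println(props.getProperty("hoodie.datasource.write.table.type")) // COPY_ON_WRITE (defaulted)
      }
    }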
@@ -26,6 +26,7 @@ import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
 import org.apache.hudi.hive.NonPartitionedExtractor;
+import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
 import org.apache.hudi.keygen.SimpleKeyGenerator;
 
@@ -270,7 +271,9 @@ public class HoodieJavaApp {
           DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
           MultiPartKeysValueExtractor.class.getCanonicalName());
     } else {
-      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr");
+      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr").option(
+          DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
+          SlashEncodedDayPartitionValueExtractor.class.getCanonicalName());
     }
   }
   return writer;
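Note: with the extractor now subject to inference, the demo app pins SlashEncodedDayPartitionValueExtractor explicitly in the else branch so its Hive sync layout stays as before; the same two hunks repeat for HoodieJavaGenerateApp and HoodieJavaStreamingApp below. A hedged Scala equivalent of the pinned options, for readers following along from the Scala datasource side (illustrative, not taken from the repo):

    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
    import org.apache.spark.sql.{DataFrameWriter, Row}

    object PinExtractorSketch {
      // Chain the two options exactly as the Java demos now do, so the new
      // inference logic cannot change the demo's partition-value extraction.
      def pinHiveSyncOptions(writer: DataFrameWriter[Row]): DataFrameWriter[Row] =
        writer
          .option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS.key, "dateStr")
          .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS.key,
            classOf[SlashEncodedDayPartitionValueExtractor].getCanonicalName)
    }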
@@ -25,6 +25,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
 import org.apache.hudi.hive.NonPartitionedExtractor;
+import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
 import org.apache.hudi.keygen.SimpleKeyGenerator;
 
@@ -140,7 +141,9 @@ public class HoodieJavaGenerateApp {
           DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
           MultiPartKeysValueExtractor.class.getCanonicalName());
     } else {
-      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr");
+      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr").option(
+          DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
+          SlashEncodedDayPartitionValueExtractor.class.getCanonicalName());
     }
   }
   return writer;

@@ -28,6 +28,7 @@ import org.apache.hudi.config.HoodieCompactionConfig;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.TableNotFoundException;
 import org.apache.hudi.hive.MultiPartKeysValueExtractor;
+import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 
 import com.beust.jcommander.JCommander;
 import com.beust.jcommander.Parameter;
@@ -391,7 +392,9 @@ public class HoodieJavaStreamingApp {
           DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
           MultiPartKeysValueExtractor.class.getCanonicalName());
     } else {
-      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr");
+      writer = writer.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr").option(
+          DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(),
+          SlashEncodedDayPartitionValueExtractor.class.getCanonicalName());
     }
   }
   return writer;

@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.hive.{HiveStylePartitionValueExtractor, MultiPartKeysValueExtractor}
+import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator}
+import org.junit.jupiter.api.Assertions.assertEquals
+import org.junit.jupiter.api.Test
+
+class TestDataSourceOptions {
+  @Test def inferDataSourceOptions(): Unit = {
+    val inputOptions1 = Map(
+      TABLE_NAME.key -> "hudi_table",
+      PARTITIONPATH_FIELD.key -> "year,month"
+    )
+    val modifiedOptions1 = HoodieWriterUtils.parametersWithWriteDefaults(inputOptions1)
+    assertEquals(classOf[ComplexKeyGenerator].getName, modifiedOptions1(KEYGENERATOR_CLASS_NAME.key))
+    assertEquals("hudi_table", modifiedOptions1(HIVE_TABLE.key))
+    assertEquals("year,month", modifiedOptions1(HIVE_PARTITION_FIELDS.key))
+    assertEquals(classOf[MultiPartKeysValueExtractor].getName,
+      modifiedOptions1(HIVE_PARTITION_EXTRACTOR_CLASS.key))
+
+    val inputOptions2 = Map(
+      TABLE_NAME.key -> "hudi_table",
+      PARTITIONPATH_FIELD.key -> "year",
+      HIVE_STYLE_PARTITIONING.key -> "true"
+    )
+    val modifiedOptions2 = HoodieWriterUtils.parametersWithWriteDefaults(inputOptions2)
+    assertEquals(classOf[SimpleKeyGenerator].getName, modifiedOptions2(KEYGENERATOR_CLASS_NAME.key))
+    assertEquals("hudi_table", modifiedOptions2(HIVE_TABLE.key))
+    assertEquals("year", modifiedOptions2(HIVE_PARTITION_FIELDS.key))
+    assertEquals(classOf[HiveStylePartitionValueExtractor].getName,
+      modifiedOptions2(HIVE_PARTITION_EXTRACTOR_CLASS.key))
+  }
+}
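Note: the new test pins the inferred values in place: HIVE_TABLE follows TABLE_NAME, HIVE_PARTITION_FIELDS follows PARTITIONPATH_FIELD, the key generator tracks the number of partition fields, and the extractor tracks the field count together with hive-style partitioning. Restated below as a standalone sketch of the observed rules; the real logic lives behind Hudi's infer functions and DataSourceOptionsHelper.translateConfigurations and may consult more inputs:

    // Standalone restatement of the inference rules the test exercises.
    // A sketch of observed behavior, not Hudi's implementation.
    object InferenceRulesSketch {
      def inferKeyGenerator(partitionFields: Seq[String]): String =
        if (partitionFields.length > 1) "org.apache.hudi.keygen.ComplexKeyGenerator"
        else "org.apache.hudi.keygen.SimpleKeyGenerator"

      def inferExtractor(partitionFields: Seq[String], hiveStylePartitioning: Boolean): String =
        if (partitionFields.length == 1 && hiveStylePartitioning)
          "org.apache.hudi.hive.HiveStylePartitionValueExtractor"
        else "org.apache.hudi.hive.MultiPartKeysValueExtractor"

      def main(args: Array[String]): Unit = {
        // Mirrors inputOptions1: two partition fields, default layout.
        assert(inferKeyGenerator(Seq("year", "month")).endsWith("ComplexKeyGenerator"))
        assert(inferExtractor(Seq("year", "month"), hiveStylePartitioning = false)
          .endsWith("MultiPartKeysValueExtractor"))
        // Mirrors inputOptions2: one field with hive-style partitioning enabled.
        assert(inferKeyGenerator(Seq("year")).endsWith("SimpleKeyGenerator"))
        assert(inferExtractor(Seq("year"), hiveStylePartitioning = true)
          .endsWith("HiveStylePartitionValueExtractor"))
        println("inference rules match the test expectations")
      }
    }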