[HUDI-3730] Improve meta sync class design and hierarchies (#5854)
* [HUDI-3730] Improve meta sync class design and hierarchies (#5754) * Implements class design proposed in RFC-55 Co-authored-by: jian.feng <fengjian428@gmial.com> Co-authored-by: jian.feng <jian.feng@shopee.com>
This commit is contained in:
@@ -18,9 +18,6 @@
|
||||
|
||||
package org.apache.hudi.cli;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||
import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
@@ -38,7 +35,10 @@ import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
import org.apache.hudi.hive.HiveSyncTool;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.sync.common.HoodieSyncConfig;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
@@ -48,6 +48,12 @@ import java.io.Serializable;
|
||||
import java.util.HashMap;
|
||||
|
||||
import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER;
|
||||
import static org.apache.hudi.config.HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD;
|
||||
import static org.apache.hudi.config.HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC;
|
||||
import static org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SYNC_BUCKET_SYNC_SPEC;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT;
|
||||
import static org.apache.hudi.sync.common.HoodieSyncConfig.META_SYNC_BASE_PATH;
|
||||
|
||||
/**
|
||||
* Performs bootstrap from a non-hudi source.
|
||||
@@ -115,7 +121,7 @@ public class BootstrapExecutorUtils implements Serializable {
|
||||
// Add more defaults if full bootstrap requested
|
||||
this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS_NAME().key(),
|
||||
DataSourceWriteOptions.PAYLOAD_CLASS_NAME().defaultValue());
|
||||
/**
|
||||
/*
|
||||
* Schema provider that supplies the command for reading the input and writing out the target table.
|
||||
*/
|
||||
SchemaProvider schemaProvider = createSchemaProvider(cfg.schemaProviderClass, props, jssc);
|
||||
@@ -165,14 +171,14 @@ public class BootstrapExecutorUtils implements Serializable {
|
||||
if (cfg.enableHiveSync) {
|
||||
TypedProperties metaProps = new TypedProperties();
|
||||
metaProps.putAll(props);
|
||||
metaProps.put(HoodieSyncConfig.META_SYNC_BASE_PATH.key(), cfg.basePath);
|
||||
metaProps.put(HoodieSyncConfig.META_SYNC_BASE_FILE_FORMAT.key(), cfg.baseFileFormat);
|
||||
if (props.getBoolean(HiveSyncConfig.HIVE_SYNC_BUCKET_SYNC.key(), HiveSyncConfig.HIVE_SYNC_BUCKET_SYNC.defaultValue())) {
|
||||
metaProps.put(HiveSyncConfig.HIVE_SYNC_BUCKET_SYNC_SPEC.key(), HiveSyncConfig.getBucketSpec(props.getString(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key()),
|
||||
props.getInteger(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key())));
|
||||
metaProps.put(META_SYNC_BASE_PATH.key(), cfg.basePath);
|
||||
metaProps.put(META_SYNC_BASE_FILE_FORMAT.key(), cfg.baseFileFormat);
|
||||
if (props.getBoolean(HIVE_SYNC_BUCKET_SYNC.key(), HIVE_SYNC_BUCKET_SYNC.defaultValue())) {
|
||||
metaProps.put(HIVE_SYNC_BUCKET_SYNC_SPEC.key(), HiveSyncConfig.getBucketSpec(props.getString(BUCKET_INDEX_HASH_FIELD.key()),
|
||||
props.getInteger(BUCKET_INDEX_NUM_BUCKETS.key())));
|
||||
}
|
||||
|
||||
new HiveSyncTool(metaProps, configuration, fs).syncHoodieTable();
|
||||
new HiveSyncTool(metaProps, configuration).syncHoodieTable();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,11 +20,11 @@ package org.apache.spark.sql.hudi.command
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
import org.apache.hadoop.fs.Path
|
||||
import org.apache.hudi.DataSourceWriteOptions
|
||||
import org.apache.hudi.hive.HiveSyncConfig
|
||||
import org.apache.hudi.hive.HiveSyncConfigHolder
|
||||
import org.apache.hudi.sql.InsertMode
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HoodieCatalogTable}
|
||||
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.needFilterProps
|
||||
import org.apache.hudi.sync.common.util.ConfigUtils
|
||||
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.needFilterProps
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HoodieCatalogTable}
|
||||
import org.apache.spark.sql.catalyst.plans.QueryPlan
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
||||
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
|
||||
@@ -94,9 +94,9 @@ case class CreateHoodieTableAsSelectCommand(
|
||||
|
||||
val tblProperties = hoodieCatalogTable.catalogProperties
|
||||
val options = Map(
|
||||
HiveSyncConfig.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
|
||||
HiveSyncConfig.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava),
|
||||
HiveSyncConfig.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(newTable.properties.asJava),
|
||||
HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
|
||||
HiveSyncConfigHolder.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava),
|
||||
HiveSyncConfigHolder.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(newTable.properties.asJava),
|
||||
DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
|
||||
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ import org.apache.hudi.DataSourceWriteOptions._
|
||||
import org.apache.hudi.common.util.StringUtils
|
||||
import org.apache.hudi.config.HoodieWriteConfig
|
||||
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
||||
import org.apache.hudi.hive.HiveSyncConfig
|
||||
import org.apache.hudi.hive.HiveSyncConfigHolder
|
||||
import org.apache.hudi.sync.common.HoodieSyncConfig
|
||||
import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions, HoodieSparkSqlWriter, SparkAdapterSupport}
|
||||
import org.apache.spark.sql._
|
||||
@@ -467,13 +467,13 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie
|
||||
KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
|
||||
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName,
|
||||
HoodieSyncConfig.META_SYNC_ENABLED.key -> enableHive.toString,
|
||||
HiveSyncConfig.HIVE_SYNC_ENABLED.key -> enableHive.toString,
|
||||
HiveSyncConfig.HIVE_SYNC_MODE.key -> hiveSyncConfig.syncMode,
|
||||
HiveSyncConfigHolder.HIVE_SYNC_MODE.key -> hiveSyncConfig.getString(HiveSyncConfigHolder.HIVE_SYNC_MODE),
|
||||
HiveSyncConfigHolder.HIVE_SYNC_ENABLED.key -> enableHive.toString,
|
||||
HoodieSyncConfig.META_SYNC_DATABASE_NAME.key -> targetTableDb,
|
||||
HoodieSyncConfig.META_SYNC_TABLE_NAME.key -> targetTableName,
|
||||
HiveSyncConfig.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.supportTimestamp.toString,
|
||||
HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE.key -> hiveSyncConfig.getBoolean(HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE).toString,
|
||||
HoodieSyncConfig.META_SYNC_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp,
|
||||
HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.partitionValueExtractorClass,
|
||||
HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key -> hiveSyncConfig.getString(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS),
|
||||
HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "200"), // set the default parallelism to 200 for sql
|
||||
HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "200"),
|
||||
HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> hoodieProps.getString(HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key, "200"),
|
||||
|
||||
Reference in New Issue
Block a user