1
0

[HUDI-4001] Filter out the properties that should not be used when creating a table for Spark SQL (#5495)

This commit is contained in:
董可伦
2022-05-16 09:50:29 +08:00
committed by GitHub
parent 6e16e719cd
commit 75f847691f
5 changed files with 127 additions and 11 deletions

View File

@@ -23,7 +23,8 @@ import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.hive.HiveSyncConfig
import org.apache.hudi.hive.util.ConfigUtils
import org.apache.hudi.sql.InsertMode
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, HoodieCatalogTable}
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HoodieCatalogTable}
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.needFilterProps
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
@@ -66,9 +67,21 @@ case class CreateHoodieTableAsSelectCommand(
// ReOrder the query which move the partition columns to the last of the project list
val reOrderedQuery = reOrderPartitionColumn(query, table.partitionColumnNames)
val tableWithSchema = table.copy(schema = reOrderedQuery.schema)
// Remove properties that should not be used when creating the table
val newStorage = new CatalogStorageFormat(
table.storage.locationUri,
table.storage.inputFormat,
table.storage.outputFormat,
table.storage.serde,
table.storage.compressed,
table.storage.properties.--(needFilterProps))
val newTable = table.copy(
storage = newStorage,
schema = reOrderedQuery.schema,
properties = table.properties.--(needFilterProps)
)
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableWithSchema)
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, newTable)
val tablePath = hoodieCatalogTable.tableLocation
val hadoopConf = sparkSession.sessionState.newHadoopConf()
assert(HoodieSqlCommonUtils.isEmptyPath(tablePath, hadoopConf),
@@ -83,11 +96,11 @@ case class CreateHoodieTableAsSelectCommand(
val options = Map(
HiveSyncConfig.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
HiveSyncConfig.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava),
HiveSyncConfig.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(table.properties.asJava),
HiveSyncConfig.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(newTable.properties.asJava),
DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
)
val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
val success = InsertIntoHoodieTableCommand.run(sparkSession, newTable, reOrderedQuery, Map.empty,
mode == SaveMode.Overwrite, refreshTable = false, extraOptions = options)
if (success) {
// If write success, create the table in catalog if it has not synced to the