[HUDI-4001] Filter out the properties that should not be used when creating a table for Spark SQL (#5495)
This commit is contained in:
@@ -23,7 +23,8 @@ import org.apache.hudi.DataSourceWriteOptions
|
||||
import org.apache.hudi.hive.HiveSyncConfig
|
||||
import org.apache.hudi.hive.util.ConfigUtils
|
||||
import org.apache.hudi.sql.InsertMode
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, HoodieCatalogTable}
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, HoodieCatalogTable}
|
||||
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.needFilterProps
|
||||
import org.apache.spark.sql.catalyst.plans.QueryPlan
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
||||
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
|
||||
@@ -66,9 +67,21 @@ case class CreateHoodieTableAsSelectCommand(
|
||||
|
||||
// Reorder the query so that the partition columns are moved to the end of the project list
|
||||
val reOrderedQuery = reOrderPartitionColumn(query, table.partitionColumnNames)
|
||||
val tableWithSchema = table.copy(schema = reOrderedQuery.schema)
|
||||
// Remove the properties that should not be used
|
||||
val newStorage = new CatalogStorageFormat(
|
||||
table.storage.locationUri,
|
||||
table.storage.inputFormat,
|
||||
table.storage.outputFormat,
|
||||
table.storage.serde,
|
||||
table.storage.compressed,
|
||||
table.storage.properties.--(needFilterProps))
|
||||
val newTable = table.copy(
|
||||
storage = newStorage,
|
||||
schema = reOrderedQuery.schema,
|
||||
properties = table.properties.--(needFilterProps)
|
||||
)
|
||||
|
||||
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableWithSchema)
|
||||
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, newTable)
|
||||
val tablePath = hoodieCatalogTable.tableLocation
|
||||
val hadoopConf = sparkSession.sessionState.newHadoopConf()
|
||||
assert(HoodieSqlCommonUtils.isEmptyPath(tablePath, hadoopConf),
|
||||
@@ -83,11 +96,11 @@ case class CreateHoodieTableAsSelectCommand(
|
||||
val options = Map(
|
||||
HiveSyncConfig.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
|
||||
HiveSyncConfig.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava),
|
||||
HiveSyncConfig.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(table.properties.asJava),
|
||||
HiveSyncConfig.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(newTable.properties.asJava),
|
||||
DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
|
||||
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
|
||||
)
|
||||
val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
|
||||
val success = InsertIntoHoodieTableCommand.run(sparkSession, newTable, reOrderedQuery, Map.empty,
|
||||
mode == SaveMode.Overwrite, refreshTable = false, extraOptions = options)
|
||||
if (success) {
|
||||
// If write success, create the table in catalog if it has not synced to the
|
||||
|
||||
Reference in New Issue
Block a user