1
0

[HUDI-3198] Improve Spark SQL create table from existing hudi table (#4584)

This modifies the SQL statement used to create a hudi table based on an existing hudi path.

From:

```sql
create table hudi_tbl using hudi tblproperties (primaryKey='id', preCombineField='ts', type='cow') partitioned by (pt) location '/path/to/hudi'
```

To:
```sql
create table hudi_tbl using hudi location '/path/to/hudi'
```
This commit is contained in:
Yann Byron
2022-01-15 02:15:29 +08:00
committed by GitHub
parent 53f75f84b8
commit 5e0171a5ee
5 changed files with 50 additions and 86 deletions

View File

@@ -164,12 +164,20 @@ class HoodieCatalogTable(val spark: SparkSession, val table: CatalogTable) exten
val properties = new Properties()
properties.putAll(tableConfigs.asJava)
HoodieTableMetaClient.withPropertyBuilder()
.fromProperties(properties)
.setTableName(table.identifier.table)
.setTableCreateSchema(SchemaConverters.toAvroType(finalSchema).toString())
.setPartitionFields(table.partitionColumnNames.mkString(","))
.initTable(hadoopConf, tableLocation)
if (hoodieTableExists) {
// just persist hoodie.table.create.schema
HoodieTableMetaClient.withPropertyBuilder()
.fromProperties(properties)
.setTableCreateSchema(SchemaConverters.toAvroType(finalSchema).toString())
.initTable(hadoopConf, tableLocation)
} else {
HoodieTableMetaClient.withPropertyBuilder()
.fromProperties(properties)
.setTableName(table.identifier.table)
.setTableCreateSchema(SchemaConverters.toAvroType(finalSchema).toString())
.setPartitionFields(table.partitionColumnNames.mkString(","))
.initTable(hadoopConf, tableLocation)
}
}
/**

View File

@@ -61,6 +61,7 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, table)
// check if there are conflict between table configs defined in hoodie table and properties defined in catalog.
CreateHoodieTableCommand.validateTblProperties(hoodieCatalogTable)
// init hoodie table
hoodieCatalogTable.initHoodieTable()
@@ -129,12 +130,14 @@ object CreateHoodieTableCommand {
val newTableIdentifier = table.identifier
.copy(table = tablName, database = Some(newDatabaseName))
val partitionColumnNames = hoodieCatalogTable.partitionSchema.map(_.name)
// append pk, preCombineKey, type to the properties of table
val newTblProperties = hoodieCatalogTable.catalogProperties ++ HoodieOptionConfig.extractSqlOptions(properties)
val newTable = table.copy(
identifier = newTableIdentifier,
schema = hoodieCatalogTable.tableSchema,
storage = newStorage,
schema = hoodieCatalogTable.tableSchema,
partitionColumnNames = partitionColumnNames,
createVersion = SPARK_VERSION,
properties = newTblProperties
)