From 49829f8822bc6bc9389a794e9c08b06c1b1909cf Mon Sep 17 00:00:00 2001 From: pengzhiwei Date: Fri, 20 Aug 2021 14:21:10 +0800 Subject: [PATCH] [HUDI-2339] Create Table If Not Exists Failed After Alter Table (#3510) --- .../spark/sql/hudi/HoodieSqlUtils.scala | 2 +- .../sql/hudi/analysis/HoodieAnalysis.scala | 6 +++-- .../spark/sql/hudi/TestCreateTable.scala | 25 +++++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala index b0a8b525d..c1130d251 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala @@ -77,7 +77,7 @@ object HoodieSqlUtils extends SparkAdapterSupport { case _: Throwable => None } avroSchema.map(SchemaConverters.toSqlType(_).dataType - .asInstanceOf[StructType]) + .asInstanceOf[StructType]).map(removeMetaFields) } private def tripAlias(plan: LogicalPlan): LogicalPlan = { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala index 74a74882a..a588eb604 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala @@ -305,12 +305,14 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi case c @ CreateTable(tableDesc, _, _) if isHoodieTable(tableDesc) => val tablePath = getTableLocation(c.tableDesc, sparkSession) - if (tableExistsInPath(tablePath, sparkSession.sessionState.newHadoopConf())) { + val tableExistInCatalog = sparkSession.sessionState.catalog.tableExists(tableDesc.identifier) + // Only when the table has not exist in catalog, we need to fill the schema info for creating table. + if (!tableExistInCatalog && tableExistsInPath(tablePath, sparkSession.sessionState.newHadoopConf())) { val metaClient = HoodieTableMetaClient.builder() .setBasePath(tablePath) .setConf(sparkSession.sessionState.newHadoopConf()) .build() - val tableSchema = HoodieSqlUtils.getTableSqlSchema(metaClient).map(HoodieSqlUtils.addMetaFields) + val tableSchema = HoodieSqlUtils.getTableSqlSchema(metaClient) if (tableSchema.isDefined && tableDesc.schema.isEmpty) { // Fill the schema with the schema from the table c.copy(tableDesc.copy(schema = tableSchema.get)) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala index f34d5f34c..2af8fd782 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCreateTable.scala @@ -505,4 +505,29 @@ class TestCreateTable extends TestHoodieSqlBase { } } + test("Test Create Table Exists In Catalog") { + val tableName = generateTableName + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double + |) using hudi + |""".stripMargin + ) + + spark.sql(s"alter table $tableName add columns(ts bigint)") + + // Check "create table if not exist" works after schema evolution. + spark.sql( + s""" + |create table if not exists $tableName ( + | id int, + | name string, + | price double + |) using hudi + |""".stripMargin + ) + } }