[HUDI-2208] Support Bulk Insert For Spark Sql (#3328)

2021-08-09 12:18:31 +08:00
parent 11ea74958d
commit 41a9986a76
11 changed files with 407 additions and 50 deletions
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -256,6 +256,22 @@ object DataSourceWriteOptions {
    .withDocumentation("When set to true, will perform write operations directly using the spark native " +
      "`Row` representation, avoiding any additional conversion costs.")
  /**
   * Enable the bulk insert for sql insert statement.
   */
  val SQL_ENABLE_BULK_INSERT:ConfigProperty[String] = ConfigProperty
    .key("hoodie.sql.bulk.insert.enable")
    .defaultValue("false")
    .withDocumentation("When set to true, the sql insert statement will use bulk insert.")
  val SQL_INSERT_MODE: ConfigProperty[String] = ConfigProperty
    .key("hoodie.sql.insert.mode")
    .defaultValue("upsert")
    .withDocumentation("Insert mode when insert data to pk-table. The optional modes are: upsert, strict and non-strict." +
      "For upsert mode, insert statement do the upsert operation for the pk-table which will update the duplicate record." +
      "For strict mode, insert statement will keep the primary key uniqueness constraint which do not allow duplicate record." +
      "While for non-strict mode, hudi just do the insert operation for the pk-table.")
  val COMMIT_METADATA_KEYPREFIX: ConfigProperty[String] = ConfigProperty
    .key("hoodie.datasource.write.commitmeta.key.prefix")
    .defaultValue("_")
--- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/sql/InsertMode.java
+++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/sql/InsertMode.java
@@ -0,0 +1,66 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.hudi.sql;
 import java.util.Locale;
 /**
 * Insert mode for insert into pk-table.
 */
 public enum InsertMode {
  /**
   * In upsert mode for insert into, duplicate record on primary key
   * will be updated.This is the default insert mode for pk-table.
   */
  UPSERT("upsert"),
  /**
   * In strict mode for insert into, we do the pk uniqueness guarantee
   * for COW pk-table.
   * For MOR pk-table, it has the same behavior with "upsert" mode.
   */
  STRICT("strict"),
  /**
   * In non-strict mode for insert into, we use insert operation
   * to write data which allow writing the duplicate record.
   */
  NON_STRICT("non-strict")
  ;
  private String value;
  InsertMode(String value) {
    this.value = value;
  }
  public String value() {
    return value;
  }
  public static InsertMode of(String value) {
    switch (value.toLowerCase(Locale.ROOT)) {
      case "upsert":
        return UPSERT;
      case "strict":
        return STRICT;
      case "non-strict":
        return NON_STRICT;
      default:
        throw new AssertionError("UnSupport Insert Mode: " + value);
    }
  }
 }
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -164,7 +164,11 @@ object HoodieSparkSqlWriter {
          // Convert to RDD[HoodieRecord]
          val genericRecords: RDD[GenericRecord] = HoodieSparkUtils.createRdd(df, schema, structName, nameSpace)
-          val shouldCombine = parameters(INSERT_DROP_DUPS.key()).toBoolean || operation.equals(WriteOperationType.UPSERT);
+
          val shouldCombine = parameters(INSERT_DROP_DUPS.key()).toBoolean ||
            operation.equals(WriteOperationType.UPSERT) ||
            parameters.getOrElse(HoodieWriteConfig.COMBINE_BEFORE_INSERT_PROP.key(),
              HoodieWriteConfig.COMBINE_BEFORE_INSERT_PROP.defaultValue()).toBoolean
          val hoodieAllIncomingRecords = genericRecords.map(gr => {
            val hoodieRecord = if (shouldCombine) {
              val orderingVal = HoodieAvroUtils.getNestedFieldVal(gr, hoodieConfig.getString(PRECOMBINE_FIELD), false)
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
@@ -114,7 +114,7 @@ object HoodieOptionConfig {
  /**
   * Mapping the sql options to the hoodie table config which used to store to the hoodie
-   * .properites when create the table.
+   * .properties when create the table.
   * @param options
   * @return
   */
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala
@@ -218,7 +218,7 @@ object HoodieSqlUtils extends SparkAdapterSupport {
   * Append the spark config and table options to the baseConfig.
   */
  def withSparkConf(spark: SparkSession, options: Map[String, String])
-                   (baseConfig: Map[String, String]): Map[String, String] = {
+                   (baseConfig: Map[String, String] = Map.empty): Map[String, String] = {
    baseConfig ++ // Table options has the highest priority
      (spark.sessionState.conf.getAllConfs ++ HoodieOptionConfig.mappingSqlOptionToHoodieParam(options))
        .filterKeys(_.startsWith("hoodie."))
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
@@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hudi.DataSourceWriteOptions
 import org.apache.hudi.hive.util.ConfigUtils
 import org.apache.hudi.sql.InsertMode
 import org.apache.spark.sql.{Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
@@ -62,8 +63,8 @@ case class CreateHoodieTableAsSelectCommand(
      }
    }
    val tablePath = getTableLocation(table, sparkSession)
-    val conf = sparkSession.sessionState.newHadoopConf()
+    val hadoopConf = sparkSession.sessionState.newHadoopConf()
-    assert(CreateHoodieTableCommand.isEmptyPath(tablePath, conf),
+    assert(CreateHoodieTableCommand.isEmptyPath(tablePath, hadoopConf),
      s"Path '$tablePath' should be empty for CTAS")
    // ReOrder the query which move the partition columns to the last of the project list
@@ -72,17 +73,15 @@ case class CreateHoodieTableAsSelectCommand(
    // Execute the insert query
    try {
-      // Set if sync as a managed table.
+      val options = Map(
-      sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key,
+        DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
-        (table.tableType == CatalogTableType.MANAGED).toString)
+        DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(table.storage.properties.asJava),
-      // Sync the options to hive serde properties
+        DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(table.properties.asJava),
-      sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key,
+        DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
-        ConfigUtils.configToString(table.storage.properties.asJava))
+        DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
-      // Sync the table properties to hive
+      )
      sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key,
        ConfigUtils.configToString(table.properties.asJava))
      val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
-        mode == SaveMode.Overwrite, refreshTable = false)
+        mode == SaveMode.Overwrite, refreshTable = false, extraOptions = options)
      if (success) {
        // If write success, create the table in catalog if it has not synced to the
        // catalog by the meta sync.
@@ -92,11 +91,11 @@ case class CreateHoodieTableAsSelectCommand(
          createTableCommand.createTableInCatalog(sparkSession, checkPathForManagedTable = false)
        }
      } else { // failed to insert data, clear table path
-        clearTablePath(tablePath, conf)
+        clearTablePath(tablePath, hadoopConf)
      }
    } catch {
      case e: Throwable => // failed to insert data, clear table path
-        clearTablePath(tablePath, conf)
+        clearTablePath(tablePath, hadoopConf)
        throw e
    }
    Seq.empty[Row]
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
@@ -28,10 +28,12 @@ import org.apache.hudi.config.HoodieWriteConfig
 import org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME
 import org.apache.hudi.hive.MultiPartKeysValueExtractor
 import org.apache.hudi.hive.ddl.HiveSyncMode
-import org.apache.hudi.{HoodieSparkSqlWriter, HoodieWriterUtils}
+import org.apache.hudi.sql.InsertMode
 import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter, HoodieWriterUtils}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
-import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SaveMode, SparkSession}
+import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
 import org.apache.spark.sql.execution.command.RunnableCommand
 import org.apache.spark.sql.execution.datasources.LogicalRelation
@@ -58,7 +60,7 @@ case class InsertIntoHoodieTableCommand(
  }
 }
-object InsertIntoHoodieTableCommand {
+object InsertIntoHoodieTableCommand extends Logging {
  /**
   * Run the insert query. We support both dynamic partition insert and static partition insert.
   * @param sparkSession The spark session.
@@ -71,12 +73,14 @@ object InsertIntoHoodieTableCommand {
   *                         , it is None in the map.
   * @param overwrite Whether to overwrite the table.
   * @param refreshTable Whether to refresh the table after insert finished.
   * @param extraOptions Extra options for insert.
   */
  def run(sparkSession: SparkSession, table: CatalogTable, query: LogicalPlan,
          insertPartitions: Map[String, Option[String]],
-          overwrite: Boolean, refreshTable: Boolean = true): Boolean = {
+          overwrite: Boolean, refreshTable: Boolean = true,
          extraOptions: Map[String, String] = Map.empty): Boolean = {
-    val config = buildHoodieInsertConfig(table, sparkSession, overwrite, insertPartitions)
+    val config = buildHoodieInsertConfig(table, sparkSession, overwrite, insertPartitions, extraOptions)
    val mode = if (overwrite && table.partitionColumnNames.isEmpty) {
      // insert overwrite non-partition table
@@ -165,7 +169,7 @@ object InsertIntoHoodieTableCommand {
        Alias(castAttr, f.name)()
      })
    }
-    // Remove the hoodie meta fileds from the projects as we do not need these to write
+    // Remove the hoodie meta fields from the projects as we do not need these to write
    val withoutMetaFieldDataProjects = dataProjects.filter(c => !HoodieSqlUtils.isMetaField(c.name))
    val alignedProjects = withoutMetaFieldDataProjects ++ partitionProjects
    Project(alignedProjects, query)
@@ -179,7 +183,8 @@ object InsertIntoHoodieTableCommand {
      table: CatalogTable,
      sparkSession: SparkSession,
      isOverwrite: Boolean,
-      insertPartitions: Map[String, Option[String]] = Map.empty): Map[String, String] = {
+      insertPartitions: Map[String, Option[String]] = Map.empty,
      extraOptions: Map[String, String]): Map[String, String] = {
    if (insertPartitions.nonEmpty &&
      (insertPartitions.keys.toSet != table.partitionColumnNames.toSet)) {
@@ -187,7 +192,7 @@ object InsertIntoHoodieTableCommand {
        s"[${insertPartitions.keys.mkString(" " )}]" +
        s" not equal to the defined partition in table[${table.partitionColumnNames.mkString(",")}]")
    }
-    val parameters = HoodieOptionConfig.mappingSqlOptionToHoodieParam(table.storage.properties)
+    val parameters = withSparkConf(sparkSession, table.storage.properties)() ++ extraOptions
    val tableType = parameters.getOrElse(TABLE_TYPE.key, TABLE_TYPE.defaultValue)
@@ -209,28 +214,49 @@ object InsertIntoHoodieTableCommand {
      .getOrElse(INSERT_DROP_DUPS.defaultValue)
      .toBoolean
-    val operation = if (isOverwrite) {
+    val enableBulkInsert = parameters.getOrElse(DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key,
-      if (table.partitionColumnNames.nonEmpty) {
+      DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.defaultValue()).toBoolean
-        INSERT_OVERWRITE_OPERATION_OPT_VAL  // overwrite partition
+    val isPartitionedTable = table.partitionColumnNames.nonEmpty
-      } else {
+    val isPrimaryKeyTable = primaryColumns.nonEmpty
-        INSERT_OPERATION_OPT_VAL
+    val insertMode = InsertMode.of(parameters.getOrElse(DataSourceWriteOptions.SQL_INSERT_MODE.key,
-      }
+      DataSourceWriteOptions.SQL_INSERT_MODE.defaultValue()))
-    } else {
+    val isNonStrictMode = insertMode == InsertMode.NON_STRICT
      if (primaryColumns.nonEmpty && !dropDuplicate) {
        UPSERT_OPERATION_OPT_VAL
      } else {
        INSERT_OPERATION_OPT_VAL
      }
    }
-    val payloadClassName = if (primaryColumns.nonEmpty && !dropDuplicate &&
+    val operation =
-      tableType == COW_TABLE_TYPE_OPT_VAL) {
+      (isPrimaryKeyTable, enableBulkInsert, isOverwrite, dropDuplicate) match {
        case (true, true, _, _) if !isNonStrictMode =>
          throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.")
        case (_, true, true, _) if isPartitionedTable =>
          throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.")
        case (_, true, _, true) =>
          throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." +
            s" Please disable $INSERT_DROP_DUPS and try again.")
        // if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table.
        case (_, true, true, _) if !isPartitionedTable => BULK_INSERT_OPERATION_OPT_VAL
        // insert overwrite partition
        case (_, _, true, _) if isPartitionedTable => INSERT_OVERWRITE_OPERATION_OPT_VAL
        // insert overwrite table
        case (_, _, true, _) if !isPartitionedTable => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL
        // if it is pk table and the dropDuplicate has disable, use the upsert operation for strict and upsert mode.
        case (true, false, false, false) if !isNonStrictMode => UPSERT_OPERATION_OPT_VAL
        // if enableBulkInsert is true and the table is non-primaryKeyed, use the bulk insert operation
        case (false, true, _, _) => BULK_INSERT_OPERATION_OPT_VAL
        // if table is pk table and has enableBulkInsert use bulk insert for non-strict mode.
        case (true, true, _, _) if isNonStrictMode => BULK_INSERT_OPERATION_OPT_VAL
        // for the rest case, use the insert operation
        case (_, _, _, _) => INSERT_OPERATION_OPT_VAL
      }
    val payloadClassName = if (operation ==  UPSERT_OPERATION_OPT_VAL &&
      tableType == COW_TABLE_TYPE_OPT_VAL && insertMode == InsertMode.STRICT) {
      // Only validate duplicate key for COW, for MOR it will do the merge with the DefaultHoodieRecordPayload
      // on reading.
      classOf[ValidateDuplicateKeyPayload].getCanonicalName
    } else {
      classOf[DefaultHoodieRecordPayload].getCanonicalName
    }
    logInfo(s"insert statement use write operation type: $operation, payloadClass: $payloadClassName")
    val enableHive = isEnableHive(sparkSession)
    withSparkConf(sparkSession, options) {
      Map(
@@ -243,6 +269,8 @@ object InsertIntoHoodieTableCommand {
        RECORDKEY_FIELD.key -> primaryColumns.mkString(","),
        PARTITIONPATH_FIELD.key -> partitionFields,
        PAYLOAD_CLASS.key -> payloadClassName,
        ENABLE_ROW_WRITER.key -> enableBulkInsert.toString,
        HoodieWriteConfig.COMBINE_BEFORE_INSERT_PROP.key -> isPrimaryKeyTable.toString,
        META_SYNC_ENABLED.key -> enableHive.toString,
        HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
        HIVE_USE_JDBC.key -> "false",
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala
@@ -22,9 +22,10 @@ import org.apache.avro.generic.GenericRecord
 import org.apache.hudi.common.config.TypedProperties
 import org.apache.hudi.common.util.PartitionPathEncodeUtils
 import org.apache.hudi.config.HoodieWriteConfig
-import org.apache.hudi.keygen.{ComplexKeyGenerator, KeyGenUtils}
+import org.apache.hudi.keygen.{BaseKeyGenerator, ComplexKeyGenerator, KeyGenUtils, SparkKeyGeneratorInterface}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.types.{StructType, TimestampType}
-import org.joda.time.format.DateTimeFormat
+import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
 /**
 * A complex key generator for sql command which do some process for the
@@ -62,8 +63,15 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props)
    }
  }
-  override def getPartitionPath(record: GenericRecord) = {
+  override def getRecordKey(row: Row): String = {
-    val partitionPath = super.getPartitionPath(record)
+    if (originKeyGen.isDefined && originKeyGen.get.isInstanceOf[SparkKeyGeneratorInterface]) {
      originKeyGen.get.asInstanceOf[SparkKeyGeneratorInterface].getRecordKey(row)
    } else {
      super.getRecordKey(row)
    }
  }
  private def convertPartitionPathToSqlType(partitionPath: String, rowType: Boolean): String = {
    if (partitionSchema.isDefined) {
      // we can split the partitionPath here because we enable the URL_ENCODE_PARTITIONING_OPT
      // by default for sql.
@@ -82,9 +90,13 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props)
            partitionField.dataType match {
              case TimestampType =>
-                val timeMs = MILLISECONDS.convert(_partitionValue.toLong, MICROSECONDS)
+                val timeMs = if (rowType) { // In RowType, the partitionPathValue is the time format string, convert to millis
                  SqlKeyGenerator.sqlTimestampFormat.parseMillis(_partitionValue)
                } else {
                  MILLISECONDS.convert(_partitionValue.toLong, MICROSECONDS)
                }
                val timestampFormat = PartitionPathEncodeUtils.escapePathName(
-                  SqlKeyGenerator.timestampTimeFormat.print(timeMs))
+                    SqlKeyGenerator.timestampTimeFormat.print(timeMs))
                if (isHiveStyle) s"$hiveStylePrefix$timestampFormat" else timestampFormat
              case _ => partitionValue
            }
@@ -92,10 +104,21 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props)
      }
    } else partitionPath
  }
  override def getPartitionPath(record: GenericRecord) = {
    val partitionPath = super.getPartitionPath(record)
    convertPartitionPathToSqlType(partitionPath, false)
  }
  override def getPartitionPath(row: Row): String = {
    val partitionPath = super.getPartitionPath(row)
    convertPartitionPathToSqlType(partitionPath, true)
  }
 }
 object SqlKeyGenerator {
  val PARTITION_SCHEMA = "hoodie.sql.partition.schema"
  val ORIGIN_KEYGEN_CLASS = "hoodie.sql.origin.keygen.class"
  private val timestampTimeFormat = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss")
  private val sqlTimestampFormat = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.S")
 }
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UuidKeyGenerator.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UuidKeyGenerator.scala
@@ -18,9 +18,9 @@
 package org.apache.spark.sql.hudi.command
 import java.util.UUID
 import org.apache.avro.generic.GenericRecord
 import org.apache.hudi.common.config.TypedProperties
 import org.apache.spark.sql.Row
 /**
 * A KeyGenerator which use the uuid as the record key.
@@ -28,4 +28,6 @@ import org.apache.hudi.common.config.TypedProperties
 class UuidKeyGenerator(props: TypedProperties) extends SqlKeyGenerator(props) {
  override def getRecordKey(record: GenericRecord): String = UUID.randomUUID.toString
  override def getRecordKey(row: Row): String = UUID.randomUUID.toString
 }
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala
@@ -79,12 +79,15 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
  }
  protected def checkException(sql: String)(errorMsg: String): Unit = {
    var hasException = false
    try {
      spark.sql(sql)
    } catch {
      case e: Throwable =>
        assertResult(errorMsg)(e.getMessage)
        hasException = true
    }
    assertResult(true)(hasException)
  }
  protected def removeQuotes(value: Any): Any = {
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -64,6 +64,7 @@ class TestInsertTable extends TestHoodieSqlBase {
  test("Test Insert Into None Partitioned Table") {
    withTempDir { tmp =>
      val tableName = generateTableName
      spark.sql(s"set hoodie.sql.insert.mode=strict")
      // Create none partitioned cow table
      spark.sql(
        s"""
@@ -80,7 +81,6 @@ class TestInsertTable extends TestHoodieSqlBase {
           |  preCombineField = 'ts'
           | )
       """.stripMargin)
      spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)")
      checkAnswer(s"select id, name, price, ts from $tableName")(
        Seq(1, "a1", 10.0, 1000)
@@ -127,6 +127,9 @@ class TestInsertTable extends TestHoodieSqlBase {
      checkAnswer(s"select id, name, price, ts from $tableName2")(
        Seq(1, "a1", 10.0, 1000)
      )
      // disable this config to avoid affect other test in this class.
      spark.sql("set hoodie.datasource.write.insert.drop.duplicates = false")
      spark.sql(s"set hoodie.sql.insert.mode=upsert")
    }
  }
@@ -146,7 +149,6 @@ class TestInsertTable extends TestHoodieSqlBase {
           | partitioned by (dt)
           | location '${tmp.getCanonicalPath}/$tableName'
       """.stripMargin)
      //  Insert overwrite dynamic partition
      spark.sql(
        s"""
@@ -246,7 +248,6 @@ class TestInsertTable extends TestHoodieSqlBase {
             | partitioned by (dt)
             | location '${tmp.getCanonicalPath}/$tableName'
       """.stripMargin)
        spark.sql(s"insert into $tableName partition(dt = $partitionValue) select 1, 'a1', 10")
        spark.sql(s"insert into $tableName select 2, 'a2', 10, $partitionValue")
        checkAnswer(s"select id, name, price, cast(dt as string) from $tableName order by id")(
@@ -303,5 +304,220 @@ class TestInsertTable extends TestHoodieSqlBase {
      "assertion failed: Required select columns count: 4, Current select columns(including static partition column)" +
        " count: 3，columns: (1,a1,10)"
    )
    spark.sql("set hoodie.sql.bulk.insert.enable = true")
    spark.sql("set hoodie.sql.insert.mode= strict")
    val tableName2 = generateTableName
    spark.sql(
      s"""
         |create table $tableName2 (
         |  id int,
         |  name string,
         |  price double,
         |  ts long
         |) using hudi
         | options (
         |   primaryKey = 'id',
         |   preCombineField = 'ts'
         | )
       """.stripMargin)
    checkException(s"insert into $tableName2 values(1, 'a1', 10, 1000)")(
      "Table with primaryKey can not use bulk insert in strict mode."
    )
    val tableName3 = generateTableName
    spark.sql(
      s"""
         |create table $tableName3 (
         |  id int,
         |  name string,
         |  price double,
         |  dt string
         |) using hudi
         | partitioned by (dt)
       """.stripMargin)
    checkException(s"insert overwrite table $tableName3 values(1, 'a1', 10, '2021-07-18')")(
      "Insert Overwrite Partition can not use bulk insert."
    )
    spark.sql("set hoodie.sql.bulk.insert.enable = false")
    spark.sql("set hoodie.sql.insert.mode= upsert")
  }
  test("Test bulk insert") {
    withTempDir { tmp =>
      Seq("cow", "mor").foreach {tableType =>
        // Test bulk insert for single partition
        val tableName = generateTableName
        spark.sql(
          s"""
             |create table $tableName (
             |  id int,
             |  name string,
             |  price double,
             |  dt string
             |) using hudi
             | options (
             |  type = '$tableType'
             | )
             | partitioned by (dt)
             | location '${tmp.getCanonicalPath}/$tableName'
       """.stripMargin)
        spark.sql("set hoodie.datasource.write.insert.drop.duplicates = false")
        // Enable the bulk insert
        spark.sql("set hoodie.sql.bulk.insert.enable = true")
        spark.sql(s"insert into $tableName values(1, 'a1', 10, '2021-07-18')")
        checkAnswer(s"select id, name, price, dt from $tableName")(
          Seq(1, "a1", 10.0, "2021-07-18")
        )
        // Disable the bulk insert
        spark.sql("set hoodie.sql.bulk.insert.enable = false")
        spark.sql(s"insert into $tableName values(2, 'a2', 10, '2021-07-18')")
        checkAnswer(s"select id, name, price, dt from $tableName order by id")(
          Seq(1, "a1", 10.0, "2021-07-18"),
          Seq(2, "a2", 10.0, "2021-07-18")
        )
        // Test bulk insert for multi-level partition
        val tableMultiPartition = generateTableName
        spark.sql(
          s"""
             |create table $tableMultiPartition (
             |  id int,
             |  name string,
             |  price double,
             |  dt string,
             |  hh string
             |) using hudi
             | options (
             |  type = '$tableType'
             | )
             | partitioned by (dt, hh)
             | location '${tmp.getCanonicalPath}/$tableMultiPartition'
       """.stripMargin)
        // Enable the bulk insert
        spark.sql("set hoodie.sql.bulk.insert.enable = true")
        spark.sql(s"insert into $tableMultiPartition values(1, 'a1', 10, '2021-07-18', '12')")
        checkAnswer(s"select id, name, price, dt, hh from $tableMultiPartition")(
          Seq(1, "a1", 10.0, "2021-07-18", "12")
        )
        // Disable the bulk insert
        spark.sql("set hoodie.sql.bulk.insert.enable = false")
        spark.sql(s"insert into $tableMultiPartition " +
          s"values(2, 'a2', 10, '2021-07-18','12')")
        checkAnswer(s"select id, name, price, dt, hh from $tableMultiPartition order by id")(
          Seq(1, "a1", 10.0, "2021-07-18", "12"),
          Seq(2, "a2", 10.0, "2021-07-18", "12")
        )
        // Test bulk insert for non-partitioned table
        val nonPartitionedTable = generateTableName
        spark.sql(
          s"""
             |create table $nonPartitionedTable (
             |  id int,
             |  name string,
             |  price double
             |) using hudi
             | options (
             |  type = '$tableType'
             | )
             | location '${tmp.getCanonicalPath}/$nonPartitionedTable'
       """.stripMargin)
        spark.sql("set hoodie.sql.bulk.insert.enable = true")
        spark.sql(s"insert into $nonPartitionedTable values(1, 'a1', 10)")
        checkAnswer(s"select id, name, price from $nonPartitionedTable")(
          Seq(1, "a1", 10.0)
        )
        spark.sql(s"insert overwrite table $nonPartitionedTable values(2, 'a2', 10)")
        checkAnswer(s"select id, name, price from $nonPartitionedTable")(
          Seq(2, "a2", 10.0)
        )
        spark.sql("set hoodie.sql.bulk.insert.enable = false")
        // Test CTAS for bulk insert
        val tableName2 = generateTableName
        spark.sql(
          s"""
             |create table $tableName2
             |using hudi
             |options(
             | type = '$tableType',
             | primaryKey = 'id'
             |)
             | location '${tmp.getCanonicalPath}/$tableName2'
             | as
             | select * from $tableName
             |""".stripMargin)
        checkAnswer(s"select id, name, price, dt from $tableName2 order by id")(
          Seq(1, "a1", 10.0, "2021-07-18"),
          Seq(2, "a2", 10.0, "2021-07-18")
        )
      }
    }
  }
  test("Test combine before insert") {
    withTempDir{tmp =>
      val tableName = generateTableName
      spark.sql(
        s"""
           |create table $tableName (
           |  id int,
           |  name string,
           |  price double,
           |  ts long
           |) using hudi
           | location '${tmp.getCanonicalPath}/$tableName'
           | options (
           |  primaryKey = 'id',
           |  preCombineField = 'ts'
           | )
       """.stripMargin)
      spark.sql(
        s"""
           |insert overwrite table $tableName
           |select * from (
           | select 1 as id, 'a1' as name, 10 as price, 1000 as ts
           | union all
           | select 1 as id, 'a1' as name, 11 as price, 1001 as ts
           | )
           |""".stripMargin
      )
      checkAnswer(s"select id, name, price, ts from $tableName")(
        Seq(1, "a1", 11.0, 1001)
      )
    }
  }
  test("Test insert pk-table") {
    withTempDir{tmp =>
      val tableName = generateTableName
      spark.sql(
        s"""
           |create table $tableName (
           |  id int,
           |  name string,
           |  price double,
           |  ts long
           |) using hudi
           | location '${tmp.getCanonicalPath}/$tableName'
           | options (
           |  primaryKey = 'id',
           |  preCombineField = 'ts'
           | )
       """.stripMargin)
      spark.sql(s"insert into $tableName values(1, 'a1', 10, 1000)")
      spark.sql(s"insert into $tableName values(1, 'a1', 11, 1000)")
      checkAnswer(s"select id, name, price, ts from $tableName")(
        Seq(1, "a1", 11.0, 1000)
      )
    }
  }
 }