HUDI-123 Rename code packages/constants to org.apache.hudi (#830)

- Rename com.uber.hoodie to org.apache.hudi - Flag to pass com.uber.hoodie Input formats for hoodie-sync - Works with HUDI demo. - Also tested for backwards compatibility with datasets built by com.uber.hoodie packages - Migration guide : https://cwiki.apache.org/confluence/display/HUDI/Migration+Guide+From+com.uber.hoodie+to+org.apache.hudi
2019-08-11 17:48:17 -07:00
parent 722b6be04a
commit a4f9d7575f
546 changed files with 3858 additions and 3562 deletions
--- a/hudi-spark/src/test/scala/DataSourceDefaultsTest.scala
+++ b/hudi-spark/src/test/scala/DataSourceDefaultsTest.scala
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.avro.generic.GenericRecord
+import org.apache.hudi.common.util.{Option, SchemaTestUtil, TypedProperties}
+import org.apache.hudi.exception.HoodieException
+import org.apache.hudi.{ComplexKeyGenerator, DataSourceWriteOptions, EmptyHoodieRecordPayload, OverwriteWithLatestAvroPayload, SimpleKeyGenerator}
+import org.junit.Assert._
+import org.junit.{Before, Test}
+import org.scalatest.junit.AssertionsForJUnit
+
+/**
+  * Tests on the default key generators and record payload classes.
+  *
+  * Every test works off `baseRecord`, which is regenerated before each test from the
+  * schema returned by `SchemaTestUtil.getComplexEvolvedSchema`.
+  */
+class DataSourceDefaultsTest extends AssertionsForJUnit {
+
+  val schema = SchemaTestUtil.getComplexEvolvedSchema
+  var baseRecord: GenericRecord = null
+
+  /** Regenerates `baseRecord` before every test so tests never share a record instance. */
+  @Before def initialize(): Unit = {
+    baseRecord = SchemaTestUtil
+      .generateAvroRecordFromJson(schema, 1, "001", "f1")
+  }
+
+  /**
+    * Builds key generator properties carrying both the record key and the partition path field.
+    *
+    * @param recordKeyFieldName value stored under `RECORDKEY_FIELD_OPT_KEY`
+    * @param partitionPathField value stored under `PARTITIONPATH_FIELD_OPT_KEY`
+    * @return a fresh `TypedProperties` holding exactly these two settings
+    */
+  private def getKeyConfig(recordKeyFieldName: String, partitionPathField: String): TypedProperties = {
+    val props = new TypedProperties()
+    props.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, recordKeyFieldName)
+    props.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, partitionPathField)
+    props
+  }
+
+  /**
+    * Builds properties holding a single setting; used to exercise the failure paths where
+    * one of the two mandatory key generator settings is missing.
+    */
+  private def singleProperty(key: String, value: String): TypedProperties = {
+    val props = new TypedProperties()
+    props.setProperty(key, value)
+    props
+  }
+
+  /** Checks `SimpleKeyGenerator` against top-level fields, nested fields and bad configs. */
+  @Test def testSimpleKeyGenerator(): Unit = {
+    // top level, valid fields
+    val hk1 = new SimpleKeyGenerator(getKeyConfig("field1", "name")).getKey(baseRecord)
+    assertEquals("field1", hk1.getRecordKey)
+    assertEquals("name1", hk1.getPartitionPath)
+
+    // partition path field not specified; construction stays inside the intercepted block
+    // because the failure may be raised by the constructor or by getKey
+    intercept[IllegalArgumentException] {
+      new SimpleKeyGenerator(singleProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1"))
+        .getKey(baseRecord)
+    }
+
+    // recordkey field not specified
+    intercept[IllegalArgumentException] {
+      new SimpleKeyGenerator(singleProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionField"))
+        .getKey(baseRecord)
+    }
+
+    // nested field as record key and partition path
+    val hk2 = new SimpleKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.isAdmin"))
+      .getKey(baseRecord)
+    assertEquals("UserId1@001", hk2.getRecordKey)
+    assertEquals("false", hk2.getPartitionPath)
+
+    // Nested record key not found
+    intercept[HoodieException] {
+      new SimpleKeyGenerator(getKeyConfig("testNestedRecord.NotThere", "testNestedRecord.isAdmin"))
+        .getKey(baseRecord)
+    }
+
+    // if partition path can't be found, return default partition path
+    val hk3 = new SimpleKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.notThere"))
+      .getKey(baseRecord)
+    assertEquals("default", hk3.getPartitionPath)
+  }
+
+  /** Checks `ComplexKeyGenerator` against multi-field keys, nested fields and bad configs. */
+  @Test def testComplexKeyGenerator(): Unit = {
+    // top level, valid fields
+    val hk1 = new ComplexKeyGenerator(getKeyConfig("field1,name", "field1,name")).getKey(baseRecord)
+    assertEquals("field1:field1,name:name1", hk1.getRecordKey)
+    assertEquals("field1/name1", hk1.getPartitionPath)
+
+    // partition path field not specified; construction stays inside the intercepted block
+    // because the failure may be raised by the constructor or by getKey
+    intercept[IllegalArgumentException] {
+      new ComplexKeyGenerator(singleProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "field1"))
+        .getKey(baseRecord)
+    }
+
+    // recordkey field not specified
+    intercept[IllegalArgumentException] {
+      new ComplexKeyGenerator(singleProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "partitionField"))
+        .getKey(baseRecord)
+    }
+
+    // nested field as record key and partition path
+    val hk2 = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId,testNestedRecord.isAdmin", "testNestedRecord.userId,testNestedRecord.isAdmin"))
+      .getKey(baseRecord)
+    assertEquals("testNestedRecord.userId:UserId1@001,testNestedRecord.isAdmin:false", hk2.getRecordKey)
+    assertEquals("UserId1@001/false", hk2.getPartitionPath)
+
+    // Nested record key not found
+    intercept[HoodieException] {
+      new ComplexKeyGenerator(getKeyConfig("testNestedRecord.NotThere", "testNestedRecord.isAdmin"))
+        .getKey(baseRecord)
+    }
+
+    // if partition path can't be found, return default partition path
+    val hk3 = new ComplexKeyGenerator(getKeyConfig("testNestedRecord.userId", "testNestedRecord.notThere"))
+      .getKey(baseRecord)
+    assertEquals("default", hk3.getPartitionPath)
+  }
+
+  /**
+    * Checks that `OverwriteWithLatestAvroPayload.preCombine` yields the record generated for
+    * the higher ordering value (2), whichever payload the call is made on.
+    */
+  @Test def testOverwriteWithLatestAvroPayload(): Unit = {
+    val overWritePayload1 = new OverwriteWithLatestAvroPayload(baseRecord, 1)
+    val laterRecord = SchemaTestUtil
+      .generateAvroRecordFromJson(schema, 2, "001", "f1")
+    val overWritePayload2 = new OverwriteWithLatestAvroPayload(laterRecord, 2)
+
+    // it will provide the record with greatest combine value
+    val combinedPayload12 = overWritePayload1.preCombine(overWritePayload2)
+    val combinedGR12 = combinedPayload12.getInsertValue(schema).get().asInstanceOf[GenericRecord]
+    assertEquals("field2", combinedGR12.get("field1").toString)
+
+    // and the outcome does not depend on the order of processing
+    val combinedPayload21 = overWritePayload2.preCombine(overWritePayload1)
+    val combinedGR21 = combinedPayload21.getInsertValue(schema).get().asInstanceOf[GenericRecord]
+    assertEquals("field2", combinedGR21.get("field1").toString)
+  }
+
+  /** Checks that combining two `EmptyHoodieRecordPayload`s produces an empty insert value. */
+  @Test def testEmptyHoodieRecordPayload(): Unit = {
+    val emptyPayload1 = new EmptyHoodieRecordPayload(baseRecord, 1)
+    val laterRecord = SchemaTestUtil
+      .generateAvroRecordFromJson(schema, 2, "001", "f1")
+    val emptyPayload2 = new EmptyHoodieRecordPayload(laterRecord, 2)
+
+    // it will provide an empty record
+    val combinedPayload12 = emptyPayload1.preCombine(emptyPayload2)
+    val combined12 = combinedPayload12.getInsertValue(schema)
+    assertEquals(Option.empty(), combined12)
+  }
+}
--- a/hudi-spark/src/test/scala/DataSourceTest.scala
+++ b/hudi-spark/src/test/scala/DataSourceTest.scala
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hadoop.fs.{FileSystem, Path}
+import org.apache.hudi.common.HoodieTestDataGenerator
+import org.apache.hudi.common.util.FSUtils
+import org.apache.hudi.config.HoodieWriteConfig
+import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
+import org.apache.spark.sql._
+import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime}
+import org.junit.Assert._
+import org.junit.rules.TemporaryFolder
+import org.junit.{Before, Test}
+import org.scalatest.junit.AssertionsForJUnit
+
+import scala.collection.JavaConversions._
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, Future}
+
+/**
+  * Basic tests on the spark datasource.
+  *
+  * Each test gets its own temporary base path and a local SparkSession from `initialize`;
+  * both are released again by `cleanup`.
+  */
+class DataSourceTest extends AssertionsForJUnit {
+
+  var spark: SparkSession = null
+  var dataGen: HoodieTestDataGenerator = null
+  val commonOpts = Map(
+    "hoodie.insert.shuffle.parallelism" -> "4",
+    "hoodie.upsert.shuffle.parallelism" -> "4",
+    DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "_row_key",
+    DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "partition",
+    DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "timestamp",
+    HoodieWriteConfig.TABLE_NAME -> "hoodie_test"
+  )
+  var basePath: String = null
+  var fs: FileSystem = null
+  // Kept so that cleanup can remove the directory created for the current test.
+  private var tempFolder: TemporaryFolder = null
+
+  /** Creates the SparkSession, the data generator and a fresh temporary base path. */
+  @Before def initialize(): Unit = {
+    spark = SparkSession.builder
+      .appName("Hoodie Datasource test")
+      .master("local[2]")
+      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
+      .getOrCreate
+    dataGen = new HoodieTestDataGenerator()
+    tempFolder = new TemporaryFolder
+    tempFolder.create()
+    basePath = tempFolder.getRoot.getAbsolutePath
+    fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration)
+  }
+
+  /**
+    * Releases what `initialize` acquired. Without this every test leaves a temporary
+    * directory behind, and the Kryo-configured session stays alive for whichever suite
+    * calls `getOrCreate` next in the same JVM.
+    */
+  @org.junit.After def cleanup(): Unit = {
+    // Null checks keep cleanup safe when initialize failed part-way through.
+    if (spark != null) {
+      spark.stop()
+      spark = null
+    }
+    if (tempFolder != null) {
+      tempFolder.delete()
+      tempFolder = null
+    }
+  }
+
+  /**
+    * Inserts 100 records, upserts a batch of updates, then checks the read-optimized view
+    * and the incremental view for both commits.
+    */
+  @Test def testCopyOnWriteStorage(): Unit = {
+    // Insert Operation
+    val records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("000", 100)).toList
+    val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+    inputDF1.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .mode(SaveMode.Overwrite)
+      .save(basePath)
+
+    assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
+    val commitInstantTime1: String = HoodieDataSourceHelpers.latestCommit(fs, basePath)
+
+    // Read RO View
+    val hoodieROViewDF1 = spark.read.format("org.apache.hudi")
+      .load(basePath + "/*/*/*/*")
+    assertEquals(100, hoodieROViewDF1.count())
+
+    val records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("001", 100)).toList
+    val inputDF2: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records2, 2))
+    val uniqueKeyCnt = inputDF2.select("_row_key").distinct().count()
+
+    // Upsert Operation (no explicit operation option is set for this write)
+    inputDF2.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .mode(SaveMode.Append)
+      .save(basePath)
+
+    val commitInstantTime2: String = HoodieDataSourceHelpers.latestCommit(fs, basePath)
+    assertEquals(2, HoodieDataSourceHelpers.listCommitsSince(fs, basePath, "000").size())
+
+    // Read RO View
+    val hoodieROViewDF2 = spark.read.format("org.apache.hudi")
+      .load(basePath + "/*/*/*/*")
+    assertEquals(100, hoodieROViewDF2.count()) // still 100, since we only updated
+
+    // Read Incremental View
+    // we have 2 commits, try pulling the first commit (which is not the latest)
+    val firstCommit = HoodieDataSourceHelpers.listCommitsSince(fs, basePath, "000").get(0)
+    val hoodieIncViewDF1 = spark.read.format("org.apache.hudi")
+      .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, "000")
+      .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY, firstCommit)
+      .load(basePath)
+    assertEquals(100, hoodieIncViewDF1.count()) // 100 initial inserts must be pulled
+    val countsPerCommit1 = hoodieIncViewDF1.groupBy("_hoodie_commit_time").count().collect()
+    assertEquals(1, countsPerCommit1.length)
+    assertEquals(firstCommit, countsPerCommit1(0).get(0))
+
+    // pull the latest commit
+    val hoodieIncViewDF2 = spark.read.format("org.apache.hudi")
+      .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, commitInstantTime1)
+      .load(basePath)
+
+    assertEquals(uniqueKeyCnt, hoodieIncViewDF2.count()) // every distinct updated key must be pulled
+    val countsPerCommit2 = hoodieIncViewDF2.groupBy("_hoodie_commit_time").count().collect()
+    assertEquals(1, countsPerCommit2.length)
+    assertEquals(commitInstantTime2, countsPerCommit2(0).get(0))
+  }
+
+  /** Inserts 100 records into a merge-on-read dataset and reads them back. */
+  @Test def testMergeOnReadStorage(): Unit = {
+    // Insert Operation on merge-on-read storage
+    val records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("001", 100)).toList
+    val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+    inputDF1.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
+      .option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL)
+      .mode(SaveMode.Overwrite)
+      .save(basePath)
+
+    assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
+
+    // Read RO View
+    val hoodieROViewDF1 = spark.read.format("org.apache.hudi").load(basePath + "/*/*/*/*")
+    assertEquals(100, hoodieROViewDF1.count()) // all 100 inserted records must be readable
+  }
+
+  /**
+    * Writes 10 records, then appends a batch holding 9 already-written keys plus 2 new keys
+    * with `INSERT_DROP_DUPS_OPT_KEY` enabled, and expects only the 2 new keys to be added.
+    */
+  @Test def testDropInsertDup(): Unit = {
+    val insert1Cnt = 10
+    val insert2DupKeyCnt = 9
+    val insert2NewKeyCnt = 2
+
+    val totalUniqueKeyToGenerate = insert1Cnt + insert2NewKeyCnt
+    val allRecords = dataGen.generateInserts("001", totalUniqueKeyToGenerate)
+    val inserts1 = allRecords.subList(0, insert1Cnt)
+    val inserts2New = dataGen.generateSameKeyInserts("002", allRecords.subList(insert1Cnt, insert1Cnt + insert2NewKeyCnt))
+    val inserts2Dup = dataGen.generateSameKeyInserts("002", inserts1.subList(0, insert2DupKeyCnt))
+
+    val records1 = DataSourceTestUtils.convertToStringList(inserts1).toList
+    val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+    inputDF1.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .mode(SaveMode.Overwrite)
+      .save(basePath)
+    val hoodieROViewDF1 = spark.read.format("org.apache.hudi")
+      .load(basePath + "/*/*/*/*")
+    assertEquals(insert1Cnt, hoodieROViewDF1.count())
+
+    val commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, basePath)
+    val records2 = DataSourceTestUtils
+      .convertToStringList(inserts2Dup ++ inserts2New)
+      .toList
+    val inputDF2: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records2, 2))
+    inputDF2.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .option(DataSourceWriteOptions.INSERT_DROP_DUPS_OPT_KEY, "true")
+      .mode(SaveMode.Append)
+      .save(basePath)
+    val hoodieROViewDF2 = spark.read.format("org.apache.hudi")
+      .load(basePath + "/*/*/*/*")
+    // expected value goes first so that a failure message reads the right way round
+    assertEquals(totalUniqueKeyToGenerate, hoodieROViewDF2.count())
+
+    val hoodieIncViewDF2 = spark.read.format("org.apache.hudi")
+      .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, commitInstantTime1)
+      .load(basePath)
+    assertEquals(insert2NewKeyCnt, hoodieIncViewDF2.count())
+  }
+
+  /**
+    * Streams JSON files from a source directory into a dataset while a second future drops
+    * two batches into that directory and verifies the resulting commits.
+    *
+    * Currently disabled: the checks depend on fixed sleeps lining up with micro-batches.
+    */
+  //@Test (TODO: re-enable after fixing noisiness)
+  def testStructuredStreaming(): Unit = {
+    fs.delete(new Path(basePath), true)
+    val sourcePath = basePath + "/source"
+    val destPath = basePath + "/dest"
+    fs.mkdirs(new Path(sourcePath))
+
+    // First chunk of data
+    val records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("000", 100)).toList
+    val inputDF1: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+
+    // Second chunk of data
+    val records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("001", 100)).toList
+    val inputDF2: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records2, 2))
+    val uniqueKeyCnt = inputDF2.select("_row_key").distinct().count()
+
+    // define the source of streaming
+    val streamingInput =
+      spark.readStream
+      .schema(inputDF1.schema)
+      .json(sourcePath)
+
+    // f1 runs the streaming sink; awaitTermination(10000) returns after at most 10 seconds
+    val f1 = Future {
+      println("streaming starting")
+      //'writeStream' can be called only on streaming Dataset/DataFrame
+      streamingInput
+        .writeStream
+        .format("org.apache.hudi")
+        .options(commonOpts)
+        .trigger(new ProcessingTime(100))
+        .option("checkpointLocation", basePath + "/checkpoint")
+        .outputMode(OutputMode.Append)
+        .start(destPath)
+        .awaitTermination(10000)
+      println("streaming ends")
+    }
+
+    // f2 feeds the source directory and verifies what the stream has committed
+    val f2 = Future {
+      inputDF1.write.mode(SaveMode.Append).json(sourcePath)
+      // wait for spark streaming to process one microbatch
+      Thread.sleep(3000)
+      assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, destPath, "000"))
+      val commitInstantTime1: String = HoodieDataSourceHelpers.latestCommit(fs, destPath)
+      // Read RO View
+      val hoodieROViewDF1 = spark.read.format("org.apache.hudi")
+        .load(destPath + "/*/*/*/*")
+      assertEquals(100, hoodieROViewDF1.count())
+
+      inputDF2.write.mode(SaveMode.Append).json(sourcePath)
+      // wait for spark streaming to process one microbatch
+      Thread.sleep(10000)
+      val commitInstantTime2: String = HoodieDataSourceHelpers.latestCommit(fs, destPath)
+
+      assertEquals(2, HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").size())
+      // Read RO View
+      val hoodieROViewDF2 = spark.read.format("org.apache.hudi")
+        .load(destPath + "/*/*/*/*")
+      assertEquals(100, hoodieROViewDF2.count()) // still 100, since we only updated
+
+
+      // Read Incremental View
+      // we have 2 commits, try pulling the first commit (which is not the latest)
+      val firstCommit = HoodieDataSourceHelpers.listCommitsSince(fs, destPath, "000").get(0)
+      val hoodieIncViewDF1 = spark.read.format("org.apache.hudi")
+        .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL)
+        .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, "000")
+        .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY, firstCommit)
+        .load(destPath)
+      assertEquals(100, hoodieIncViewDF1.count())
+      // 100 initial inserts must be pulled
+      val countsPerCommit1 = hoodieIncViewDF1.groupBy("_hoodie_commit_time").count().collect()
+      assertEquals(1, countsPerCommit1.length)
+      assertEquals(firstCommit, countsPerCommit1(0).get(0))
+
+      // pull the latest commit
+      val hoodieIncViewDF2 = spark.read.format("org.apache.hudi")
+        .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL)
+        .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, commitInstantTime1)
+        .load(destPath)
+
+      assertEquals(uniqueKeyCnt, hoodieIncViewDF2.count()) // every distinct updated key must be pulled
+      val countsPerCommit2 = hoodieIncViewDF2.groupBy("_hoodie_commit_time").count().collect()
+      assertEquals(1, countsPerCommit2.length)
+      assertEquals(commitInstantTime2, countsPerCommit2(0).get(0))
+    }
+
+    // A failed assertion inside either future surfaces here as the test failure.
+    Await.result(Future.sequence(Seq(f1, f2)), Duration.Inf)
+
+  }
+}
--- a/hudi-spark/src/test/scala/org/apache/hudi/HoodieSparkSqlWriterSuite.scala
+++ b/hudi-spark/src/test/scala/org/apache/hudi/HoodieSparkSqlWriterSuite.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.config.HoodieWriteConfig
+import org.apache.hudi.exception.HoodieException
+import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.scalatest.{FunSuite, Matchers}
+
+/**
+  * Tests for `HoodieSparkSqlWriter`: merging of write defaults with caller-supplied
+  * parameters, and rejection of a session that reports an unsupported serializer.
+  */
+class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
+
+  test("Parameters With Write Defaults") {
+    // Defaults as produced for an empty parameter map.
+    val originals = HoodieSparkSqlWriter.parametersWithWriteDefaults(Map.empty)
+    val rhsKey = "hoodie.right.hand.side.key"
+    val rhsVal = "hoodie.right.hand.side.val"
+    val modifier = Map(OPERATION_OPT_KEY -> INSERT_OPERATION_OPT_VAL, STORAGE_TYPE_OPT_KEY -> MOR_STORAGE_TYPE_OPT_VAL, rhsKey -> rhsVal)
+    val modified = HoodieSparkSqlWriter.parametersWithWriteDefaults(modifier)
+    val matcher = (k: String, v: String) => modified(k) should be(v)
+
+    // Every default key must still be present, carrying the caller's value when overridden.
+    originals foreach {
+      case (k, v) => matcher(k, modifier.getOrElse(k, v))
+    }
+
+    // Every caller-supplied entry must be present as well. Iterating over the defaults alone
+    // can never reach rhsKey, because the defaults were built from an empty map.
+    modifier foreach {
+      case (k, v) => matcher(k, v)
+    }
+  }
+
+  test("throw hoodie exception when invalid serializer") {
+    val session = SparkSession.builder().appName("hoodie_test").master("local").getOrCreate()
+    try {
+      val sqlContext = session.sqlContext
+      val options = Map("path" -> "hoodie/test/path", HoodieWriteConfig.TABLE_NAME -> "hoodie_test_tbl")
+      val e = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.ErrorIfExists, options, session.emptyDataFrame))
+      assert(e.getMessage.contains("spark.serializer"))
+    } finally {
+      // Stop the session so that it is not handed to whichever suite calls getOrCreate next.
+      session.stop()
+    }
+  }
+
+}