[HUDI-2706] refactor spark-sql to make consistent with DataFrame api (#3936)
This commit is contained in:
@@ -206,7 +206,7 @@ public class HoodieJavaApp {
|
||||
.option(DataSourceWriteOptions.OPERATION().key(), "delete")
|
||||
.option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key")
|
||||
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition")
|
||||
.option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "_row_key")
|
||||
.option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp")
|
||||
.option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(),
|
||||
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
||||
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
|
||||
|
||||
@@ -18,7 +18,7 @@ set hoodie.delete.shuffle.parallelism = 1;
|
||||
|
||||
# CTAS
|
||||
|
||||
create table h0 using hudi options(type = '${tableType}')
|
||||
create table h0 using hudi options(type = '${tableType}', primaryKey = 'id')
|
||||
as select 1 as id, 'a1' as name, 10 as price;
|
||||
+----------+
|
||||
| ok |
|
||||
@@ -30,7 +30,7 @@ select id, name, price from h0;
|
||||
+-----------+
|
||||
|
||||
create table h0_p using hudi partitioned by(dt)
|
||||
options(type = '${tableType}')
|
||||
options(type = '${tableType}', primaryKey = 'id')
|
||||
as select cast('2021-05-07 00:00:00' as timestamp) as dt,
|
||||
1 as id, 'a1' as name, 10 as price;
|
||||
+----------+
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import org.apache.hudi.config.HoodieWriteConfig
|
||||
import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator}
|
||||
|
||||
import org.apache.spark.sql.hudi.command.SqlKeyGenerator
|
||||
|
||||
import org.junit.jupiter.api.Assertions.assertTrue
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
 * Unit tests for `HoodieWriterUtils.getOriginKeyGenerator`, covering the option maps produced
 * by a DataFrame write and by a SQL write.
 */
class HoodieSparkSqlWriterSuite2 {

  // Imported locally so the class stays self-contained; `assertEquals` reports both the
  // expected and the actual value on failure, unlike `assertTrue(a == b)`.
  import org.junit.jupiter.api.Assertions.assertEquals

  /**
   * The key generator returned for a DataFrame-style option map must be the configured one;
   * for a SQL-style option map (where the configured key generator is `SqlKeyGenerator`) it
   * must be the class stored under `SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME`.
   */
  @Test
  def testGetOriginKeyGenerator(): Unit = {
    // for dataframe write
    val dataFrameOptions = Map(
      HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key -> classOf[ComplexKeyGenerator].getName
    )
    assertEquals(
      classOf[ComplexKeyGenerator].getName,
      HoodieWriterUtils.getOriginKeyGenerator(dataFrameOptions))

    // for sql write
    val sqlOptions = Map(
      HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getName,
      SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> classOf[SimpleKeyGenerator].getName
    )
    assertEquals(
      classOf[SimpleKeyGenerator].getName,
      HoodieWriterUtils.getOriginKeyGenerator(sqlOptions))
  }
}
|
||||
@@ -102,7 +102,10 @@ class TestDataSourceForBootstrap {
|
||||
.save(srcPath)
|
||||
|
||||
// Perform bootstrap
|
||||
val commitInstantTime1 = runMetadataBootstrapAndVerifyCommit(DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL)
|
||||
val commitInstantTime1 = runMetadataBootstrapAndVerifyCommit(
|
||||
DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
|
||||
extraOpts = Map(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
|
||||
)
|
||||
|
||||
// Read bootstrapped table and verify count
|
||||
var hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
|
||||
|
||||
@@ -38,7 +38,7 @@ class TestAlterTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '$tablePath'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -127,7 +127,7 @@ class TestAlterTable extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$partitionedTable'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|
||||
@@ -38,7 +38,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
| )
|
||||
| using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -77,7 +77,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
| options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
@@ -105,7 +105,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
| )
|
||||
| using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -151,7 +151,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
| options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|
||||
@@ -31,7 +31,7 @@ class TestCompactionTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| type = 'mor',
|
||||
| preCombineField = 'ts'
|
||||
@@ -82,7 +82,7 @@ class TestCompactionTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| type = 'mor',
|
||||
| preCombineField = 'ts'
|
||||
|
||||
@@ -43,7 +43,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -62,6 +62,53 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
)(table.schema.fields)
|
||||
}
|
||||
|
||||
// Creates a managed, partitioned table whose Hudi settings are supplied through the
// `options (...)` clause, then verifies both the catalog metadata and the table config
// read back through HoodieTableMetaClient.
test("Test Create Hoodie Table With Options") {
  val tableName = generateTableName
  spark.sql(
    s"""
       | create table $tableName (
       | id int,
       | name string,
       | price double,
       | ts long,
       | dt string
       | ) using hudi
       | partitioned by (dt)
       | options (
       | primaryKey = 'id',
       | preCombineField = 'ts'
       | )
       |""".stripMargin)

  val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))

  // `assertResult(expected)(actual)`: the literal goes first so failure messages are labelled
  // correctly. `type` is not given in the DDL above, so 'cow' is expected as the default.
  assertResult("cow")(table.properties("type"))
  assertResult("id")(table.properties("primaryKey"))
  assertResult("ts")(table.properties("preCombineField"))
  assertResult(tableName)(table.identifier.table)
  assertResult("hudi")(table.provider.get)
  assertResult(CatalogTableType.MANAGED)(table.tableType)

  // The catalog schema is expected to be the Hudi meta columns followed by the declared columns.
  val metaFields = HoodieRecord.HOODIE_META_COLUMNS.asScala.map(StructField(_, StringType))
  val dataFields = Seq(
    StructField("id", IntegerType),
    StructField("name", StringType),
    StructField("price", DoubleType),
    StructField("ts", LongType),
    StructField("dt", StringType))
  assertResult(metaFields ++ dataFields)(table.schema.fields)

  // Load the table config from the table's base path and check what was persisted there.
  val tablePath = table.storage.properties("path")
  val metaClient = HoodieTableMetaClient.builder()
    .setBasePath(tablePath)
    .setConf(spark.sessionState.newHadoopConf())
    .build()
  val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap

  assertResult(true)(tableConfig.contains(HoodieTableConfig.CREATE_SCHEMA.key))
  assertResult("dt")(tableConfig(HoodieTableConfig.PARTITION_FIELDS.key))
  assertResult("id")(tableConfig(HoodieTableConfig.RECORDKEY_FIELDS.key))
  assertResult("ts")(tableConfig(HoodieTableConfig.PRECOMBINE_FIELD.key))
  assertResult(classOf[ComplexKeyGenerator].getCanonicalName)(
    tableConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key))
}
|
||||
|
||||
test("Test Create External Hoodie Table") {
|
||||
withTempDir { tmp =>
|
||||
// Test create cow table.
|
||||
@@ -74,7 +121,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id,name',
|
||||
| type = 'cow'
|
||||
| )
|
||||
@@ -93,8 +140,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
StructField("price", DoubleType),
|
||||
StructField("ts", LongType))
|
||||
)(table.schema.fields)
|
||||
assertResult(table.storage.properties("type"))("cow")
|
||||
assertResult(table.storage.properties("primaryKey"))("id,name")
|
||||
assertResult(table.properties("type"))("cow")
|
||||
assertResult(table.properties("primaryKey"))("id,name")
|
||||
|
||||
spark.sql(s"drop table $tableName")
|
||||
// Test create mor partitioned table
|
||||
@@ -108,15 +155,15 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
|) using hudi
|
||||
| partitioned by (dt)
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| type = 'mor'
|
||||
| )
|
||||
| location '${tmp.getCanonicalPath}/h0'
|
||||
""".stripMargin)
|
||||
val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
|
||||
assertResult(table2.storage.properties("type"))("mor")
|
||||
assertResult(table2.storage.properties("primaryKey"))("id")
|
||||
assertResult(table2.properties("type"))("mor")
|
||||
assertResult(table2.properties("primaryKey"))("id")
|
||||
assertResult(Seq("dt"))(table2.partitionColumnNames)
|
||||
assertResult(classOf[HoodieParquetRealtimeInputFormat].getCanonicalName)(table2.storage.inputFormat.get)
|
||||
|
||||
@@ -129,8 +176,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
|location '${tmp.getCanonicalPath}/h0'
|
||||
""".stripMargin)
|
||||
val table3 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName3))
|
||||
assertResult(table3.storage.properties("type"))("mor")
|
||||
assertResult(table3.storage.properties("primaryKey"))("id")
|
||||
assertResult(table3.properties("type"))("mor")
|
||||
assertResult(table3.properties("primaryKey"))("id")
|
||||
assertResult(
|
||||
HoodieRecord.HOODIE_META_COLUMNS.asScala.map(StructField(_, StringType))
|
||||
++ Seq(
|
||||
@@ -156,7 +203,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id1',
|
||||
| type = 'cow'
|
||||
| )
|
||||
@@ -173,7 +220,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts1',
|
||||
| type = 'cow'
|
||||
@@ -191,7 +238,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts',
|
||||
| type = 'cow1'
|
||||
@@ -208,7 +255,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
val tableName1 = generateTableName
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName1 using hudi
|
||||
| create table $tableName1 using hudi
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName1'
|
||||
| AS
|
||||
| select 1 as id, 'a1' as name, 10 as price, 1000 as ts
|
||||
@@ -223,6 +271,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
s"""
|
||||
| create table $tableName2 using hudi
|
||||
| partitioned by (dt)
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName2'
|
||||
| AS
|
||||
| select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt
|
||||
@@ -240,7 +289,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
s"""
|
||||
| create table $tableName3 using hudi
|
||||
| partitioned by (dt)
|
||||
| options(primaryKey = 'id')
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||
| AS
|
||||
| select null as id, 'a1' as name, 10 as price, '2021-05-07' as dt
|
||||
@@ -252,6 +301,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
s"""
|
||||
| create table $tableName3 using hudi
|
||||
| partitioned by (dt)
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||
| AS
|
||||
| select cast('2021-05-06 00:00:00' as timestamp) as dt, 1 as id, 'a1' as name, 10 as
|
||||
@@ -267,6 +317,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
s"""
|
||||
| create table $tableName4 using hudi
|
||||
| partitioned by (dt)
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName4'
|
||||
| AS
|
||||
| select cast('2021-05-06' as date) as dt, 1 as id, 'a1' as name, 10 as
|
||||
@@ -303,7 +354,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
| options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
@@ -380,7 +431,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
| options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
@@ -455,7 +506,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
| options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
@@ -514,6 +565,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| name string,
|
||||
| price double
|
||||
|) using hudi
|
||||
|tblproperties(primaryKey = 'id')
|
||||
|""".stripMargin
|
||||
)
|
||||
|
||||
@@ -527,6 +579,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
| name string,
|
||||
| price double
|
||||
|) using hudi
|
||||
|tblproperties(primaryKey = 'id')
|
||||
|""".stripMargin
|
||||
)
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ class TestDeleteTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|
||||
@@ -0,0 +1,184 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.hudi
|
||||
|
||||
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, OverwriteWithLatestAvroPayload}
|
||||
import org.apache.hudi.common.table.HoodieTableConfig
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase
|
||||
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.apache.spark.sql.types._
|
||||
|
||||
import org.junit.jupiter.api.Assertions.assertTrue
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
|
||||
import org.scalatest.Matchers.intercept
|
||||
|
||||
/**
 * Unit tests for the SQL option helpers on `HoodieOptionConfig`: default filling, mapping of
 * SQL option names to table config keys, filtering of option maps, and table validation.
 */
class TestHoodieOptionConfig extends HoodieClientTestBase {

  // Imported locally so the class stays self-contained; `assertEquals` reports both the
  // expected and the actual value on failure, unlike `assertTrue(a == b)`.
  import org.junit.jupiter.api.Assertions.assertEquals

  // Assigned in setUp(); JUnit calls it before every test, hence a `var`.
  var spark: SparkSession = _

  /**
   * Setup method running before each test.
   */
  @BeforeEach override def setUp(): Unit = {
    initSparkContexts()
    spark = sqlContext.sparkSession
  }

  /**
   * With only `primaryKey` given, `type` and `payloadClass` are expected to be filled in;
   * a map that already sets every option must come back unchanged.
   */
  @Test
  def testWithDefaultSqlOptions(): Unit = {
    val ops1 = Map("primaryKey" -> "id")
    val with1 = HoodieOptionConfig.withDefaultSqlOptions(ops1)
    assertEquals(3, with1.size)
    assertEquals("id", with1("primaryKey"))
    assertEquals("cow", with1("type"))
    assertEquals(classOf[DefaultHoodieRecordPayload].getName, with1("payloadClass"))

    val ops2 = Map("primaryKey" -> "id",
      "preCombineField" -> "timestamp",
      "type" -> "mor",
      "payloadClass" -> classOf[OverwriteWithLatestAvroPayload].getName
    )
    val with2 = HoodieOptionConfig.withDefaultSqlOptions(ops2)
    assertEquals(ops2, with2)
  }

  /**
   * SQL option names must be translated to their `HoodieTableConfig` keys (with `mor` becoming
   * `MERGE_ON_READ`), while `hoodie.*` entries are passed through untouched.
   */
  @Test
  def testMappingSqlOptionToTableConfig(): Unit = {
    val sqlOptions = Map("primaryKey" -> "id,addr",
      "preCombineField" -> "timestamp",
      "type" -> "mor",
      "hoodie.index.type" -> "INMEMORY",
      "hoodie.compact.inline" -> "true"
    )
    val tableConfigs = HoodieOptionConfig.mappingSqlOptionToTableConfig(sqlOptions)

    assertEquals(5, tableConfigs.size)
    assertEquals("id,addr", tableConfigs(HoodieTableConfig.RECORDKEY_FIELDS.key))
    assertEquals("timestamp", tableConfigs(HoodieTableConfig.PRECOMBINE_FIELD.key))
    assertEquals("MERGE_ON_READ", tableConfigs(HoodieTableConfig.TYPE.key))
    assertEquals("INMEMORY", tableConfigs("hoodie.index.type"))
    assertEquals("true", tableConfigs("hoodie.compact.inline"))
  }

  /**
   * Only the entry that is neither a SQL option nor a `hoodie.*` key is expected to survive.
   * The method name mirrors the spelling of the API under test.
   */
  @Test
  def testDeleteHooideOptions(): Unit = {
    val sqlOptions = Map("primaryKey" -> "id,addr",
      "preCombineField" -> "timestamp",
      "type" -> "mor",
      "hoodie.index.type" -> "INMEMORY",
      "hoodie.compact.inline" -> "true",
      "key123" -> "value456"
    )
    val tableConfigs = HoodieOptionConfig.deleteHooideOptions(sqlOptions)
    assertEquals(1, tableConfigs.size)
    assertEquals("value456", tableConfigs("key123"))
  }

  /**
   * Only the three SQL options (`primaryKey`, `preCombineField`, `type`) are expected to be
   * extracted from a mixed option map.
   */
  @Test
  def testExtractSqlOptions(): Unit = {
    val sqlOptions = Map("primaryKey" -> "id,addr",
      "preCombineField" -> "timestamp",
      "type" -> "mor",
      "hoodie.index.type" -> "INMEMORY",
      "hoodie.compact.inline" -> "true",
      "key123" -> "value456"
    )
    val tableConfigs = HoodieOptionConfig.extractSqlOptions(sqlOptions)
    assertEquals(3, tableConfigs.size)
    assertEquals(Set("primaryKey", "preCombineField", "type"), tableConfigs.keySet)
  }

  /**
   * Each invalid option combination must be rejected with an `IllegalArgumentException`
   * carrying the expected message; the final, valid combination must not throw.
   */
  @Test
  def testValidateTable(): Unit = {
    val baseSqlOptions = Map(
      "hoodie.datasource.write.hive_style_partitioning" -> "true",
      "hoodie.datasource.write.partitionpath.urlencode" -> "false",
      "hoodie.table.keygenerator.class" -> "org.apache.hudi.keygen.ComplexKeyGenerator"
    )

    val schema = StructType(
      Seq(StructField("id", IntegerType, nullable = true),
        StructField("name", StringType, nullable = true),
        StructField("timestamp", TimestampType, nullable = true),
        StructField("dt", StringType, nullable = true))
    )

    // primaryKey parameter is missing
    val sqlOptions1 = baseSqlOptions ++ Map(
      "type" -> "mor"
    )
    val e1 = intercept[IllegalArgumentException] {
      HoodieOptionConfig.validateTable(spark, schema, sqlOptions1)
    }
    assertTrue(e1.getMessage.contains("No `primaryKey` is specified."))

    // primary key field is not in the schema
    val sqlOptions2 = baseSqlOptions ++ Map(
      "primaryKey" -> "xxx",
      "type" -> "mor"
    )
    val e2 = intercept[IllegalArgumentException] {
      HoodieOptionConfig.validateTable(spark, schema, sqlOptions2)
    }
    assertTrue(e2.getMessage.contains("Can't find primary key"))

    // preCombine field is not in the schema
    val sqlOptions3 = baseSqlOptions ++ Map(
      "primaryKey" -> "id",
      "preCombineField" -> "ts",
      "type" -> "mor"
    )
    val e3 = intercept[IllegalArgumentException] {
      HoodieOptionConfig.validateTable(spark, schema, sqlOptions3)
    }
    assertTrue(e3.getMessage.contains("Can't find precombine key"))

    // type parameter is missing
    val sqlOptions4 = baseSqlOptions ++ Map(
      "primaryKey" -> "id",
      "preCombineField" -> "timestamp"
    )
    val e4 = intercept[IllegalArgumentException] {
      HoodieOptionConfig.validateTable(spark, schema, sqlOptions4)
    }
    assertTrue(e4.getMessage.contains("No `type` is specified."))

    // type is invalid
    val sqlOptions5 = baseSqlOptions ++ Map(
      "primaryKey" -> "id",
      "preCombineField" -> "timestamp",
      "type" -> "abc"
    )
    val e5 = intercept[IllegalArgumentException] {
      HoodieOptionConfig.validateTable(spark, schema, sqlOptions5)
    }
    assertTrue(e5.getMessage.contains("'type' must be 'cow' or 'mor'"))

    // right options and schema: validation must complete without throwing
    val sqlOptions6 = baseSqlOptions ++ Map(
      "primaryKey" -> "id",
      "preCombineField" -> "timestamp",
      "type" -> "cow"
    )
    HoodieOptionConfig.validateTable(spark, schema, sqlOptions6)
  }

}
|
||||
@@ -36,6 +36,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long,
|
||||
| dt string
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| partitioned by (dt)
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
""".stripMargin)
|
||||
@@ -75,7 +76,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'cow',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -115,7 +116,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName2'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -146,6 +147,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long,
|
||||
| dt string
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| partitioned by (dt)
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
""".stripMargin)
|
||||
@@ -191,6 +193,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
| ) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tblNonPartition'
|
||||
""".stripMargin)
|
||||
spark.sql(s"insert into $tblNonPartition select 1, 'a1', 10, 1000")
|
||||
@@ -245,6 +248,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| dt $partitionType
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| partitioned by (dt)
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
""".stripMargin)
|
||||
@@ -273,6 +277,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| name string,
|
||||
| price double
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
""".stripMargin)
|
||||
|
||||
@@ -293,6 +298,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| dt string
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| partitioned by (dt)
|
||||
""".stripMargin)
|
||||
checkException(s"insert into $tableName partition(dt = '2021-06-20')" +
|
||||
@@ -305,7 +311,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
" count: 3,columns: (1,a1,10)"
|
||||
)
|
||||
spark.sql("set hoodie.sql.bulk.insert.enable = true")
|
||||
spark.sql("set hoodie.sql.insert.mode= strict")
|
||||
spark.sql("set hoodie.sql.insert.mode = strict")
|
||||
|
||||
val tableName2 = generateTableName
|
||||
spark.sql(
|
||||
@@ -316,7 +322,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -325,6 +331,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
"Table with primaryKey can not use bulk insert in strict mode."
|
||||
)
|
||||
|
||||
spark.sql("set hoodie.sql.insert.mode = non-strict")
|
||||
val tableName3 = generateTableName
|
||||
spark.sql(
|
||||
s"""
|
||||
@@ -334,16 +341,18 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| dt string
|
||||
|) using hudi
|
||||
| tblproperties (primaryKey = 'id')
|
||||
| partitioned by (dt)
|
||||
""".stripMargin)
|
||||
checkException(s"insert overwrite table $tableName3 values(1, 'a1', 10, '2021-07-18')")(
|
||||
"Insert Overwrite Partition can not use bulk insert."
|
||||
)
|
||||
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||
spark.sql("set hoodie.sql.insert.mode= upsert")
|
||||
spark.sql("set hoodie.sql.insert.mode = upsert")
|
||||
}
|
||||
|
||||
test("Test bulk insert") {
|
||||
spark.sql("set hoodie.sql.insert.mode = non-strict")
|
||||
withTempDir { tmp =>
|
||||
Seq("cow", "mor").foreach {tableType =>
|
||||
// Test bulk insert for single partition
|
||||
@@ -356,8 +365,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| dt string
|
||||
|) using hudi
|
||||
| options (
|
||||
| type = '$tableType'
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id'
|
||||
| )
|
||||
| partitioned by (dt)
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
@@ -391,8 +401,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| dt string,
|
||||
| hh string
|
||||
|) using hudi
|
||||
| options (
|
||||
| type = '$tableType'
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id'
|
||||
| )
|
||||
| partitioned by (dt, hh)
|
||||
| location '${tmp.getCanonicalPath}/$tableMultiPartition'
|
||||
@@ -423,8 +434,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| name string,
|
||||
| price double
|
||||
|) using hudi
|
||||
| options (
|
||||
| type = '$tableType'
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id'
|
||||
| )
|
||||
| location '${tmp.getCanonicalPath}/$nonPartitionedTable'
|
||||
""".stripMargin)
|
||||
@@ -445,7 +457,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
s"""
|
||||
|create table $tableName2
|
||||
|using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id'
|
||||
|)
|
||||
@@ -459,9 +471,11 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
)
|
||||
}
|
||||
}
|
||||
spark.sql("set hoodie.sql.insert.mode = upsert")
|
||||
}
|
||||
|
||||
test("Test combine before insert") {
|
||||
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||
withTempDir{tmp =>
|
||||
val tableName = generateTableName
|
||||
spark.sql(
|
||||
@@ -473,7 +487,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -495,6 +509,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
}
|
||||
|
||||
test("Test insert pk-table") {
|
||||
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||
withTempDir{tmp =>
|
||||
val tableName = generateTableName
|
||||
spark.sql(
|
||||
@@ -506,7 +521,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
|
||||
@@ -34,7 +34,7 @@ class TestMergeIntoLogOnlyTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| type = 'mor',
|
||||
| preCombineField = 'ts',
|
||||
|
||||
@@ -35,7 +35,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -137,7 +137,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$targetTable'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -203,7 +203,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long,
|
||||
| dt string
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -313,7 +313,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| dt string
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id'
|
||||
| )
|
||||
@@ -369,7 +369,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| v long,
|
||||
| dt string
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'v'
|
||||
@@ -439,7 +439,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| _ts long
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type ='$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = '_ts'
|
||||
@@ -457,7 +457,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| _ts long
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type ='$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = '_ts'
|
||||
@@ -553,7 +553,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| c $dataType
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'c'
|
||||
| )
|
||||
@@ -604,7 +604,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| type = 'mor',
|
||||
| preCombineField = 'ts',
|
||||
@@ -665,7 +665,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -711,7 +711,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
|
||||
@@ -35,7 +35,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
| ts long,
|
||||
| dt string
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -145,7 +145,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|options(primaryKey = 'id')
|
||||
|tblproperties(primaryKey = 'id')
|
||||
|location '${tmp.getCanonicalPath}'
|
||||
|as
|
||||
|select 1 as id, 'a1' as name
|
||||
@@ -187,7 +187,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
| m_value map<string, string>,
|
||||
| ts long
|
||||
| ) using hudi
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
@@ -251,7 +251,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -333,7 +333,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -376,7 +376,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey ='id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
|
||||
@@ -32,7 +32,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| _ts long
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type ='$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = '_ts'
|
||||
@@ -60,7 +60,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
||||
| name string,
|
||||
| price double
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type ='$tableType',
|
||||
| primaryKey = 'id'
|
||||
|)
|
||||
@@ -92,7 +92,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| _ts long
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type = 'cow',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = '_ts'
|
||||
@@ -117,7 +117,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| _ts long
|
||||
|) using hudi
|
||||
|options(
|
||||
|tblproperties(
|
||||
| type = 'mor',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = '_ts'
|
||||
|
||||
@@ -32,7 +32,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
||||
| price double,
|
||||
| ts long
|
||||
|) using hudi
|
||||
|options (
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
)
|
||||
@@ -59,7 +59,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
||||
| dt string
|
||||
) using hudi
|
||||
| partitioned by (dt)
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
@@ -109,7 +109,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
||||
| day string
|
||||
| ) using hudi
|
||||
| partitioned by (year, month, day)
|
||||
| options (
|
||||
| tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
| )
|
||||
|
||||
@@ -33,7 +33,7 @@ class TestUpdateTable extends TestHoodieSqlBase {
|
||||
| ts long
|
||||
|) using hudi
|
||||
| location '${tmp.getCanonicalPath}/$tableName'
|
||||
| options (
|
||||
| tblproperties (
|
||||
| type = '$tableType',
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|
||||
Reference in New Issue
Block a user