[HUDI-3198] Improve Spark SQL create table from existing hudi table (#4584)
This change simplifies the SQL statement for creating a Hudi table on top of an existing Hudi path. Before: ```sql create table hudi_tbl using hudi tblproperties (primaryKey='id', preCombineField='ts', type='cow') partitioned by (pt) location '/path/to/hudi' ``` After: ```sql create table hudi_tbl using hudi location '/path/to/hudi' ``` The table properties and partition columns are now inferred from the existing table's metadata instead of being repeated in the statement.
This commit is contained in:
@@ -100,11 +100,6 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (dt)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
|
||||
@@ -149,11 +144,6 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (dt)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
|
||||
@@ -210,7 +200,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
|
||||
import spark.implicits._
|
||||
val df = Seq((1, "z3", "v1", "2021", "10", "01"), (2, "l4", "v1", "2021", "10","02"))
|
||||
.toDF("id", "name", "ts", "year", "month", "day")
|
||||
.toDF("id", "name", "ts", "year", "month", "day")
|
||||
|
||||
df.write.format("hudi")
|
||||
.option(HoodieWriteConfig.TBL_NAME.key, tableName)
|
||||
@@ -229,11 +219,6 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (year, month, day)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
|
||||
@@ -278,11 +263,6 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (year, month, day)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
|
||||
|
||||
@@ -167,7 +167,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
assertResult(Seq("dt"))(table2.partitionColumnNames)
|
||||
assertResult(classOf[HoodieParquetRealtimeInputFormat].getCanonicalName)(table2.storage.inputFormat.get)
|
||||
|
||||
// Test create a external table with an exist table in the path
|
||||
// Test create a external table with an existing table in the path
|
||||
val tableName3 = generateTableName
|
||||
spark.sql(
|
||||
s"""
|
||||
@@ -285,17 +285,18 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
val tableName3 = generateTableName
|
||||
// CTAS failed with null primaryKey
|
||||
assertThrows[Exception] {
|
||||
spark.sql(
|
||||
s"""
|
||||
| create table $tableName3 using hudi
|
||||
| partitioned by (dt)
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||
| AS
|
||||
| select null as id, 'a1' as name, 10 as price, '2021-05-07' as dt
|
||||
|
|
||||
""".stripMargin
|
||||
)}
|
||||
spark.sql(
|
||||
s"""
|
||||
| create table $tableName3 using hudi
|
||||
| partitioned by (dt)
|
||||
| tblproperties(primaryKey = 'id')
|
||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||
| AS
|
||||
| select null as id, 'a1' as name, 10 as price, '2021-05-07' as dt
|
||||
|
|
||||
""".stripMargin
|
||||
)
|
||||
}
|
||||
// Create table with timestamp type partition
|
||||
spark.sql(
|
||||
s"""
|
||||
@@ -357,7 +358,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
}
|
||||
}
|
||||
|
||||
test("Test Create Table From Exist Hoodie Table") {
|
||||
test("Test Create Table From Existing Hoodie Table") {
|
||||
withTempDir { tmp =>
|
||||
Seq("2021-08-02", "2021/08/02").foreach { partitionValue =>
|
||||
val tableName = generateTableName
|
||||
@@ -377,15 +378,19 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(tablePath)
|
||||
|
||||
// Create a table over the exist old table.
|
||||
// Create a table over the existing table.
|
||||
// Fail to create table if only specify partition columns, no table schema.
|
||||
checkExceptionContain(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|partitioned by (dt)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin
|
||||
) ("It is not allowed to specify partition columns when the table schema is not defined.")
|
||||
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (dt)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
checkAnswer(s"select id, name, value, ts, dt from $tableName")(
|
||||
@@ -434,7 +439,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
}
|
||||
}
|
||||
|
||||
test("Test Create Table From Exist Hoodie Table For Multi-Level Partitioned Table") {
|
||||
test("Test Create Table From Existing Hoodie Table For Multi-Level Partitioned Table") {
|
||||
withTempDir { tmp =>
|
||||
Seq("2021-08-02", "2021/08/02").foreach { day =>
|
||||
val tableName = generateTableName
|
||||
@@ -454,15 +459,10 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(tablePath)
|
||||
|
||||
// Create a table over the exist old table.
|
||||
// Create a table over the existing table.
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|partitioned by (day, hh)
|
||||
|location '$tablePath'
|
||||
|""".stripMargin)
|
||||
checkAnswer(s"select id, name, value, ts, day, hh from $tableName")(
|
||||
@@ -511,7 +511,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
}
|
||||
}
|
||||
|
||||
test("Test Create Table From Exist Hoodie Table For None Partitioned Table") {
|
||||
test("Test Create Table From Existing Hoodie Table For None Partitioned Table") {
|
||||
withTempDir{tmp =>
|
||||
// Write a table by spark dataframe.
|
||||
val tableName = generateTableName
|
||||
@@ -529,14 +529,10 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
.mode(SaveMode.Overwrite)
|
||||
.save(tmp.getCanonicalPath)
|
||||
|
||||
// Create a table over the exist old table.
|
||||
// Create a table over the existing table.
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table $tableName using hudi
|
||||
|tblproperties (
|
||||
| primaryKey = 'id',
|
||||
| preCombineField = 'ts'
|
||||
|)
|
||||
|location '${tmp.getCanonicalPath}'
|
||||
|""".stripMargin)
|
||||
checkAnswer(s"select id, name, value, ts from $tableName")(
|
||||
@@ -583,7 +579,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
}
|
||||
}
|
||||
|
||||
test("Test Create Table Exists In Catalog") {
|
||||
test("Test Create Table Existing In Catalog") {
|
||||
val tableName = generateTableName
|
||||
spark.sql(
|
||||
s"""
|
||||
@@ -598,7 +594,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
||||
|
||||
spark.sql(s"alter table $tableName add columns(ts bigint)")
|
||||
|
||||
// Check "create table if not exist" works after schema evolution.
|
||||
// Check "create table if not exists" works after schema evolution.
|
||||
spark.sql(
|
||||
s"""
|
||||
|create table if not exists $tableName (
|
||||
|
||||
Reference in New Issue
Block a user