1
0

[HUDI-3254] Introduce HoodieCatalog to manage tables for Spark Datasource V2 (#4611)

This commit is contained in:
leesf
2022-02-14 22:26:58 +08:00
committed by GitHub
parent 5ca4480a38
commit 0db1e978c6
26 changed files with 1288 additions and 81 deletions

View File

@@ -32,7 +32,7 @@ import org.apache.hudi.functional.TestBootstrap
import org.apache.hudi.hive.HiveSyncConfig
import org.apache.hudi.keygen.{ComplexKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator}
import org.apache.hudi.testutils.DataSourceTestUtils
import org.apache.spark.SparkContext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.functions.{expr, lit}
@@ -94,11 +94,17 @@ class TestHoodieSparkSqlWriter {
* Utility method for initializing the spark context.
*/
def initSparkContext(): Unit = {
val sparkConf = new SparkConf()
if (!HoodieSparkUtils.beforeSpark3_2()) {
sparkConf.set("spark.sql.catalog.spark_catalog",
"org.apache.spark.sql.hudi.catalog.HoodieCatalog")
}
spark = SparkSession.builder()
.appName(hoodieFooTableName)
.master("local[2]")
.withExtensions(new HoodieSparkSessionExtension)
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.config(sparkConf)
.getOrCreate()
sc = spark.sparkContext
sc.setLogLevel("ERROR")

View File

@@ -18,8 +18,10 @@
package org.apache.spark.sql.hudi
import org.apache.hadoop.fs.Path
import org.apache.hudi.HoodieSparkUtils
import org.apache.hudi.common.fs.FSUtils
import org.apache.log4j.Level
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.util.Utils
@@ -49,10 +51,20 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
.config("hoodie.delete.shuffle.parallelism", "4")
.config("spark.sql.warehouse.dir", sparkWareHouse.getCanonicalPath)
.config("spark.sql.session.timeZone", "CTT")
.config(sparkConf())
.getOrCreate()
private var tableId = 0
/**
 * Builds the additional Spark configuration used when creating the test session.
 *
 * When `HoodieSparkUtils.beforeSpark3_2()` returns false, the session catalog key
 * `spark.sql.catalog.spark_catalog` is set to the `HoodieCatalog` class name;
 * otherwise the configuration is returned without that entry.
 * NOTE(review): presumably "not before 3.2" means Spark 3.2 and later - confirm
 * against HoodieSparkUtils.
 *
 * @return a freshly created [[SparkConf]], optionally carrying the catalog setting
 */
def sparkConf(): SparkConf = {
  val conf = new SparkConf()
  if (HoodieSparkUtils.beforeSpark3_2()) {
    // Leave the conf untouched: no catalog override is registered in this case.
    conf
  } else {
    // SparkConf.set mutates and returns the same instance, so it can be the result.
    conf.set(
      "spark.sql.catalog.spark_catalog",
      "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
  }
}
protected def withTempDir(f: File => Unit): Unit = {
val tempDir = Utils.createTempDir()
try f(tempDir) finally {

View File

@@ -87,7 +87,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
| on s0.id = $tableName.id
| when matched then update set
| id = s0.id, name = s0.name, price = s0.price + $tableName.price, ts = s0.ts
| when not matched and id % 2 = 0 then insert *
| when not matched and s0.id % 2 = 0 then insert *
""".stripMargin)
checkAnswer(s"select id, name, price, ts from $tableName")(
Seq(1, "a1", 30.0, 1002),
@@ -102,9 +102,9 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
| select 1 as id, 'a1' as name, 12 as price, 1003 as ts
| ) s0
| on s0.id = $tableName.id
| when matched and id != 1 then update set
| when matched and s0.id != 1 then update set
| id = s0.id, name = s0.name, price = s0.price, ts = s0.ts
| when matched and id = 1 then delete
| when matched and s0.id = 1 then delete
| when not matched then insert *
""".stripMargin)
val cnt = spark.sql(s"select * from $tableName where id = 1").count()
@@ -178,7 +178,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
| )
| ) s0
| on s0.s_id = t0.id
| when matched and ts = 1001 then update set id = s0.s_id, name = t0.name, price =
| when matched and s0.ts = 1001 then update set id = s0.s_id, name = t0.name, price =
| s0.price, ts = s0.ts
""".stripMargin
)
@@ -233,7 +233,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
| select 1 as id, 'a1' as name, 12 as price, 1001 as ts, '2021-03-21' as dt
| ) as s0
| on t0.id = s0.id
| when matched and id % 2 = 0 then update set *
| when matched and s0.id % 2 = 0 then update set *
""".stripMargin
)
checkAnswer(s"select id,name,price,dt from $tableName")(
@@ -488,7 +488,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|merge into $targetTable t0
|using $sourceTable s0
|on t0.id = s0.id
|when matched and cast(_ts as string) > '1000' then update set *
|when matched and cast(s0._ts as string) > '1000' then update set *
""".stripMargin)
checkAnswer(s"select id, name, price, _ts from $targetTable")(
Seq(1, "a1", 12, 1001)
@@ -512,7 +512,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|using $sourceTable s0
|on t0.id = s0.id
|when matched then update set *
|when not matched and name = 'a2' then insert *
|when not matched and s0.name = 'a2' then insert *
""".stripMargin)
checkAnswer(s"select id, name, price, _ts from $targetTable order by id")(
Seq(1, "a1", 12, 1001),