1
0

[HUDI-3748] Write and select a Hudi table when hoodie.datasource.write.drop.partition.columns is enabled (#5201)

This commit is contained in:
Yann Byron
2022-04-05 16:31:41 +08:00
committed by GitHub
parent 325b3d610a
commit 3195f51562
15 changed files with 335 additions and 67 deletions

View File

@@ -749,8 +749,17 @@ class TestCOWDataSource extends HoodieClientTestBase {
// Runs once with dropping enabled and once disabled: first verifies that the
// "partition" column appears in the read-back schema exactly when dropping is
// disabled, then inserts a larger batch and checks row and partition counts on
// a snapshot read.
@ParameterizedTest @ValueSource(booleans = Array(true, false))
def testCopyOnWriteWithDropPartitionColumns(enableDropPartitionColumns: Boolean) {
  // Schema check via the helper: the partition column survives iff dropping is off.
  val selectedContainsPartition = copyOnWriteTableSelect(enableDropPartitionColumns)
  assertEquals(enableDropPartitionColumns, !selectedContainsPartition)
  // Write 100 records covering every partition with the same drop setting.
  val insertPayloads = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 100)).toList
  val insertDf = spark.read.json(spark.sparkContext.parallelize(insertPayloads, 2))
  insertDf.write.format("org.apache.hudi")
    .options(commonOpts)
    .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
    .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
    .mode(SaveMode.Overwrite)
    .save(basePath)
  // A snapshot read must surface all rows and all three distinct partitions,
  // regardless of whether the partition column was physically stored.
  val snapshot = spark.read.format("org.apache.hudi").load(basePath)
  assertEquals(snapshot.count(), 100)
  assertEquals(3, snapshot.select("partition").distinct().count())
}
@Test
@@ -863,22 +872,6 @@ class TestCOWDataSource extends HoodieClientTestBase {
assertEquals(500, hoodieIncViewDF.count())
}
/**
 * Writes a small insert batch with the given DROP_PARTITION_COLUMNS setting and
 * reports whether the snapshot read schema still contains the "partition" column.
 *
 * @param enableDropPartitionColumns value passed to
 *                                   hoodie.datasource.write.drop.partition.columns
 * @return true when the read-back schema contains the partition column
 */
def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = {
  val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList
  val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
  inputDF1.write.format("org.apache.hudi")
    .options(commonOpts)
    .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
    .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
    .mode(SaveMode.Overwrite)
    .save(basePath)
  val snapshotDF1 = spark.read.format("org.apache.hudi")
    .load(basePath + "/*/*/*/*")
  // Inspect the DataFrame schema directly instead of registering a temp table and
  // collecting a row: registerTempTable is deprecated since Spark 2.0 (use
  // createOrReplaceTempView), and the SQL round-trip both launched an extra job
  // and would throw on an empty result via collect()(0).
  snapshotDF1.schema.contains(StructField("partition", StringType, true))
}
@Test
def testWriteSmallPrecisionDecimalTable(): Unit = {
val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList

View File

@@ -630,4 +630,37 @@ class TestInsertTable extends TestHoodieSqlBase {
}
}
}
test("Test enable hoodie.datasource.write.drop.partition.columns when write") {
  // Bulk insert bypasses the code path under test, so force the regular insert path.
  spark.sql("set hoodie.sql.bulk.insert.enable = false")
  // Exercise both table types with the drop-partition-columns table property on.
  for (tableType <- Seq("mor", "cow")) {
    withTempDir { tmp =>
      val tableName = generateTableName
      // Partitioned table whose data files omit the partition column on write.
      spark.sql(
        s"""
           | create table $tableName (
           |  id int,
           |  name string,
           |  price double,
           |  ts long,
           |  dt string
           | ) using hudi
           | partitioned by (dt)
           | location '${tmp.getCanonicalPath}/$tableName'
           | tblproperties (
           |  primaryKey = 'id',
           |  preCombineField = 'ts',
           |  type = '$tableType',
           |  hoodie.datasource.write.drop.partition.columns = 'true'
           | )
       """.stripMargin)
      // Two inserts into the same partition; on read the partition value must be
      // reconstructed even though it is not stored in the data files.
      spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (1, 'a1', 10, 1000)")
      spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (2, 'a2', 20, 1000)")
      checkAnswer(s"select id, name, price, ts, dt from $tableName")(
        Seq(1, "a1", 10, 1000, "2021-12-25"),
        Seq(2, "a2", 20, 1000, "2021-12-25")
      )
    }
  }
}
}