[HUDI-3748] write and select hudi table when enable hoodie.datasource.write.drop.partition.columns (#5201)
This commit is contained in:
@@ -749,8 +749,17 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
|
||||
/**
 * COW round-trip with `hoodie.datasource.write.drop.partition.columns` toggled.
 *
 * First verifies (via [[copyOnWriteTableSelect]]) that the physical row schema
 * contains the "partition" field exactly when the option is disabled, then
 * inserts 100 records across all partitions and checks that a snapshot read
 * still sees every record and all three partition values — i.e. dropping the
 * column from the data files does not lose the partition information.
 *
 * @param enableDropPartitionColumns value for the drop-partition-columns write option
 */
@ParameterizedTest
@ValueSource(booleans = Array(true, false))
def testCopyOnWriteWithDropPartitionColumns(enableDropPartitionColumns: Boolean): Unit = {
  // Schema check: the partition column is present iff it was NOT dropped.
  val resultContainPartitionColumn = copyOnWriteTableSelect(enableDropPartitionColumns)
  assertEquals(enableDropPartitionColumns, !resultContainPartitionColumn)

  val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 100)).toList
  val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
  inputDF1.write.format("org.apache.hudi")
    .options(commonOpts)
    .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
    .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
    .mode(SaveMode.Overwrite)
    .save(basePath)

  // Snapshot read: record count and partition values must be intact even when
  // the partition column was dropped from the data files.
  val snapshotDF1 = spark.read.format("org.apache.hudi").load(basePath)
  // JUnit convention: expected value first (original had the arguments swapped,
  // which produces misleading failure messages).
  assertEquals(100, snapshotDF1.count())
  assertEquals(3, snapshotDF1.select("partition").distinct().count())
}
|
||||
|
||||
@Test
|
||||
@@ -863,22 +872,6 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
assertEquals(500, hoodieIncViewDF.count())
|
||||
}
|
||||
|
||||
/**
 * Bootstraps a small COW table with the given drop-partition-columns setting
 * and reports whether the rows read back still expose the "partition" field.
 *
 * @param enableDropPartitionColumns value for
 *        `hoodie.datasource.write.drop.partition.columns` at write time
 * @return true if the selected row's schema contains a nullable string
 *         "partition" column
 */
def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = {
  val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList
  val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
  inputDF1.write.format("org.apache.hudi")
    .options(commonOpts)
    .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
    .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
    .mode(SaveMode.Overwrite)
    .save(basePath)
  val snapshotDF1 = spark.read.format("org.apache.hudi")
    .load(basePath + "/*/*/*/*")
  // createOrReplaceTempView supersedes the deprecated registerTempTable
  // (removed in Spark 3.x) and is idempotent across the two parameterized runs.
  snapshotDF1.createOrReplaceTempView("tmptable")
  val result = spark.sql("select * from tmptable limit 1").collect()(0)
  // StructField is a case class: apply() instead of `new`, and structural
  // equality makes contains() a field-by-field comparison.
  result.schema.contains(StructField("partition", StringType, nullable = true))
}
|
||||
|
||||
@Test
|
||||
def testWriteSmallPrecisionDecimalTable(): Unit = {
|
||||
val records1 = recordsToStrings(dataGen.generateInserts("001", 5)).toList
|
||||
|
||||
@@ -630,4 +630,37 @@ class TestInsertTable extends TestHoodieSqlBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// SQL-layer coverage for hoodie.datasource.write.drop.partition.columns:
// for both table types, a table created with the option set to 'true' must
// accept inserts and return the partition column (dt) on select.
test("Test enable hoodie.datasource.write.drop.partition.columns when write") {
  // Keep inserts on the regular write path rather than bulk insert.
  spark.sql("set hoodie.sql.bulk.insert.enable = false")
  for (tableType <- Seq("mor", "cow")) {
    withTempDir { tmp =>
      val tableName = generateTableName
      // DDL enabling drop-partition-columns at table-creation time.
      val createTableSql =
        s"""
           | create table $tableName (
           | id int,
           | name string,
           | price double,
           | ts long,
           | dt string
           | ) using hudi
           | partitioned by (dt)
           | location '${tmp.getCanonicalPath}/$tableName'
           | tblproperties (
           | primaryKey = 'id',
           | preCombineField = 'ts',
           | type = '$tableType',
           | hoodie.datasource.write.drop.partition.columns = 'true'
           | )
       """.stripMargin
      spark.sql(createTableSql)
      spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (1, 'a1', 10, 1000)")
      spark.sql(s"insert into $tableName partition(dt='2021-12-25') values (2, 'a2', 20, 1000)")
      // dt was dropped from the data files but must still be selectable.
      checkAnswer(s"select id, name, price, ts, dt from $tableName")(
        Seq(1, "a1", 10, 1000, "2021-12-25"),
        Seq(2, "a2", 20, 1000, "2021-12-25")
      )
    }
  }
}
|
||||
|
||||
Reference in New Issue
Block a user