[HUDI-1363] Provide option to drop partition columns (#3465)
- Co-authored-by: Sivabalan Narayanan <n.siva.b@gmail.com>
This commit is contained in:
@@ -95,7 +95,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
List<Row> rows = DataSourceTestUtils.generateRandomRows(10);
|
||||
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
|
||||
Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
StructType resultSchema = result.schema();
|
||||
|
||||
assertEquals(result.count(), 10);
|
||||
@@ -158,7 +158,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
rows.addAll(updates);
|
||||
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
|
||||
Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
StructType resultSchema = result.schema();
|
||||
|
||||
assertEquals(result.count(), enablePreCombine ? 10 : 15);
|
||||
@@ -238,7 +238,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
|
||||
try {
|
||||
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
fail("Should have thrown exception");
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
@@ -249,7 +249,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
dataset = sqlContext.createDataFrame(rows, structType);
|
||||
try {
|
||||
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
fail("Should have thrown exception");
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
@@ -260,7 +260,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
dataset = sqlContext.createDataFrame(rows, structType);
|
||||
try {
|
||||
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
fail("Should have thrown exception");
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
@@ -271,7 +271,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
|
||||
dataset = sqlContext.createDataFrame(rows, structType);
|
||||
try {
|
||||
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false);
|
||||
"testNamespace", new NonSortPartitionerWithRows(), false, false);
|
||||
fail("Should have thrown exception");
|
||||
} catch (Exception e) {
|
||||
// ignore
|
||||
|
||||
@@ -775,4 +775,26 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
val resultSchema = new StructType(recordsReadDF.schema.filter(p=> !p.name.startsWith("_hoodie")).toArray)
|
||||
assertEquals(resultSchema, schema1)
|
||||
}
|
||||
|
||||
/**
 * Checks, for both settings of the flag, that the value returned by
 * `copyOnWriteTableSelect` (whether the partition column is still present in the
 * read-back result) is the negation of `enableDropPartitionColumns`.
 */
@ParameterizedTest
@ValueSource(booleans = Array(true, false))
def testCopyOnWriteWithDropPartitionColumns(enableDropPartitionColumns: Boolean): Unit = {
  val partitionColumnPresent = copyOnWriteTableSelect(enableDropPartitionColumns)
  assertEquals(enableDropPartitionColumns, !partitionColumnPresent)
}
|
||||
|
||||
/**
 * Writes records produced by `dataGen.generateInsertsContainsAllPartitions("000", 3)` to
 * `basePath` using the insert operation in Overwrite mode, with
 * `DataSourceWriteOptions.DROP_PARTITION_COLUMNS` set to the supplied flag, then reads the
 * table back and inspects the schema of the first row returned by a SQL query.
 *
 * @param enableDropPartitionColumns value passed for the DROP_PARTITION_COLUMNS write option
 * @return true if the first row's schema contains a nullable string field named "partition"
 */
def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = {
  val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList
  val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
  inputDF1.write.format("org.apache.hudi")
    .options(commonOpts)
    .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
    .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
    .mode(SaveMode.Overwrite)
    .save(basePath)

  val snapshotDF1 = spark.read.format("org.apache.hudi")
    .load(basePath + "/*/*/*/*")
  // registerTempTable is deprecated since Spark 2.0; createOrReplaceTempView is the
  // documented replacement and likewise overwrites an existing view of the same name.
  snapshotDF1.createOrReplaceTempView("tmptable")

  val firstRow = spark.sql("select * from tmptable limit 1").collect()(0)
  firstRow.schema.contains(StructField("partition", StringType, nullable = true))
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user