[HUDI-1363] Provide option to drop partition columns (#3465)

- Co-authored-by: Sivabalan Narayanan <n.siva.b@gmail.com>
Sagar Sumit
2021-08-13 22:31:26 +05:30
committed by GitHub
parent d4c2974eae
commit 9689278014
7 changed files with 87 additions and 20 deletions
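This change threads a new drop-partition-columns flag through the bulk-insert helper and exposes it as the datasource write option DataSourceWriteOptions.DROP_PARTITION_COLUMNS, as exercised in the tests below. A minimal write-side sketch of enabling it, assuming an existing SparkSession and an input DataFrame named inputDF (the table name, key fields, and path are illustrative, not taken from this commit):

import org.apache.hudi.DataSourceWriteOptions
import org.apache.spark.sql.SaveMode

inputDF.write.format("org.apache.hudi")
  .option("hoodie.table.name", "hudi_trips")                          // illustrative table name
  .option("hoodie.datasource.write.recordkey.field", "uuid")          // assumed record key field
  .option("hoodie.datasource.write.partitionpath.field", "partition") // partition column to be dropped from data files
  .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
  .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, "true")  // the option added by this commit
  .mode(SaveMode.Overwrite)
  .save("/tmp/hudi_trips")                                            // illustrative path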


@@ -95,7 +95,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
List<Row> rows = DataSourceTestUtils.generateRandomRows(10);
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
StructType resultSchema = result.schema();
assertEquals(result.count(), 10);
@@ -158,7 +158,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
rows.addAll(updates);
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
Dataset<Row> result = HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
StructType resultSchema = result.schema();
assertEquals(result.count(), enablePreCombine ? 10 : 15);
@@ -238,7 +238,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
Dataset<Row> dataset = sqlContext.createDataFrame(rows, structType);
try {
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
fail("Should have thrown exception");
} catch (Exception e) {
// ignore
@@ -249,7 +249,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
dataset = sqlContext.createDataFrame(rows, structType);
try {
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
fail("Should have thrown exception");
} catch (Exception e) {
// ignore
@@ -260,7 +260,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
dataset = sqlContext.createDataFrame(rows, structType);
try {
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
fail("Should have thrown exception");
} catch (Exception e) {
// ignore
@@ -271,7 +271,7 @@ public class TestHoodieDatasetBulkInsertHelper extends HoodieClientTestBase {
dataset = sqlContext.createDataFrame(rows, structType);
try {
HoodieDatasetBulkInsertHelper.prepareHoodieDatasetForBulkInsert(sqlContext, config, dataset, "testStructName",
"testNamespace", new NonSortPartitionerWithRows(), false);
"testNamespace", new NonSortPartitionerWithRows(), false, false);
fail("Should have thrown exception");
} catch (Exception e) {
// ignore


@@ -775,4 +775,26 @@ class TestCOWDataSource extends HoodieClientTestBase {
val resultSchema = new StructType(recordsReadDF.schema.filter(p=> !p.name.startsWith("_hoodie")).toArray)
assertEquals(resultSchema, schema1)
}
+
+  @ParameterizedTest
+  @ValueSource(booleans = Array(true, false))
+  def testCopyOnWriteWithDropPartitionColumns(enableDropPartitionColumns: Boolean) {
+    val resultContainPartitionColumn = copyOnWriteTableSelect(enableDropPartitionColumns)
+    assertEquals(enableDropPartitionColumns, !resultContainPartitionColumn)
+  }
+
+  def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = {
+    val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList
+    val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+    inputDF1.write.format("org.apache.hudi")
+      .options(commonOpts)
+      .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
+      .option(DataSourceWriteOptions.DROP_PARTITION_COLUMNS.key, enableDropPartitionColumns)
+      .mode(SaveMode.Overwrite)
+      .save(basePath)
+    val snapshotDF1 = spark.read.format("org.apache.hudi")
+      .load(basePath + "/*/*/*/*")
+    snapshotDF1.registerTempTable("tmptable")
+    val result = spark.sql("select * from tmptable limit 1").collect()(0)
+    result.schema.contains(new StructField("partition", StringType, true))
+  }
}
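For reference, a read-side sketch of what the test above asserts, under the same assumptions as the test (basePath and the glob layout come from the test; with the option enabled, the partition value is still recoverable from the partition path, e.g. via the _hoodie_partition_path metadata column):

// Read the table back and check whether the "partition" column survived the write.
// With DROP_PARTITION_COLUMNS enabled it should no longer appear in the data schema.
val readDF = spark.read.format("org.apache.hudi").load(basePath + "/*/*/*/*")
val hasPartitionColumn = readDF.schema.fieldNames.contains("partition")
// hasPartitionColumn is expected to be false when the option is enabled, true otherwise.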