[HUDI-4303] Use Hive sentinel value as partition default to avoid type cast issues (#5954)
This commit is contained in:
@@ -24,6 +24,7 @@ import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model._
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil
|
||||
import org.apache.hudi.common.util.Option
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH
|
||||
import org.apache.hudi.config.HoodiePayloadConfig
|
||||
import org.apache.hudi.exception.{HoodieException, HoodieKeyException}
|
||||
import org.apache.hudi.keygen._
|
||||
@@ -146,17 +147,17 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
|
||||
baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow)
|
||||
|
||||
assertEquals("default", keyGen.getKey(baseRecord).getPartitionPath)
|
||||
assertEquals("default", keyGen.getPartitionPath(baseRow))
|
||||
assertEquals(UTF8String.fromString("default"), keyGen.getPartitionPath(internalRow, structType))
|
||||
assertEquals(DEFAULT_PARTITION_PATH, keyGen.getKey(baseRecord).getPartitionPath)
|
||||
assertEquals(DEFAULT_PARTITION_PATH, keyGen.getPartitionPath(baseRow))
|
||||
assertEquals(UTF8String.fromString(DEFAULT_PARTITION_PATH), keyGen.getPartitionPath(internalRow, structType))
|
||||
|
||||
baseRecord.put("name", null)
|
||||
baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow)
|
||||
|
||||
assertEquals("default", keyGen.getKey(baseRecord).getPartitionPath)
|
||||
assertEquals("default", keyGen.getPartitionPath(baseRow))
|
||||
assertEquals(UTF8String.fromString("default"), keyGen.getPartitionPath(internalRow, structType))
|
||||
assertEquals(DEFAULT_PARTITION_PATH, keyGen.getKey(baseRecord).getPartitionPath)
|
||||
assertEquals(DEFAULT_PARTITION_PATH, keyGen.getPartitionPath(baseRow))
|
||||
assertEquals(UTF8String.fromString(DEFAULT_PARTITION_PATH), keyGen.getPartitionPath(internalRow, structType))
|
||||
}
|
||||
|
||||
{
|
||||
@@ -335,7 +336,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
|
||||
baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow)
|
||||
|
||||
val expectedKey = new HoodieKey("field1:field1,name:__empty__", "field1/default")
|
||||
val expectedKey = new HoodieKey("field1:field1,name:__empty__", "field1/" + DEFAULT_PARTITION_PATH)
|
||||
|
||||
assertEquals(expectedKey, keyGen.getKey(baseRecord))
|
||||
|
||||
@@ -353,7 +354,7 @@ class TestDataSourceDefaults extends ScalaAssertionSupport {
|
||||
baseRow = KeyGeneratorTestUtilities.getRow(baseRecord, schema, structType)
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow)
|
||||
|
||||
val expectedKey = new HoodieKey("field1:field1,name:__null__", "field1/default")
|
||||
val expectedKey = new HoodieKey("field1:field1,name:__null__", "field1/" + DEFAULT_PARTITION_PATH)
|
||||
|
||||
assertEquals(expectedKey, keyGen.getKey(baseRecord))
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, T
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator
|
||||
import org.apache.hudi.common.testutils.RawTripTestPayload.{deleteRecordsToStrings, recordsToStrings}
|
||||
import org.apache.hudi.common.util
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH
|
||||
import org.apache.hudi.config.HoodieWriteConfig
|
||||
import org.apache.hudi.exception.{HoodieException, HoodieUpsertException}
|
||||
import org.apache.hudi.keygen._
|
||||
@@ -41,7 +42,7 @@ import org.joda.time.DateTime
|
||||
import org.joda.time.format.DateTimeFormat
|
||||
import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows, assertTrue, fail}
|
||||
import org.junit.jupiter.api.function.Executable
|
||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test}
|
||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.{CsvSource, ValueSource}
|
||||
|
||||
@@ -614,13 +615,14 @@ class TestCOWDataSource extends HoodieClientTestBase {
|
||||
.load(basePath)
|
||||
assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= col("driver")).count() == 0)
|
||||
|
||||
// Use the `driver,rider` field as the partition key. If no such field exists, the default value `default` is used
|
||||
// Use the `driver,rider` field as the partition key. If no such field exists,
|
||||
// the default value [[PartitionPathEncodeUtils#DEFAULT_PARTITION_PATH]] is used
|
||||
writer = getDataFrameWriter(classOf[SimpleKeyGenerator].getName)
|
||||
writer.partitionBy("driver", "rider")
|
||||
.save(basePath)
|
||||
recordsReadDF = spark.read.format("org.apache.hudi")
|
||||
.load(basePath)
|
||||
assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= lit("default")).count() == 0)
|
||||
assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= lit(DEFAULT_PARTITION_PATH)).count() == 0)
|
||||
}
|
||||
|
||||
@Test def testSparkPartitionByWithComplexKeyGenerator() {
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
package org.apache.spark.sql.hudi
|
||||
|
||||
import org.apache.spark.sql.Row
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils.DEFAULT_PARTITION_PATH
|
||||
|
||||
class TestShowPartitions extends HoodieSparkSqlTestBase {
|
||||
|
||||
@@ -90,7 +90,7 @@ class TestShowPartitions extends HoodieSparkSqlTestBase {
|
||||
| select 3 as id, 'a3' as name, 10 as price, 1000 as ts, null as dt
|
||||
""".stripMargin)
|
||||
checkAnswer(s"show partitions $tableName")(
|
||||
Seq("dt=2021-01-01"), Seq("dt=2021-01-02"), Seq("dt=default")
|
||||
Seq("dt=2021-01-01"), Seq("dt=2021-01-02"), Seq("dt=%s".format(DEFAULT_PARTITION_PATH))
|
||||
)
|
||||
}
|
||||
|
||||
@@ -138,12 +138,12 @@ class TestShowPartitions extends HoodieSparkSqlTestBase {
|
||||
Seq("year=2021/month=01/day=01"),
|
||||
Seq("year=2021/month=01/day=02"),
|
||||
Seq("year=2021/month=02/day=01"),
|
||||
Seq("year=2021/month=02/day=default"),
|
||||
Seq("year=2021/month=default/day=01"),
|
||||
Seq("year=default/month=01/day=default"),
|
||||
Seq("year=default/month=01/day=02"),
|
||||
Seq("year=default/month=default/day=01"),
|
||||
Seq("year=2022/month=default/day=default")
|
||||
Seq("year=2021/month=02/day=%s".format(DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=2021/month=%s/day=01".format(DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=%s/month=01/day=%s".format(DEFAULT_PARTITION_PATH, DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=%s/month=01/day=02".format(DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=%s/month=%s/day=01".format(DEFAULT_PARTITION_PATH, DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=2022/month=%s/day=%s".format(DEFAULT_PARTITION_PATH, DEFAULT_PARTITION_PATH))
|
||||
)
|
||||
|
||||
// check partial partitions
|
||||
@@ -151,14 +151,14 @@ class TestShowPartitions extends HoodieSparkSqlTestBase {
|
||||
Seq("year=2021/month=01/day=01")
|
||||
)
|
||||
checkAnswer(s"show partitions $tableName partition(year='2021', month='02')")(
|
||||
Seq("year=2021/month=02/day=default"),
|
||||
Seq("year=2021/month=02/day=%s".format(DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=2021/month=02/day=01")
|
||||
)
|
||||
checkAnswer(s"show partitions $tableName partition(day='01')")(
|
||||
Seq("year=2021/month=02/day=01"),
|
||||
Seq("year=2021/month=default/day=01"),
|
||||
Seq("year=2021/month=%s/day=01".format(DEFAULT_PARTITION_PATH)),
|
||||
Seq("year=2021/month=01/day=01"),
|
||||
Seq("year=default/month=default/day=01")
|
||||
Seq("year=%s/month=%s/day=01".format(DEFAULT_PARTITION_PATH, DEFAULT_PARTITION_PATH))
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user