1
0

[HUDI-2053] Insert Static Partition With DateType Return Incorrect Partition Value (#3133)

This commit is contained in:
pengzhiwei
2021-06-24 19:09:37 +08:00
committed by GitHub
parent 7e50f9a5a6
commit 84dd3ca18b
4 changed files with 84 additions and 3 deletions

View File

@@ -20,7 +20,6 @@ package org.apache.spark.sql.hudi
import scala.collection.JavaConverters._
import java.net.URI
import java.util.Locale
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hudi.SparkAdapterSupport
@@ -30,7 +29,7 @@ import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
import org.apache.spark.sql.catalyst.expressions.{And, Cast, Expression, Literal}
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.internal.SQLConf
@@ -106,6 +105,10 @@ object HoodieSqlUtils extends SparkAdapterSupport {
}
}
def removeMetaFields(attrs: Seq[Attribute]): Seq[Attribute] = {
attrs.filterNot(attr => isMetaField(attr.name))
}
/**
* Get the table location.
* @param tableId

View File

@@ -128,6 +128,11 @@ object InsertIntoHoodieTableCommand {
s"Required partition columns is: ${targetPartitionSchema.json}, Current static partitions " +
s"is: ${staticPartitionValues.mkString("," + "")}")
assert(staticPartitionValues.size + query.output.size == table.schema.size,
s"Required select columns count: ${removeMetaFields(table.schema).size}, " +
s"Current select columns(including static partition column) count: " +
s"${staticPartitionValues.size + removeMetaFields(query.output).size}columns: " +
s"(${(removeMetaFields(query.output).map(_.name) ++ staticPartitionValues.keys).mkString(",")})")
val queryDataFields = if (staticPartitionValues.isEmpty) { // insert dynamic partition
query.output.dropRight(targetPartitionSchema.fields.length)
} else { // insert static partition
@@ -156,7 +161,7 @@ object InsertIntoHoodieTableCommand {
targetPartitionSchema.fields.map(f => {
val staticPartitionValue = staticPartitionValues.getOrElse(f.name,
s"Missing static partition value for: ${f.name}")
val castAttr = Literal.create(staticPartitionValue, f.dataType)
val castAttr = castIfNeeded(Literal.create(staticPartitionValue), f.dataType, conf)
Alias(castAttr, f.name)()
})
}

View File

@@ -78,4 +78,13 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
protected def checkAnswer(sql: String)(expects: Seq[Any]*): Unit = {
assertResult(expects.map(row => Row(row: _*)).toArray)(spark.sql(sql).collect())
}
protected def checkException(sql: String)(errorMsg: String): Unit = {
try {
spark.sql(sql)
} catch {
case e: Throwable =>
assertResult(errorMsg)(e.getMessage)
}
}
}

View File

@@ -220,4 +220,68 @@ class TestInsertTable extends TestHoodieSqlBase {
)
}
}
test("Test Different Type of Partition Column") {
withTempDir { tmp =>
val typeAndValue = Seq(
("string", "'1000'"),
("int", 1000),
("bigint", 10000),
("timestamp", "'2021-05-20 00:00:00'"),
("date", "'2021-05-20'")
)
typeAndValue.foreach { case (partitionType, partitionValue) =>
val tableName = generateTableName
// Create table
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| dt $partitionType
|) using hudi
| partitioned by (dt)
| location '${tmp.getCanonicalPath}/$tableName'
""".stripMargin)
spark.sql(s"insert into $tableName partition(dt = $partitionValue) select 1, 'a1', 10")
spark.sql(s"insert into $tableName select 2, 'a2', 10, $partitionValue")
checkAnswer(s"select id, name, price, cast(dt as string) from $tableName order by id")(
Seq(1, "a1", 10, removeQuotes(partitionValue).toString),
Seq(2, "a2", 10, removeQuotes(partitionValue).toString)
)
}
}
}
test("Test Insert Exception") {
val tableName = generateTableName
spark.sql(
s"""
|create table $tableName (
| id int,
| name string,
| price double,
| dt string
|) using hudi
| partitioned by (dt)
""".stripMargin)
checkException(s"insert into $tableName partition(dt = '2021-06-20')" +
s" select 1, 'a1', 10, '2021-06-20'") (
"assertion failed: Required select columns count: 4, Current select columns(including static partition column)" +
" count: 5columns: (1,a1,10,2021-06-20,dt)"
)
checkException(s"insert into $tableName select 1, 'a1', 10")(
"assertion failed: Required select columns count: 4, Current select columns(including static partition column)" +
" count: 3columns: (1,a1,10)"
)
}
private def removeQuotes(value: Any): Any = {
value match {
case s: String => s.stripPrefix("'").stripSuffix("'")
case _=> value
}
}
}