[HUDI-3136] Fix merge/insert/show partitions error on Spark3.2 (#4490)
This commit is contained in:
@@ -254,7 +254,9 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
|
|||||||
case action: MergeAction =>
|
case action: MergeAction =>
|
||||||
// SPARK-34962: use UpdateStarAction as the explicit representation of * in UpdateAction.
|
// SPARK-34962: use UpdateStarAction as the explicit representation of * in UpdateAction.
|
||||||
// So match and covert this in Spark3.2 env.
|
// So match and covert this in Spark3.2 env.
|
||||||
UpdateAction(action.condition, Seq.empty)
|
val (resolvedCondition, resolvedAssignments) =
|
||||||
|
resolveConditionAssignments(action.condition, Seq.empty)
|
||||||
|
UpdateAction(resolvedCondition, resolvedAssignments)
|
||||||
}
|
}
|
||||||
// Resolve the notMatchedActions
|
// Resolve the notMatchedActions
|
||||||
val resolvedNotMatchedActions = notMatchedActions.map {
|
val resolvedNotMatchedActions = notMatchedActions.map {
|
||||||
@@ -265,7 +267,9 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
|
|||||||
case action: MergeAction =>
|
case action: MergeAction =>
|
||||||
// SPARK-34962: use InsertStarAction as the explicit representation of * in InsertAction.
|
// SPARK-34962: use InsertStarAction as the explicit representation of * in InsertAction.
|
||||||
// So match and covert this in Spark3.2 env.
|
// So match and covert this in Spark3.2 env.
|
||||||
InsertAction(action.condition, Seq.empty)
|
val (resolvedCondition, resolvedAssignments) =
|
||||||
|
resolveConditionAssignments(action.condition, Seq.empty)
|
||||||
|
InsertAction(resolvedCondition, resolvedAssignments)
|
||||||
}
|
}
|
||||||
// Return the resolved MergeIntoTable
|
// Return the resolved MergeIntoTable
|
||||||
MergeIntoTable(target, resolvedSource, resolvedMergeCondition,
|
MergeIntoTable(target, resolvedSource, resolvedMergeCondition,
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import org.apache.avro.Schema
|
|||||||
import org.apache.avro.generic.{GenericRecord, IndexedRecord}
|
import org.apache.avro.generic.{GenericRecord, IndexedRecord}
|
||||||
|
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecord}
|
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecord, OverwriteWithLatestAvroPayload}
|
||||||
import org.apache.hudi.common.util.{Option => HOption}
|
import org.apache.hudi.common.util.{Option => HOption}
|
||||||
import org.apache.hudi.config.HoodieWriteConfig
|
import org.apache.hudi.config.HoodieWriteConfig
|
||||||
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
||||||
@@ -254,7 +254,7 @@ object InsertIntoHoodieTableCommand extends Logging {
|
|||||||
// on reading.
|
// on reading.
|
||||||
classOf[ValidateDuplicateKeyPayload].getCanonicalName
|
classOf[ValidateDuplicateKeyPayload].getCanonicalName
|
||||||
} else {
|
} else {
|
||||||
classOf[DefaultHoodieRecordPayload].getCanonicalName
|
classOf[OverwriteWithLatestAvroPayload].getCanonicalName
|
||||||
}
|
}
|
||||||
logInfo(s"insert statement use write operation type: $operation, payloadClass: $payloadClassName")
|
logInfo(s"insert statement use write operation type: $operation, payloadClass: $payloadClassName")
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
|
|||||||
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
|
import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
|
||||||
import org.apache.spark.sql.execution.command.TruncateTableCommand
|
import org.apache.spark.sql.execution.command.TruncateTableCommand
|
||||||
|
|
||||||
|
import scala.util.control.NonFatal
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Command for truncate hudi table.
|
* Command for truncate hudi table.
|
||||||
*/
|
*/
|
||||||
@@ -36,10 +38,16 @@ class TruncateHoodieTableCommand(
|
|||||||
override def run(sparkSession: SparkSession): Seq[Row] = {
|
override def run(sparkSession: SparkSession): Seq[Row] = {
|
||||||
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
|
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
|
||||||
val properties = hoodieCatalogTable.tableConfig.getProps
|
val properties = hoodieCatalogTable.tableConfig.getProps
|
||||||
val tablePath = hoodieCatalogTable.tableLocation
|
|
||||||
|
|
||||||
// Delete all data in the table directory
|
try {
|
||||||
super.run(sparkSession)
|
// Delete all data in the table directory
|
||||||
|
super.run(sparkSession)
|
||||||
|
} catch {
|
||||||
|
// TruncateTableCommand will delete the related directories first, and then refresh the table.
|
||||||
|
// It will fail when refresh table, because the hudi meta directory(.hoodie) has been deleted at the first step.
|
||||||
|
// So here ignore this failure, and refresh table later.
|
||||||
|
case NonFatal(_) =>
|
||||||
|
}
|
||||||
|
|
||||||
// If we have not specified the partition, truncate will delete all the data in the table path
|
// If we have not specified the partition, truncate will delete all the data in the table path
|
||||||
// include the hoodi.properties. In this case we should reInit the table.
|
// include the hoodi.properties. In this case we should reInit the table.
|
||||||
@@ -50,6 +58,10 @@ class TruncateHoodieTableCommand(
|
|||||||
.fromProperties(properties)
|
.fromProperties(properties)
|
||||||
.initTable(hadoopConf, hoodieCatalogTable.tableLocation)
|
.initTable(hadoopConf, hoodieCatalogTable.tableLocation)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// After deleting the data, refresh the table to make sure we don't keep around a stale
|
||||||
|
// file relation in the metastore cache and cached table data in the cache manager.
|
||||||
|
sparkSession.catalog.refreshTable(hoodieCatalogTable.table.identifier.quotedString)
|
||||||
Seq.empty[Row]
|
Seq.empty[Row]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
|||||||
Seq("year=2021/month=02/day=default"),
|
Seq("year=2021/month=02/day=default"),
|
||||||
Seq("year=2021/month=02/day=01")
|
Seq("year=2021/month=02/day=01")
|
||||||
)
|
)
|
||||||
checkAnswer(s"show partitions $tableName partition(day=01)")(
|
checkAnswer(s"show partitions $tableName partition(day='01')")(
|
||||||
Seq("year=2021/month=02/day=01"),
|
Seq("year=2021/month=02/day=01"),
|
||||||
Seq("year=2021/month=default/day=01"),
|
Seq("year=2021/month=default/day=01"),
|
||||||
Seq("year=2021/month=01/day=01"),
|
Seq("year=2021/month=01/day=01"),
|
||||||
|
|||||||
Reference in New Issue
Block a user