1
0

[HUDI-3108] Fix Purge Drop MOR Table Cause error (#4455)

This commit is contained in:
ForwardXu
2021-12-29 20:23:23 +08:00
committed by GitHub
parent a29b27c7ca
commit 504747ecf4

View File

@@ -18,14 +18,13 @@
package org.apache.spark.sql.hudi.command package org.apache.spark.sql.hudi.command
import org.apache.hadoop.fs.Path import org.apache.hadoop.fs.Path
import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.client.common.HoodieSparkEngineContext
import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.HoodieTableType
import org.apache.spark.sql._ import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, HoodieCatalogTable} import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, HoodieCatalogTable}
import org.apache.spark.sql.hive.HiveClientUtils import org.apache.spark.sql.hive.HiveClientUtils
import org.apache.spark.sql.hudi.HoodieSqlUtils.isEnableHive import org.apache.spark.sql.hudi.HoodieSqlUtils.isEnableHive
@@ -38,6 +37,9 @@ case class DropHoodieTableCommand(
purge: Boolean) purge: Boolean)
extends HoodieLeafRunnableCommand { extends HoodieLeafRunnableCommand {
val MOR_SNAPSHOT_TABLE_SUFFIX = "_rt"
val MOR_READ_OPTIMIZED_TABLE_SUFFIX = "_ro"
override def run(sparkSession: SparkSession): Seq[Row] = { override def run(sparkSession: SparkSession): Seq[Row] = {
val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}" val fullTableName = s"${tableIdentifier.database}.${tableIdentifier.table}"
logInfo(s"start execute drop table command for $fullTableName") logInfo(s"start execute drop table command for $fullTableName")
@@ -69,9 +71,11 @@ extends HoodieLeafRunnableCommand {
// Drop table in the catalog // Drop table in the catalog
val enableHive = isEnableHive(sparkSession) val enableHive = isEnableHive(sparkSession)
if (enableHive) { if (enableHive) {
dropHiveDataSourceTable(sparkSession, table, ifExists, purge) dropHiveDataSourceTable(sparkSession, hoodieCatalogTable)
} else { } else {
catalog.dropTable(tableIdentifier, ifExists, purge) if (catalog.tableExists(tableIdentifier)) {
catalog.dropTable(tableIdentifier, ifExists, purge)
}
} }
// Recursively delete table directories // Recursively delete table directories
@@ -85,25 +89,41 @@ extends HoodieLeafRunnableCommand {
} }
private def dropHiveDataSourceTable( private def dropHiveDataSourceTable(
sparkSession: SparkSession, sparkSession: SparkSession,
table: CatalogTable, hoodieCatalogTable: HoodieCatalogTable): Unit = {
ifExists: Boolean, val table = hoodieCatalogTable.table
purge: Boolean): Unit = {
val dbName = table.identifier.database.get val dbName = table.identifier.database.get
val tableName = table.identifier.table val tableName = hoodieCatalogTable.tableName
// check database exists // check database exists
val dbExists = sparkSession.sessionState.catalog.databaseExists(dbName) val dbExists = sparkSession.sessionState.catalog.databaseExists(dbName)
if (!dbExists) { if (!dbExists) {
throw new NoSuchDatabaseException(dbName) throw new NoSuchDatabaseException(dbName)
} }
// check table exists
if (!sparkSession.sessionState.catalog.tableExists(table.identifier)) { if (HoodieTableType.MERGE_ON_READ == hoodieCatalogTable.tableType && purge) {
throw new NoSuchTableException(dbName, table.identifier.table) val snapshotTableName = tableName + MOR_SNAPSHOT_TABLE_SUFFIX
val roTableName = tableName + MOR_READ_OPTIMIZED_TABLE_SUFFIX
dropHiveTable(sparkSession, dbName, snapshotTableName)
dropHiveTable(sparkSession, dbName, roTableName)
} }
val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf, dropHiveTable(sparkSession, dbName, tableName, purge)
sparkSession.sessionState.newHadoopConf()) }
// drop hive table.
client.dropTable(dbName, tableName, ifExists, purge) private def dropHiveTable(
sparkSession: SparkSession,
dbName: String,
tableName: String,
purge: Boolean = false): Unit = {
// check table exists
if (sparkSession.sessionState.catalog.tableExists(new TableIdentifier(tableName, Option(dbName)))) {
val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf,
sparkSession.sessionState.newHadoopConf())
// drop hive table.
client.dropTable(dbName, tableName, ifExists, purge)
}
} }
} }