1
0

[HUDI-4149] Drop-Table fails when underlying table directory is broken (#5672)

This commit is contained in:
Jin Xing
2022-05-30 19:09:26 +08:00
committed by GitHub
parent 329da34ee0
commit 918c4f4e0b
4 changed files with 182 additions and 50 deletions

View File

@@ -34,6 +34,7 @@ set hoodie.delete.shuffle.parallelism = 1;
# CTAS
create table h0 using hudi options(type = '${tableType}', primaryKey = 'id')
location '${tmpDir}/h0'
as select 1 as id, 'a1' as name, 10 as price;
+----------+
| ok |
@@ -46,6 +47,7 @@ select id, name, price from h0;
create table h0_p using hudi partitioned by(dt)
options(type = '${tableType}', primaryKey = 'id')
location '${tmpDir}/h0_p'
as select cast('2021-05-07 00:00:00' as timestamp) as dt,
1 as id, 'a1' as name, 10 as price;
+----------+

View File

@@ -17,6 +17,8 @@
package org.apache.spark.sql.hudi
import org.apache.hadoop.fs.{LocalFileSystem, Path}
import org.apache.hudi.common.fs.FSUtils
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
@@ -230,6 +232,115 @@ class TestDropTable extends HoodieSparkSqlTestBase {
}
}
test("Drop an EXTERNAL table which path is lost.") {
  withTempDir { tmp =>
    val tableName = generateTableName
    val tablePath = s"${tmp.getCanonicalPath}/$tableName"
    // Obtain the FS for the table location so we can later break the table
    // by deleting its directory out from under the catalog entry.
    val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration)
    spark.sql(
      s"""
         |create table $tableName (
         |id int,
         |ts int,
         |value string
         |)using hudi
         | location '$tablePath'
         | tblproperties (
         | primaryKey = 'id',
         | preCombineField = 'ts'
         | )
         |""".stripMargin)
    assert(filesystem.exists(new Path(tablePath)), s"Table path doesn't exist (${tablePath}).")

    // Simulate a broken table: remove the underlying directory, then verify
    // DROP TABLE still succeeds and the catalog is left empty (HUDI-4149).
    filesystem.delete(new Path(tablePath), true)
    spark.sql(s"drop table ${tableName}")
    checkAnswer("show tables")()
  }
}
test("Drop an MOR table and related RT & RO when path is lost.") {
  withTempDir { tmp =>
    val tableName = generateTableName
    val tablePath = s"${tmp.getCanonicalPath}/$tableName"
    // Obtain the FS for the table location so we can later break the table
    // by deleting its directory out from under the catalog entries.
    val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration)
    spark.sql(
      s"""
         |create table $tableName (
         |id int,
         |ts int,
         |value string
         |)using hudi
         | location '$tablePath'
         | tblproperties (
         | primaryKey = 'id',
         | preCombineField = 'ts',
         | type = 'mor'
         | )
         |""".stripMargin)
    assert(filesystem.exists(new Path(tablePath)), s"Table path doesn't exist (${tablePath}).")

    // Register a read-optimized (RO) view over the same MOR table location.
    spark.sql(
      s"""
         |create table ${tableName}_ro using hudi
         | location '$tablePath'
         | tblproperties (
         | type = 'mor',
         | primaryKey = 'id',
         | preCombineField = 'ts'
         | )
       """.stripMargin)
    alterSerdeProperties(spark.sessionState.catalog, TableIdentifier(s"${tableName}_ro"),
      Map("hoodie.query.as.ro.table" -> "true"))

    // Register a real-time (RT) view over the same MOR table location.
    spark.sql(
      s"""
         |create table ${tableName}_rt using hudi
         | location '$tablePath'
         | tblproperties (
         | type = 'mor',
         | primaryKey = 'id',
         | preCombineField = 'ts'
         | )
       """.stripMargin)
    alterSerdeProperties(spark.sessionState.catalog, TableIdentifier(s"${tableName}_rt"),
      Map("hoodie.query.as.ro.table" -> "false"))

    // Break all three catalog entries at once by removing the shared directory,
    // then verify each DROP TABLE still succeeds (HUDI-4149).
    filesystem.delete(new Path(tablePath), true)
    spark.sql(s"drop table ${tableName}")
    spark.sql(s"drop table ${tableName}_ro")
    spark.sql(s"drop table ${tableName}_rt")
    checkAnswer("show tables")()
  }
}
test("Drop an MANAGED table which path is lost.") {
  val tableName = generateTableName
  // No explicit LOCATION: the catalog chooses the path, making this a MANAGED table.
  spark.sql(
    s"""
       |create table $tableName (
       |id int,
       |ts int,
       |value string
       |)using hudi
       | tblproperties (
       | primaryKey = 'id',
       | preCombineField = 'ts'
       | )
       |""".stripMargin)
  // Resolve the catalog-assigned location before breaking it.
  val tablePath = new Path(
    spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)).location)
  val filesystem = FSUtils.getFs(tablePath, spark.sparkContext.hadoopConfiguration)
  assert(filesystem.exists(tablePath), s"Table path doesn't exist ($tablePath).")

  // Simulate a broken table: remove the underlying directory, then verify
  // DROP TABLE still succeeds and the catalog is left empty (HUDI-4149).
  filesystem.delete(tablePath, true)
  spark.sql(s"drop table ${tableName}")
  checkAnswer("show tables")()
}
private def alterSerdeProperties(sessionCatalog: SessionCatalog, tableIdt: TableIdentifier,
newProperties: Map[String, String]): Unit = {
val catalogTable = spark.sessionState.catalog.getTableMetadata(tableIdt)