[HUDI-2057] Fix CTAS generating an external table when creating a managed table (#3146)
This commit is contained in:
@@ -456,6 +456,12 @@ object DataSourceWriteOptions {
|
||||
.defaultValue("true")
|
||||
.withDocumentation("")
|
||||
|
||||
// Whether Hive sync creates the table as a managed table instead of an external one.
// Defaults to false. HoodieSparkSqlWriter copies this value into
// HiveSyncConfig.createManagedTable, and HiveSchemaUtil then emits "CREATE TABLE"
// rather than "CREATE EXTERNAL TABLE". CreateHoodieTableAsSelectCommand sets this key
// on the session conf from the catalog table type before running the insert.
|
||||
val HIVE_CREATE_MANAGED_TABLE: ConfigProperty[Boolean] = ConfigProperty
|
||||
.key("hoodie.datasource.hive_sync.create_managed_table")
|
||||
.defaultValue(false)
|
||||
.withDocumentation("Whether to sync the table as managed table.")
|
||||
|
||||
// Async Compaction - Enabled by default for MOR
|
||||
val ASYNC_COMPACT_ENABLE_OPT_KEY: ConfigProperty[String] = ConfigProperty
|
||||
.key("hoodie.datasource.compaction.async.enable")
|
||||
|
||||
@@ -439,8 +439,8 @@ object HoodieSparkSqlWriter {
|
||||
serdeProp.put(ConfigUtils.SPARK_QUERY_AS_RT_KEY, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
|
||||
|
||||
hiveSyncConfig.serdeProperties = ConfigUtils.configToString(serdeProp)
|
||||
|
||||
}
|
||||
hiveSyncConfig.createManagedTable = hoodieConfig.getBoolean(HIVE_CREATE_MANAGED_TABLE)
|
||||
hiveSyncConfig
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,8 @@ object HoodieWriterUtils {
|
||||
HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.key -> HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY.defaultValue,
|
||||
HIVE_STYLE_PARTITIONING_OPT_KEY.key -> HIVE_STYLE_PARTITIONING_OPT_KEY.defaultValue,
|
||||
HIVE_USE_JDBC_OPT_KEY.key -> HIVE_USE_JDBC_OPT_KEY.defaultValue,
|
||||
HIVE_CREATE_MANAGED_TABLE.key() -> HIVE_CREATE_MANAGED_TABLE.defaultValue.toString,
|
||||
HIVE_SYNC_AS_DATA_SOURCE_TABLE.key() -> HIVE_SYNC_AS_DATA_SOURCE_TABLE.defaultValue(),
|
||||
ASYNC_COMPACT_ENABLE_OPT_KEY.key -> ASYNC_COMPACT_ENABLE_OPT_KEY.defaultValue,
|
||||
ENABLE_ROW_WRITER_OPT_KEY.key -> ENABLE_ROW_WRITER_OPT_KEY.defaultValue
|
||||
) ++ DataSourceOptionsHelper.translateConfigurations(parameters)
|
||||
|
||||
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hudi.command
|
||||
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
import org.apache.hadoop.fs.Path
|
||||
import org.apache.hudi.DataSourceWriteOptions
|
||||
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
||||
@@ -69,6 +70,9 @@ case class CreateHoodieTableAsSelectCommand(
|
||||
|
||||
// Execute the insert query
|
||||
try {
|
||||
// Set if sync as a managed table.
|
||||
sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key(),
|
||||
(table.tableType == CatalogTableType.MANAGED).toString)
|
||||
val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
|
||||
mode == SaveMode.Overwrite, refreshTable = false)
|
||||
if (success) {
|
||||
|
||||
@@ -542,7 +542,8 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
|
||||
"path" -> basePath,
|
||||
DataSourceWriteOptions.TABLE_NAME_OPT_KEY.key -> "test_hoodie",
|
||||
DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY.key -> "partition",
|
||||
DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX.key -> "true"
|
||||
DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX.key -> "true",
|
||||
DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> "true"
|
||||
)
|
||||
val parameters = HoodieWriterUtils.parametersWithWriteDefaults(params)
|
||||
val hoodieConfig = HoodieWriterUtils.convertMapToHoodieConfig(parameters)
|
||||
@@ -559,6 +560,7 @@ class HoodieSparkSqlWriterSuite extends FunSuite with Matchers {
|
||||
new Path(basePath), newHoodieConfig).asInstanceOf[HiveSyncConfig]
|
||||
|
||||
assertTrue(hiveSyncConfig.skipROSuffix)
|
||||
assertTrue(hiveSyncConfig.createManagedTable)
|
||||
assertResult("spark.sql.sources.provider=hudi\n" +
|
||||
"spark.sql.sources.schema.partCol.0=partition\n" +
|
||||
"spark.sql.sources.schema.numParts=1\n" +
|
||||
|
||||
@@ -104,6 +104,9 @@ public class HiveSyncConfig implements Serializable {
|
||||
@Parameter(names = {"--decode-partition"}, description = "Decode the partition value if the partition has encoded during writing")
|
||||
public Boolean decodePartition = false;
|
||||
|
||||
@Parameter(names = {"--managed-table"}, description = "Create a managed table")
|
||||
public Boolean createManagedTable = false;
|
||||
|
||||
// enhance the similar function in child class
|
||||
public static HiveSyncConfig copy(HiveSyncConfig cfg) {
|
||||
HiveSyncConfig newConfig = new HiveSyncConfig();
|
||||
@@ -123,6 +126,7 @@ public class HiveSyncConfig implements Serializable {
|
||||
newConfig.decodePartition = cfg.decodePartition;
|
||||
newConfig.tableProperties = cfg.tableProperties;
|
||||
newConfig.serdeProperties = cfg.serdeProperties;
|
||||
newConfig.createManagedTable = cfg.createManagedTable;
|
||||
return newConfig;
|
||||
}
|
||||
|
||||
@@ -151,6 +155,7 @@ public class HiveSyncConfig implements Serializable {
|
||||
+ ", help=" + help
|
||||
+ ", supportTimestamp=" + supportTimestamp
|
||||
+ ", decodePartition=" + decodePartition
|
||||
+ ", createManagedTable=" + createManagedTable
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -413,7 +413,12 @@ public class HiveSchemaUtil {
|
||||
}
|
||||
|
||||
String partitionsStr = String.join(",", partitionFields);
|
||||
StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS ");
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (config.createManagedTable) {
|
||||
sb.append("CREATE TABLE IF NOT EXISTS ");
|
||||
} else {
|
||||
sb.append("CREATE EXTERNAL TABLE IF NOT EXISTS ");
|
||||
}
|
||||
sb.append(HIVE_ESCAPE_CHARACTER).append(config.databaseName).append(HIVE_ESCAPE_CHARACTER)
|
||||
.append(".").append(HIVE_ESCAPE_CHARACTER).append(tableName).append(HIVE_ESCAPE_CHARACTER);
|
||||
sb.append("( ").append(columns).append(")");
|
||||
|
||||
@@ -66,6 +66,10 @@ public class TestHiveSyncTool {
|
||||
return Arrays.asList(new Object[][] {{true, true}, {true, false}, {false, true}, {false, false}});
|
||||
}
|
||||
|
||||
private static Iterable<Object[]> useJdbcAndSchemaFromCommitMetadataAndManagedTable() {
|
||||
return Arrays.asList(new Object[][] {{true, true, true}, {true, false, false}, {false, true, true}, {false, false, false}});
|
||||
}
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws Exception {
|
||||
HiveTestUtil.setUp();
|
||||
@@ -269,6 +273,38 @@ public class TestHiveSyncTool {
|
||||
String ddl = String.join("\n", results);
|
||||
assertTrue(ddl.contains("'path'='" + hiveSyncConfig.basePath + "'"));
|
||||
assertTrue(ddl.contains("'hoodie.datasource.query.type'='" + expectQueryType + "'"));
|
||||
assertTrue(ddl.toLowerCase().contains("create external table"));
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource({"useJdbcAndSchemaFromCommitMetadataAndManagedTable"})
|
||||
public void testSyncManagedTable(boolean useJdbc,
|
||||
boolean useSchemaFromCommitMetadata,
|
||||
boolean isManagedTable) throws Exception {
|
||||
HiveSyncConfig hiveSyncConfig = HiveTestUtil.hiveSyncConfig;
|
||||
|
||||
hiveSyncConfig.useJdbc = useJdbc;
|
||||
hiveSyncConfig.createManagedTable = isManagedTable;
|
||||
String instantTime = "100";
|
||||
HiveTestUtil.createCOWTable(instantTime, 5, useSchemaFromCommitMetadata);
|
||||
|
||||
HiveSyncTool tool = new HiveSyncTool(hiveSyncConfig, HiveTestUtil.getHiveConf(), HiveTestUtil.fileSystem);
|
||||
tool.syncHoodieTable();
|
||||
|
||||
SessionState.start(HiveTestUtil.getHiveConf());
|
||||
Driver hiveDriver = new org.apache.hadoop.hive.ql.Driver(HiveTestUtil.getHiveConf());
|
||||
String dbTableName = hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName;
|
||||
hiveDriver.run("SHOW TBLPROPERTIES " + dbTableName);
|
||||
|
||||
List<String> results = new ArrayList<>();
|
||||
hiveDriver.run("SHOW CREATE TABLE " + dbTableName);
|
||||
hiveDriver.getResults(results);
|
||||
String ddl = String.join("\n", results).toLowerCase();
|
||||
if (isManagedTable) {
|
||||
assertTrue(ddl.contains("create table"));
|
||||
} else {
|
||||
assertTrue(ddl.contains("create external table"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user