[HUDI-2212] Missing PrimaryKey In Hoodie Properties For CTAS Table (#3332)
This commit is contained in:
@@ -81,10 +81,10 @@ public class HoodieTableConfig extends HoodieConfig implements Serializable {
|
|||||||
.withDocumentation("Field used in preCombining before actual write. By default, when two records have the same key value, "
|
.withDocumentation("Field used in preCombining before actual write. By default, when two records have the same key value, "
|
||||||
+ "the largest value for the precombine field determined by Object.compareTo(..), is picked.");
|
+ "the largest value for the precombine field determined by Object.compareTo(..), is picked.");
|
||||||
|
|
||||||
public static final ConfigProperty<String> HOODIE_TABLE_PARTITION_COLUMNS_PROP = ConfigProperty
|
public static final ConfigProperty<String> HOODIE_TABLE_PARTITION_FIELDS_PROP = ConfigProperty
|
||||||
.key("hoodie.table.partition.columns")
|
.key("hoodie.table.partition.fields")
|
||||||
.noDefaultValue()
|
.noDefaultValue()
|
||||||
.withDocumentation("Columns used to partition the table. Concatenated values of these fields are used as "
|
.withDocumentation("Fields used to partition the table. Concatenated values of these fields are used as "
|
||||||
+ "the partition path, by invoking toString()");
|
+ "the partition path, by invoking toString()");
|
||||||
|
|
||||||
public static final ConfigProperty<String> HOODIE_TABLE_RECORDKEY_FIELDS = ConfigProperty
|
public static final ConfigProperty<String> HOODIE_TABLE_RECORDKEY_FIELDS = ConfigProperty
|
||||||
@@ -250,9 +250,17 @@ public class HoodieTableConfig extends HoodieConfig implements Serializable {
|
|||||||
return getString(HOODIE_TABLE_PRECOMBINE_FIELD_PROP);
|
return getString(HOODIE_TABLE_PRECOMBINE_FIELD_PROP);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Option<String[]> getPartitionColumns() {
|
public Option<String[]> getRecordKeyFields() {
|
||||||
if (contains(HOODIE_TABLE_PARTITION_COLUMNS_PROP)) {
|
if (contains(HOODIE_TABLE_RECORDKEY_FIELDS)) {
|
||||||
return Option.of(Arrays.stream(getString(HOODIE_TABLE_PARTITION_COLUMNS_PROP).split(","))
|
return Option.of(Arrays.stream(getString(HOODIE_TABLE_RECORDKEY_FIELDS).split(","))
|
||||||
|
.filter(p -> p.length() > 0).collect(Collectors.toList()).toArray(new String[]{}));
|
||||||
|
}
|
||||||
|
return Option.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Option<String[]> getPartitionFields() {
|
||||||
|
if (contains(HOODIE_TABLE_PARTITION_FIELDS_PROP)) {
|
||||||
|
return Option.of(Arrays.stream(getString(HOODIE_TABLE_PARTITION_FIELDS_PROP).split(","))
|
||||||
.filter(p -> p.length() > 0).collect(Collectors.toList()).toArray(new String[]{}));
|
.filter(p -> p.length() > 0).collect(Collectors.toList()).toArray(new String[]{}));
|
||||||
}
|
}
|
||||||
return Option.empty();
|
return Option.empty();
|
||||||
|
|||||||
@@ -619,7 +619,7 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
private Integer timelineLayoutVersion;
|
private Integer timelineLayoutVersion;
|
||||||
private String baseFileFormat;
|
private String baseFileFormat;
|
||||||
private String preCombineField;
|
private String preCombineField;
|
||||||
private String partitionColumns;
|
private String partitionFields;
|
||||||
private String bootstrapIndexClass;
|
private String bootstrapIndexClass;
|
||||||
private String bootstrapBasePath;
|
private String bootstrapBasePath;
|
||||||
private Boolean populateMetaFields;
|
private Boolean populateMetaFields;
|
||||||
@@ -681,8 +681,8 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public PropertyBuilder setPartitionColumns(String partitionColumns) {
|
public PropertyBuilder setPartitionFields(String partitionFields) {
|
||||||
this.partitionColumns = partitionColumns;
|
this.partitionFields = partitionFields;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -741,9 +741,9 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP)) {
|
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP)) {
|
||||||
setPreCombineField(hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP));
|
setPreCombineField(hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP));
|
||||||
}
|
}
|
||||||
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_PARTITION_COLUMNS_PROP)) {
|
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP)) {
|
||||||
setPartitionColumns(
|
setPartitionFields(
|
||||||
hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_PARTITION_COLUMNS_PROP));
|
hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP));
|
||||||
}
|
}
|
||||||
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS)) {
|
if (hoodieConfig.contains(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS)) {
|
||||||
setRecordKeyFields(hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS));
|
setRecordKeyFields(hoodieConfig.getString(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS));
|
||||||
@@ -801,8 +801,8 @@ public class HoodieTableMetaClient implements Serializable {
|
|||||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP, preCombineField);
|
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PRECOMBINE_FIELD_PROP, preCombineField);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (null != partitionColumns) {
|
if (null != partitionFields) {
|
||||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PARTITION_COLUMNS_PROP, partitionColumns);
|
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_PARTITION_FIELDS_PROP, partitionFields);
|
||||||
}
|
}
|
||||||
if (null != recordKeyFields) {
|
if (null != recordKeyFields) {
|
||||||
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, recordKeyFields);
|
tableConfig.setValue(HoodieTableConfig.HOODIE_TABLE_RECORDKEY_FIELDS, recordKeyFields);
|
||||||
|
|||||||
@@ -218,9 +218,10 @@ public class StreamerUtil {
|
|||||||
HoodieTableMetaClient.withPropertyBuilder()
|
HoodieTableMetaClient.withPropertyBuilder()
|
||||||
.setTableType(conf.getString(FlinkOptions.TABLE_TYPE))
|
.setTableType(conf.getString(FlinkOptions.TABLE_TYPE))
|
||||||
.setTableName(conf.getString(FlinkOptions.TABLE_NAME))
|
.setTableName(conf.getString(FlinkOptions.TABLE_NAME))
|
||||||
|
.setRecordKeyFields(conf.getString(FlinkOptions.RECORD_KEY_FIELD, null))
|
||||||
.setPayloadClassName(conf.getString(FlinkOptions.PAYLOAD_CLASS))
|
.setPayloadClassName(conf.getString(FlinkOptions.PAYLOAD_CLASS))
|
||||||
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
||||||
.setPartitionColumns(conf.getString(FlinkOptions.PARTITION_PATH_FIELD, null))
|
.setPartitionFields(conf.getString(FlinkOptions.PARTITION_PATH_FIELD, null))
|
||||||
.setPreCombineField(conf.getString(FlinkOptions.PRECOMBINE_FIELD))
|
.setPreCombineField(conf.getString(FlinkOptions.PRECOMBINE_FIELD))
|
||||||
.setTimelineLayoutVersion(1)
|
.setTimelineLayoutVersion(1)
|
||||||
.initTable(hadoopConf, basePath);
|
.initTable(hadoopConf, basePath);
|
||||||
|
|||||||
@@ -60,9 +60,9 @@ public class TestStreamerUtil {
|
|||||||
.setBasePath(tempFile.getAbsolutePath())
|
.setBasePath(tempFile.getAbsolutePath())
|
||||||
.setConf(new org.apache.hadoop.conf.Configuration())
|
.setConf(new org.apache.hadoop.conf.Configuration())
|
||||||
.build();
|
.build();
|
||||||
assertTrue(metaClient1.getTableConfig().getPartitionColumns().isPresent(),
|
assertTrue(metaClient1.getTableConfig().getPartitionFields().isPresent(),
|
||||||
"Missing partition columns in the hoodie.properties.");
|
"Missing partition columns in the hoodie.properties.");
|
||||||
assertArrayEquals(metaClient1.getTableConfig().getPartitionColumns().get(), new String[] { "p0", "p1" });
|
assertArrayEquals(metaClient1.getTableConfig().getPartitionFields().get(), new String[] { "p0", "p1" });
|
||||||
assertEquals(metaClient1.getTableConfig().getPreCombineField(), "ts");
|
assertEquals(metaClient1.getTableConfig().getPreCombineField(), "ts");
|
||||||
|
|
||||||
// Test for non-partitioned table.
|
// Test for non-partitioned table.
|
||||||
@@ -73,7 +73,7 @@ public class TestStreamerUtil {
|
|||||||
.setBasePath(tempFile.getAbsolutePath())
|
.setBasePath(tempFile.getAbsolutePath())
|
||||||
.setConf(new org.apache.hadoop.conf.Configuration())
|
.setConf(new org.apache.hadoop.conf.Configuration())
|
||||||
.build();
|
.build();
|
||||||
assertFalse(metaClient2.getTableConfig().getPartitionColumns().isPresent());
|
assertFalse(metaClient2.getTableConfig().getPartitionFields().isPresent());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ case class HoodieFileIndex(
|
|||||||
*/
|
*/
|
||||||
private lazy val _partitionSchemaFromProperties: StructType = {
|
private lazy val _partitionSchemaFromProperties: StructType = {
|
||||||
val tableConfig = metaClient.getTableConfig
|
val tableConfig = metaClient.getTableConfig
|
||||||
val partitionColumns = tableConfig.getPartitionColumns
|
val partitionColumns = tableConfig.getPartitionFields
|
||||||
val nameFieldMap = schema.fields.map(filed => filed.name -> filed).toMap
|
val nameFieldMap = schema.fields.map(filed => filed.name -> filed).toMap
|
||||||
|
|
||||||
if (partitionColumns.isPresent) {
|
if (partitionColumns.isPresent) {
|
||||||
|
|||||||
@@ -119,15 +119,17 @@ object HoodieSparkSqlWriter {
|
|||||||
val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP)
|
val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_BASE_FILE_FORMAT_PROP)
|
||||||
val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP)
|
val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP)
|
||||||
val partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator)
|
val partitionColumns = HoodieWriterUtils.getPartitionColumns(keyGenerator)
|
||||||
|
val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY)
|
||||||
|
|
||||||
val tableMetaClient = HoodieTableMetaClient.withPropertyBuilder()
|
val tableMetaClient = HoodieTableMetaClient.withPropertyBuilder()
|
||||||
.setTableType(tableType)
|
.setTableType(tableType)
|
||||||
.setTableName(tblName)
|
.setTableName(tblName)
|
||||||
|
.setRecordKeyFields(recordKeyFields)
|
||||||
.setBaseFileFormat(baseFileFormat)
|
.setBaseFileFormat(baseFileFormat)
|
||||||
.setArchiveLogFolder(archiveLogFolder)
|
.setArchiveLogFolder(archiveLogFolder)
|
||||||
.setPayloadClassName(hoodieConfig.getString(PAYLOAD_CLASS_OPT_KEY))
|
.setPayloadClassName(hoodieConfig.getString(PAYLOAD_CLASS_OPT_KEY))
|
||||||
.setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null))
|
.setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null))
|
||||||
.setPartitionColumns(partitionColumns)
|
.setPartitionFields(partitionColumns)
|
||||||
.setPopulateMetaFields(parameters.getOrElse(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()).toBoolean)
|
.setPopulateMetaFields(parameters.getOrElse(HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.key(), HoodieTableConfig.HOODIE_POPULATE_META_FIELDS.defaultValue()).toBoolean)
|
||||||
.initTable(sparkContext.hadoopConfiguration, path.get)
|
.initTable(sparkContext.hadoopConfiguration, path.get)
|
||||||
tableConfig = tableMetaClient.getTableConfig
|
tableConfig = tableMetaClient.getTableConfig
|
||||||
@@ -302,15 +304,18 @@ object HoodieSparkSqlWriter {
|
|||||||
if (!tableExists) {
|
if (!tableExists) {
|
||||||
val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP)
|
val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.HOODIE_ARCHIVELOG_FOLDER_PROP)
|
||||||
val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters)
|
val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters)
|
||||||
|
val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY)
|
||||||
|
|
||||||
HoodieTableMetaClient.withPropertyBuilder()
|
HoodieTableMetaClient.withPropertyBuilder()
|
||||||
.setTableType(HoodieTableType.valueOf(tableType))
|
.setTableType(HoodieTableType.valueOf(tableType))
|
||||||
.setTableName(tableName)
|
.setTableName(tableName)
|
||||||
|
.setRecordKeyFields(recordKeyFields)
|
||||||
.setArchiveLogFolder(archiveLogFolder)
|
.setArchiveLogFolder(archiveLogFolder)
|
||||||
.setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS_OPT_KEY))
|
.setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS_OPT_KEY))
|
||||||
.setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null))
|
.setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD_OPT_KEY, null))
|
||||||
.setBootstrapIndexClass(bootstrapIndexClass)
|
.setBootstrapIndexClass(bootstrapIndexClass)
|
||||||
.setBootstrapBasePath(bootstrapBasePath)
|
.setBootstrapBasePath(bootstrapBasePath)
|
||||||
.setPartitionColumns(partitionColumns)
|
.setPartitionFields(partitionColumns)
|
||||||
.initTable(sparkContext.hadoopConfiguration, path)
|
.initTable(sparkContext.hadoopConfiguration, path)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -471,6 +476,9 @@ object HoodieSparkSqlWriter {
|
|||||||
hiveSyncConfig.syncAsSparkDataSourceTable = hoodieConfig.getStringOrDefault(HIVE_SYNC_AS_DATA_SOURCE_TABLE).toBoolean
|
hiveSyncConfig.syncAsSparkDataSourceTable = hoodieConfig.getStringOrDefault(HIVE_SYNC_AS_DATA_SOURCE_TABLE).toBoolean
|
||||||
hiveSyncConfig.sparkSchemaLengthThreshold = sqlConf.getConf(StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD)
|
hiveSyncConfig.sparkSchemaLengthThreshold = sqlConf.getConf(StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD)
|
||||||
hiveSyncConfig.createManagedTable = hoodieConfig.getBoolean(HIVE_CREATE_MANAGED_TABLE)
|
hiveSyncConfig.createManagedTable = hoodieConfig.getBoolean(HIVE_CREATE_MANAGED_TABLE)
|
||||||
|
|
||||||
|
hiveSyncConfig.serdeProperties = hoodieConfig.getString(HIVE_TABLE_SERDE_PROPERTIES)
|
||||||
|
hiveSyncConfig.tableProperties = hoodieConfig.getString(HIVE_TABLE_PROPERTIES)
|
||||||
hiveSyncConfig
|
hiveSyncConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hudi.command
|
|||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.Path
|
import org.apache.hadoop.fs.Path
|
||||||
import org.apache.hudi.DataSourceWriteOptions
|
import org.apache.hudi.DataSourceWriteOptions
|
||||||
|
import org.apache.hudi.hive.util.ConfigUtils
|
||||||
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
||||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
||||||
@@ -27,6 +28,8 @@ import org.apache.spark.sql.execution.SparkPlan
|
|||||||
import org.apache.spark.sql.execution.command.DataWritingCommand
|
import org.apache.spark.sql.execution.command.DataWritingCommand
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils.getTableLocation
|
import org.apache.spark.sql.hudi.HoodieSqlUtils.getTableLocation
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Command for create table as query statement.
|
* Command for create table as query statement.
|
||||||
*/
|
*/
|
||||||
@@ -71,8 +74,14 @@ case class CreateHoodieTableAsSelectCommand(
|
|||||||
// Execute the insert query
|
// Execute the insert query
|
||||||
try {
|
try {
|
||||||
// Set if sync as a managed table.
|
// Set if sync as a managed table.
|
||||||
sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key(),
|
sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key,
|
||||||
(table.tableType == CatalogTableType.MANAGED).toString)
|
(table.tableType == CatalogTableType.MANAGED).toString)
|
||||||
|
// Sync the options to hive serde properties
|
||||||
|
sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key,
|
||||||
|
ConfigUtils.configToString(table.storage.properties.asJava))
|
||||||
|
// Sync the table properties to hive
|
||||||
|
sparkSession.sessionState.conf.setConfString(DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key,
|
||||||
|
ConfigUtils.configToString(table.properties.asJava))
|
||||||
val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
|
val success = InsertIntoHoodieTableCommand.run(sparkSession, tableWithSchema, reOrderedQuery, Map.empty,
|
||||||
mode == SaveMode.Overwrite, refreshTable = false)
|
mode == SaveMode.Overwrite, refreshTable = false)
|
||||||
if (success) {
|
if (success) {
|
||||||
|
|||||||
@@ -103,9 +103,9 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
val tableSchema = avroSchema.map(SchemaConverters.toSqlType(_).dataType
|
val tableSchema = avroSchema.map(SchemaConverters.toSqlType(_).dataType
|
||||||
.asInstanceOf[StructType])
|
.asInstanceOf[StructType])
|
||||||
|
|
||||||
// Get options from the external table
|
// Get options from the external table and append with the options in ddl.
|
||||||
val options = HoodieOptionConfig.mappingTableConfigToSqlOption(
|
val options = HoodieOptionConfig.mappingTableConfigToSqlOption(
|
||||||
metaClient.getTableConfig.getProps.asScala.toMap)
|
metaClient.getTableConfig.getProps.asScala.toMap) ++ table.storage.properties
|
||||||
|
|
||||||
val userSpecifiedSchema = table.schema
|
val userSpecifiedSchema = table.schema
|
||||||
if (userSpecifiedSchema.isEmpty && tableSchema.isDefined) {
|
if (userSpecifiedSchema.isEmpty && tableSchema.isDefined) {
|
||||||
@@ -329,7 +329,7 @@ object CreateHoodieTableCommand extends Logging {
|
|||||||
.fromProperties(properties)
|
.fromProperties(properties)
|
||||||
.setTableName(tableName)
|
.setTableName(tableName)
|
||||||
.setTableCreateSchema(SchemaConverters.toAvroType(table.schema).toString())
|
.setTableCreateSchema(SchemaConverters.toAvroType(table.schema).toString())
|
||||||
.setPartitionColumns(table.partitionColumnNames.mkString(","))
|
.setPartitionFields(table.partitionColumnNames.mkString(","))
|
||||||
.initTable(conf, location)
|
.initTable(conf, location)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,8 @@
|
|||||||
|
|
||||||
package org.apache.spark.sql.hudi
|
package org.apache.spark.sql.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
|
|
||||||
class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
||||||
|
|
||||||
test("Test MergeInto for MOR table 2") {
|
test("Test MergeInto for MOR table 2") {
|
||||||
@@ -135,4 +137,39 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
)("assertion failed: Target table's field(price) cannot be the right-value of the update clause for MOR table.")
|
)("assertion failed: Target table's field(price) cannot be the right-value of the update clause for MOR table.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("Test Merge Into CTAS Table") {
|
||||||
|
withTempDir { tmp =>
|
||||||
|
val tableName = generateTableName
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName using hudi
|
||||||
|
|options(primaryKey = 'id')
|
||||||
|
|location '${tmp.getCanonicalPath}'
|
||||||
|
|as
|
||||||
|
|select 1 as id, 'a1' as name
|
||||||
|
|""".stripMargin
|
||||||
|
)
|
||||||
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(tmp.getCanonicalPath)
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
// check record key in hoodie.properties
|
||||||
|
assertResult("id")(metaClient.getTableConfig.getRecordKeyFields.get().mkString(","))
|
||||||
|
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|merge into $tableName h0
|
||||||
|
|using (
|
||||||
|
| select 1 as s_id, 'a1_1' as name
|
||||||
|
|) s0
|
||||||
|
|on h0.id = s0.s_id
|
||||||
|
|when matched then update set *
|
||||||
|
|""".stripMargin
|
||||||
|
)
|
||||||
|
checkAnswer(s"select id, name from $tableName")(
|
||||||
|
Seq(1, "a1_1")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -255,7 +255,7 @@ public class DeltaSync implements Serializable {
|
|||||||
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
||||||
.setPayloadClassName(cfg.payloadClassName)
|
.setPayloadClassName(cfg.payloadClassName)
|
||||||
.setBaseFileFormat(cfg.baseFileFormat)
|
.setBaseFileFormat(cfg.baseFileFormat)
|
||||||
.setPartitionColumns(partitionColumns)
|
.setPartitionFields(partitionColumns)
|
||||||
.setPreCombineField(cfg.sourceOrderingField)
|
.setPreCombineField(cfg.sourceOrderingField)
|
||||||
.initTable(new Configuration(jssc.hadoopConfiguration()),
|
.initTable(new Configuration(jssc.hadoopConfiguration()),
|
||||||
cfg.targetBasePath);
|
cfg.targetBasePath);
|
||||||
@@ -354,7 +354,7 @@ public class DeltaSync implements Serializable {
|
|||||||
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
.setArchiveLogFolder(HOODIE_ARCHIVELOG_FOLDER_PROP.defaultValue())
|
||||||
.setPayloadClassName(cfg.payloadClassName)
|
.setPayloadClassName(cfg.payloadClassName)
|
||||||
.setBaseFileFormat(cfg.baseFileFormat)
|
.setBaseFileFormat(cfg.baseFileFormat)
|
||||||
.setPartitionColumns(partitionColumns)
|
.setPartitionFields(partitionColumns)
|
||||||
.initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
|
.initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user