[HUDI-2706] refactor spark-sql to make consistent with DataFrame api (#3936)
This commit is contained in:
@@ -113,7 +113,7 @@ public class DefaultHoodieRecordPayload extends OverwriteWithLatestAvroPayload {
|
|||||||
Object persistedOrderingVal = getNestedFieldVal((GenericRecord) currentValue,
|
Object persistedOrderingVal = getNestedFieldVal((GenericRecord) currentValue,
|
||||||
properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), true);
|
properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), true);
|
||||||
Comparable incomingOrderingVal = (Comparable) getNestedFieldVal((GenericRecord) incomingRecord,
|
Comparable incomingOrderingVal = (Comparable) getNestedFieldVal((GenericRecord) incomingRecord,
|
||||||
properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), false);
|
properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), true);
|
||||||
return persistedOrderingVal == null || ((Comparable) persistedOrderingVal).compareTo(incomingOrderingVal) <= 0;
|
return persistedOrderingVal == null || ((Comparable) persistedOrderingVal).compareTo(incomingOrderingVal) <= 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,11 +19,13 @@ package org.apache.hudi
|
|||||||
|
|
||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.avro.generic.GenericRecord
|
import org.apache.avro.generic.GenericRecord
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||||
import org.apache.hadoop.hive.conf.HiveConf
|
import org.apache.hadoop.hive.conf.HiveConf
|
||||||
|
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
import org.apache.hudi.DataSourceOptionsHelper.{allAlternatives, translateConfigurations}
|
import org.apache.hudi.HoodieWriterUtils._
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils
|
import org.apache.hudi.avro.HoodieAvroUtils
|
||||||
import org.apache.hudi.client.{HoodieWriteResult, SparkRDDWriteClient}
|
import org.apache.hudi.client.{HoodieWriteResult, SparkRDDWriteClient}
|
||||||
import org.apache.hudi.common.config.{HoodieConfig, HoodieMetadataConfig, TypedProperties}
|
import org.apache.hudi.common.config.{HoodieConfig, HoodieMetadataConfig, TypedProperties}
|
||||||
@@ -42,19 +44,21 @@ import org.apache.hudi.internal.DataSourceInternalWriterHelper
|
|||||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||||
import org.apache.hudi.sync.common.AbstractSyncTool
|
import org.apache.hudi.sync.common.AbstractSyncTool
|
||||||
import org.apache.hudi.table.BulkInsertPartitioner
|
import org.apache.hudi.table.BulkInsertPartitioner
|
||||||
|
|
||||||
import org.apache.log4j.LogManager
|
import org.apache.log4j.LogManager
|
||||||
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext
|
import org.apache.spark.api.java.JavaSparkContext
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
|
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.StructType
|
||||||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession}
|
import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession}
|
||||||
import org.apache.spark.{SPARK_VERSION, SparkContext}
|
import org.apache.spark.{SPARK_VERSION, SparkContext}
|
||||||
|
|
||||||
import java.util
|
import java.util
|
||||||
import java.util.Properties
|
import java.util.Properties
|
||||||
|
|
||||||
import scala.collection.JavaConversions._
|
import scala.collection.JavaConversions._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
import scala.collection.mutable.StringBuilder
|
|
||||||
import scala.collection.mutable.ListBuffer
|
import scala.collection.mutable.ListBuffer
|
||||||
|
|
||||||
object HoodieSparkSqlWriter {
|
object HoodieSparkSqlWriter {
|
||||||
@@ -141,7 +145,7 @@ object HoodieSparkSqlWriter {
|
|||||||
.setPartitionFields(partitionColumns)
|
.setPartitionFields(partitionColumns)
|
||||||
.setPopulateMetaFields(populateMetaFields)
|
.setPopulateMetaFields(populateMetaFields)
|
||||||
.setRecordKeyFields(hoodieConfig.getString(RECORDKEY_FIELD))
|
.setRecordKeyFields(hoodieConfig.getString(RECORDKEY_FIELD))
|
||||||
.setKeyGeneratorClassProp(hoodieConfig.getString(KEYGENERATOR_CLASS_NAME))
|
.setKeyGeneratorClassProp(HoodieWriterUtils.getOriginKeyGenerator(parameters))
|
||||||
.setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING))
|
.setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING))
|
||||||
.setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING))
|
.setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING))
|
||||||
.initTable(sparkContext.hadoopConfiguration, path)
|
.initTable(sparkContext.hadoopConfiguration, path)
|
||||||
@@ -713,22 +717,6 @@ object HoodieSparkSqlWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private def validateTableConfig(spark: SparkSession, params: Map[String, String],
|
|
||||||
tableConfig: HoodieTableConfig): Unit = {
|
|
||||||
val resolver = spark.sessionState.conf.resolver
|
|
||||||
val diffConfigs = StringBuilder.newBuilder
|
|
||||||
params.foreach { case (key, value) =>
|
|
||||||
val existingValue = getStringFromTableConfigWithAlternatives(tableConfig, key)
|
|
||||||
if (null != existingValue && !resolver(existingValue, value)) {
|
|
||||||
diffConfigs.append(s"$key:\t$value\t${tableConfig.getString(key)}\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (diffConfigs.nonEmpty) {
|
|
||||||
diffConfigs.insert(0, "\nConfig conflict(key\tcurrent value\texisting value):\n")
|
|
||||||
throw new HoodieException(diffConfigs.toString.trim)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private def mergeParamsAndGetHoodieConfig(optParams: Map[String, String],
|
private def mergeParamsAndGetHoodieConfig(optParams: Map[String, String],
|
||||||
tableConfig: HoodieTableConfig): (Map[String, String], HoodieConfig) = {
|
tableConfig: HoodieTableConfig): (Map[String, String], HoodieConfig) = {
|
||||||
val mergedParams = mutable.Map.empty ++
|
val mergedParams = mutable.Map.empty ++
|
||||||
@@ -745,16 +733,4 @@ object HoodieSparkSqlWriter {
|
|||||||
val params = mergedParams.toMap
|
val params = mergedParams.toMap
|
||||||
(params, HoodieWriterUtils.convertMapToHoodieConfig(params))
|
(params, HoodieWriterUtils.convertMapToHoodieConfig(params))
|
||||||
}
|
}
|
||||||
|
|
||||||
private def getStringFromTableConfigWithAlternatives(tableConfig: HoodieTableConfig, key: String): String = {
|
|
||||||
if (null == tableConfig) {
|
|
||||||
null
|
|
||||||
} else {
|
|
||||||
if (allAlternatives.contains(key)) {
|
|
||||||
tableConfig.getString(allAlternatives(key))
|
|
||||||
} else {
|
|
||||||
tableConfig.getString(key)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,15 +17,19 @@
|
|||||||
|
|
||||||
package org.apache.hudi
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import java.util.Properties
|
||||||
|
|
||||||
|
import org.apache.hudi.DataSourceOptionsHelper.allAlternatives
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
import org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE
|
import org.apache.hudi.common.config.HoodieMetadataConfig.ENABLE
|
||||||
import org.apache.hudi.common.config.{HoodieConfig, TypedProperties}
|
import org.apache.hudi.common.config.{HoodieConfig, TypedProperties}
|
||||||
|
import org.apache.hudi.common.table.HoodieTableConfig
|
||||||
|
import org.apache.hudi.exception.HoodieException
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.hudi.command.SqlKeyGenerator
|
||||||
|
|
||||||
import java.util.Properties
|
|
||||||
import scala.collection.JavaConversions.mapAsJavaMap
|
import scala.collection.JavaConversions.mapAsJavaMap
|
||||||
import scala.collection.JavaConverters.{mapAsScalaMapConverter, _}
|
import scala.collection.JavaConverters._
|
||||||
import scala.collection.JavaConverters.mapAsScalaMapConverter
|
|
||||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* WriterUtils to assist in write path in Datasource and tests.
|
* WriterUtils to assist in write path in Datasource and tests.
|
||||||
@@ -102,4 +106,68 @@ object HoodieWriterUtils {
|
|||||||
properties.putAll(mapAsJavaMap(parameters))
|
properties.putAll(mapAsJavaMap(parameters))
|
||||||
new HoodieConfig(properties)
|
new HoodieConfig(properties)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def getOriginKeyGenerator(parameters: Map[String, String]): String = {
|
||||||
|
val kg = parameters.getOrElse(KEYGENERATOR_CLASS_NAME.key(), null)
|
||||||
|
if (classOf[SqlKeyGenerator].getCanonicalName == kg) {
|
||||||
|
parameters.getOrElse(SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME, null)
|
||||||
|
} else {
|
||||||
|
kg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Detects conflicts between new parameters and existing table configurations
|
||||||
|
*/
|
||||||
|
def validateTableConfig(spark: SparkSession, params: Map[String, String],
|
||||||
|
tableConfig: HoodieConfig): Unit = {
|
||||||
|
val resolver = spark.sessionState.conf.resolver
|
||||||
|
val diffConfigs = StringBuilder.newBuilder
|
||||||
|
params.foreach { case (key, value) =>
|
||||||
|
val existingValue = getStringFromTableConfigWithAlternatives(tableConfig, key)
|
||||||
|
if (null != existingValue && !resolver(existingValue, value)) {
|
||||||
|
diffConfigs.append(s"$key:\t$value\t${tableConfig.getString(key)}\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (null != tableConfig) {
|
||||||
|
val datasourceRecordKey = params.getOrElse(RECORDKEY_FIELD.key(), null)
|
||||||
|
val tableConfigRecordKey = tableConfig.getString(HoodieTableConfig.RECORDKEY_FIELDS)
|
||||||
|
if (null != datasourceRecordKey && null != tableConfigRecordKey
|
||||||
|
&& datasourceRecordKey != tableConfigRecordKey) {
|
||||||
|
diffConfigs.append(s"RecordKey:\t$datasourceRecordKey\t$tableConfigRecordKey\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
val datasourcePreCombineKey = params.getOrElse(PRECOMBINE_FIELD.key(), null)
|
||||||
|
val tableConfigPreCombineKey = tableConfig.getString(HoodieTableConfig.PRECOMBINE_FIELD)
|
||||||
|
if (null != datasourcePreCombineKey && null != tableConfigPreCombineKey
|
||||||
|
&& datasourcePreCombineKey != tableConfigPreCombineKey) {
|
||||||
|
diffConfigs.append(s"PreCombineKey:\t$datasourcePreCombineKey\t$tableConfigPreCombineKey\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
val datasourceKeyGen = getOriginKeyGenerator(params)
|
||||||
|
val tableConfigKeyGen = tableConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME)
|
||||||
|
if (null != datasourceKeyGen && null != tableConfigKeyGen
|
||||||
|
&& datasourceKeyGen != tableConfigKeyGen) {
|
||||||
|
diffConfigs.append(s"KeyGenerator:\t$datasourceKeyGen\t$tableConfigKeyGen\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (diffConfigs.nonEmpty) {
|
||||||
|
diffConfigs.insert(0, "\nConfig conflict(key\tcurrent value\texisting value):\n")
|
||||||
|
throw new HoodieException(diffConfigs.toString.trim)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private def getStringFromTableConfigWithAlternatives(tableConfig: HoodieConfig, key: String): String = {
|
||||||
|
if (null == tableConfig) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
if (allAlternatives.contains(key)) {
|
||||||
|
tableConfig.getString(allAlternatives(key))
|
||||||
|
} else {
|
||||||
|
tableConfig.getString(key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,10 @@ package org.apache.spark.sql.hudi
|
|||||||
import org.apache.hudi.DataSourceWriteOptions
|
import org.apache.hudi.DataSourceWriteOptions
|
||||||
import org.apache.hudi.common.model.DefaultHoodieRecordPayload
|
import org.apache.hudi.common.model.DefaultHoodieRecordPayload
|
||||||
import org.apache.hudi.common.table.HoodieTableConfig
|
import org.apache.hudi.common.table.HoodieTableConfig
|
||||||
|
import org.apache.hudi.common.util.ValidationUtils
|
||||||
|
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -43,6 +47,7 @@ object HoodieOptionConfig {
|
|||||||
.withSqlKey("primaryKey")
|
.withSqlKey("primaryKey")
|
||||||
.withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD.key)
|
.withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD.key)
|
||||||
.withTableConfigKey(HoodieTableConfig.RECORDKEY_FIELDS.key)
|
.withTableConfigKey(HoodieTableConfig.RECORDKEY_FIELDS.key)
|
||||||
|
.defaultValue(DataSourceWriteOptions.RECORDKEY_FIELD.defaultValue())
|
||||||
.build()
|
.build()
|
||||||
|
|
||||||
val SQL_KEY_TABLE_TYPE: HoodieOption[String] = buildConf()
|
val SQL_KEY_TABLE_TYPE: HoodieOption[String] = buildConf()
|
||||||
@@ -102,6 +107,8 @@ object HoodieOptionConfig {
|
|||||||
|
|
||||||
private lazy val reverseValueMapping = valueMapping.map(f => f._2 -> f._1)
|
private lazy val reverseValueMapping = valueMapping.map(f => f._2 -> f._1)
|
||||||
|
|
||||||
|
def withDefaultSqlOptions(options: Map[String, String]): Map[String, String] = defaultSqlOptions ++ options
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Mapping the sql's short name key/value in the options to the hoodie's config key/value.
|
* Mapping the sql's short name key/value in the options to the hoodie's config key/value.
|
||||||
* @param options
|
* @param options
|
||||||
@@ -119,14 +126,13 @@ object HoodieOptionConfig {
|
|||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
def mappingSqlOptionToTableConfig(options: Map[String, String]): Map[String, String] = {
|
def mappingSqlOptionToTableConfig(options: Map[String, String]): Map[String, String] = {
|
||||||
defaultTableConfig ++
|
options.map { case (k, v) =>
|
||||||
options.map { case (k, v) =>
|
if (keyTableConfigMapping.contains(k)) {
|
||||||
if (keyTableConfigMapping.contains(k)) {
|
keyTableConfigMapping(k) -> valueMapping.getOrElse(v, v)
|
||||||
keyTableConfigMapping(k) -> valueMapping.getOrElse(v, v)
|
} else {
|
||||||
} else {
|
k -> v
|
||||||
k -> v
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -136,16 +142,19 @@ object HoodieOptionConfig {
|
|||||||
options.map(kv => tableConfigKeyToSqlKey.getOrElse(kv._1, kv._1) -> reverseValueMapping.getOrElse(kv._2, kv._2))
|
options.map(kv => tableConfigKeyToSqlKey.getOrElse(kv._1, kv._1) -> reverseValueMapping.getOrElse(kv._2, kv._2))
|
||||||
}
|
}
|
||||||
|
|
||||||
private lazy val defaultTableConfig: Map[String, String] = {
|
private lazy val defaultSqlOptions: Map[String, String] = {
|
||||||
HoodieOptionConfig.getClass.getDeclaredFields
|
HoodieOptionConfig.getClass.getDeclaredFields
|
||||||
.filter(f => f.getType == classOf[HoodieOption[_]])
|
.filter(f => f.getType == classOf[HoodieOption[_]])
|
||||||
.map(f => {f.setAccessible(true); f.get(HoodieOptionConfig).asInstanceOf[HoodieOption[_]]})
|
.map(f => {f.setAccessible(true); f.get(HoodieOptionConfig).asInstanceOf[HoodieOption[_]]})
|
||||||
.filter(option => option.tableConfigKey.isDefined && option.defaultValue.isDefined)
|
.filter(option => option.tableConfigKey.isDefined && option.defaultValue.isDefined)
|
||||||
.map(option => option.tableConfigKey.get ->
|
.map(option => option.sqlKeyName -> option.defaultValue.get.toString)
|
||||||
valueMapping.getOrElse(option.defaultValue.get.toString, option.defaultValue.get.toString))
|
|
||||||
.toMap
|
.toMap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private lazy val defaultTableConfig: Map[String, String] = {
|
||||||
|
mappingSqlOptionToHoodieParam(defaultSqlOptions)
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the primary key from the table options.
|
* Get the primary key from the table options.
|
||||||
* @param options
|
* @param options
|
||||||
@@ -154,7 +163,7 @@ object HoodieOptionConfig {
|
|||||||
def getPrimaryColumns(options: Map[String, String]): Array[String] = {
|
def getPrimaryColumns(options: Map[String, String]): Array[String] = {
|
||||||
val params = mappingSqlOptionToHoodieParam(options)
|
val params = mappingSqlOptionToHoodieParam(options)
|
||||||
params.get(DataSourceWriteOptions.RECORDKEY_FIELD.key)
|
params.get(DataSourceWriteOptions.RECORDKEY_FIELD.key)
|
||||||
.map(_.split(",").filter(_.length > 0))
|
.map(_.split(",").filter(_.nonEmpty))
|
||||||
.getOrElse(Array.empty)
|
.getOrElse(Array.empty)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -171,7 +180,47 @@ object HoodieOptionConfig {
|
|||||||
|
|
||||||
def getPreCombineField(options: Map[String, String]): Option[String] = {
|
def getPreCombineField(options: Map[String, String]): Option[String] = {
|
||||||
val params = mappingSqlOptionToHoodieParam(options)
|
val params = mappingSqlOptionToHoodieParam(options)
|
||||||
params.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)
|
params.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key).filter(_.nonEmpty)
|
||||||
|
}
|
||||||
|
|
||||||
|
def deleteHooideOptions(options: Map[String, String]): Map[String, String] = {
|
||||||
|
options.filterNot(_._1.startsWith("hoodie.")).filterNot(kv => keyMapping.contains(kv._1))
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract primaryKey, preCombineField, type options
|
||||||
|
def extractSqlOptions(options: Map[String, String]): Map[String, String] = {
|
||||||
|
val targetOptions = keyMapping.keySet -- Set(SQL_PAYLOAD_CLASS.sqlKeyName)
|
||||||
|
options.filterKeys(targetOptions.contains)
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate primaryKey, preCombineField and type options
|
||||||
|
def validateTable(spark: SparkSession, schema: StructType, options: Map[String, String]): Unit = {
|
||||||
|
val resolver = spark.sessionState.conf.resolver
|
||||||
|
|
||||||
|
// validate primary key
|
||||||
|
val primaryKeys = options.get(SQL_KEY_TABLE_PRIMARY_KEY.sqlKeyName)
|
||||||
|
.map(_.split(",").filter(_.length > 0))
|
||||||
|
ValidationUtils.checkArgument(primaryKeys.nonEmpty, "No `primaryKey` is specified.")
|
||||||
|
primaryKeys.get.foreach { primaryKey =>
|
||||||
|
ValidationUtils.checkArgument(schema.exists(f => resolver(f.name, primaryKey)),
|
||||||
|
s"Can't find primary key `$primaryKey` in ${schema.treeString}.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate precombine key
|
||||||
|
val precombineKey = options.get(SQL_KEY_PRECOMBINE_FIELD.sqlKeyName)
|
||||||
|
if (precombineKey.isDefined && precombineKey.get.nonEmpty) {
|
||||||
|
ValidationUtils.checkArgument(schema.exists(f => resolver(f.name, precombineKey.get)),
|
||||||
|
s"Can't find precombine key `${precombineKey.get}` in ${schema.treeString}.")
|
||||||
|
}
|
||||||
|
|
||||||
|
// validate table type
|
||||||
|
val tableType = options.get(SQL_KEY_TABLE_TYPE.sqlKeyName)
|
||||||
|
ValidationUtils.checkArgument(tableType.nonEmpty, "No `type` is specified.")
|
||||||
|
ValidationUtils.checkArgument(
|
||||||
|
tableType.get.equalsIgnoreCase(HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_COW) ||
|
||||||
|
tableType.get.equalsIgnoreCase(HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_MOR),
|
||||||
|
s"'type' must be '${HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_COW}' or " +
|
||||||
|
s"'${HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_MOR}'")
|
||||||
}
|
}
|
||||||
|
|
||||||
def buildConf[T](): HoodieOptions[T] = {
|
def buildConf[T](): HoodieOptions[T] = {
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ object HoodieSqlUtils extends SparkAdapterSupport {
|
|||||||
val sparkEngine = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext))
|
val sparkEngine = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext))
|
||||||
val metadataConfig = {
|
val metadataConfig = {
|
||||||
val properties = new Properties()
|
val properties = new Properties()
|
||||||
properties.putAll((spark.sessionState.conf.getAllConfs ++ table.storage.properties).asJava)
|
properties.putAll((spark.sessionState.conf.getAllConfs ++ table.storage.properties ++ table.properties).asJava)
|
||||||
HoodieMetadataConfig.newBuilder.fromProperties(properties).build()
|
HoodieMetadataConfig.newBuilder.fromProperties(properties).build()
|
||||||
}
|
}
|
||||||
FSUtils.getAllPartitionPaths(sparkEngine, metadataConfig, getTableLocation(table, spark)).asScala
|
FSUtils.getAllPartitionPaths(sparkEngine, metadataConfig, getTableLocation(table, spark)).asScala
|
||||||
|
|||||||
@@ -202,8 +202,9 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
|
|||||||
val targetTableId = getMergeIntoTargetTableId(mergeInto)
|
val targetTableId = getMergeIntoTargetTableId(mergeInto)
|
||||||
val targetTable =
|
val targetTable =
|
||||||
sparkSession.sessionState.catalog.getTableMetadata(targetTableId)
|
sparkSession.sessionState.catalog.getTableMetadata(targetTableId)
|
||||||
val targetTableType = HoodieOptionConfig.getTableType(targetTable.storage.properties)
|
val tblProperties = targetTable.storage.properties ++ targetTable.properties
|
||||||
val preCombineField = HoodieOptionConfig.getPreCombineField(targetTable.storage.properties)
|
val targetTableType = HoodieOptionConfig.getTableType(tblProperties)
|
||||||
|
val preCombineField = HoodieOptionConfig.getPreCombineField(tblProperties)
|
||||||
|
|
||||||
// Get the map of target attribute to value of the update assignments.
|
// Get the map of target attribute to value of the update assignments.
|
||||||
val target2Values = resolvedAssignments.map {
|
val target2Values = resolvedAssignments.map {
|
||||||
|
|||||||
@@ -105,8 +105,13 @@ object AlterHoodieTableAddColumnsCommand {
|
|||||||
val path = getTableLocation(table, sparkSession)
|
val path = getTableLocation(table, sparkSession)
|
||||||
|
|
||||||
val jsc = new JavaSparkContext(sparkSession.sparkContext)
|
val jsc = new JavaSparkContext(sparkSession.sparkContext)
|
||||||
val client = DataSourceUtils.createHoodieClient(jsc, schema.toString,
|
val client = DataSourceUtils.createHoodieClient(
|
||||||
path, table.identifier.table, HoodieWriterUtils.parametersWithWriteDefaults(table.storage.properties).asJava)
|
jsc,
|
||||||
|
schema.toString,
|
||||||
|
path,
|
||||||
|
table.identifier.table,
|
||||||
|
HoodieWriterUtils.parametersWithWriteDefaults(table.storage.properties ++ table.properties).asJava
|
||||||
|
)
|
||||||
|
|
||||||
val hadoopConf = sparkSession.sessionState.newHadoopConf()
|
val hadoopConf = sparkSession.sessionState.newHadoopConf()
|
||||||
val metaClient = HoodieTableMetaClient.builder().setBasePath(path).setConf(hadoopConf).build()
|
val metaClient = HoodieTableMetaClient.builder().setBasePath(path).setConf(hadoopConf).build()
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ extends RunnableCommand {
|
|||||||
.build()
|
.build()
|
||||||
val tableConfig = metaClient.getTableConfig
|
val tableConfig = metaClient.getTableConfig
|
||||||
|
|
||||||
val optParams = withSparkConf(sparkSession, table.storage.properties) {
|
withSparkConf(sparkSession, table.storage.properties) {
|
||||||
Map(
|
Map(
|
||||||
"path" -> path,
|
"path" -> path,
|
||||||
TBL_NAME.key -> tableIdentifier.table,
|
TBL_NAME.key -> tableIdentifier.table,
|
||||||
@@ -104,10 +104,6 @@ extends RunnableCommand {
|
|||||||
PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp
|
PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
val parameters = HoodieWriterUtils.parametersWithWriteDefaults(optParams)
|
|
||||||
val translatedOptions = DataSourceWriteOptions.translateSqlOptions(parameters)
|
|
||||||
translatedOptions
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def normalizePartitionSpec[T](
|
def normalizePartitionSpec[T](
|
||||||
|
|||||||
@@ -19,9 +19,11 @@ package org.apache.spark.sql.hudi.command
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.Path
|
import org.apache.hadoop.fs.Path
|
||||||
|
|
||||||
import org.apache.hudi.DataSourceWriteOptions
|
import org.apache.hudi.DataSourceWriteOptions
|
||||||
import org.apache.hudi.hive.util.ConfigUtils
|
import org.apache.hudi.hive.util.ConfigUtils
|
||||||
import org.apache.hudi.sql.InsertMode
|
import org.apache.hudi.sql.InsertMode
|
||||||
|
|
||||||
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
|
||||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
|
||||||
@@ -73,9 +75,10 @@ case class CreateHoodieTableAsSelectCommand(
|
|||||||
|
|
||||||
// Execute the insert query
|
// Execute the insert query
|
||||||
try {
|
try {
|
||||||
|
val tblProperties = table.storage.properties ++ table.properties
|
||||||
val options = Map(
|
val options = Map(
|
||||||
DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
|
DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> (table.tableType == CatalogTableType.MANAGED).toString,
|
||||||
DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(table.storage.properties.asJava),
|
DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava),
|
||||||
DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(table.properties.asJava),
|
DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(table.properties.asJava),
|
||||||
DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
|
DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(),
|
||||||
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
|
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true"
|
||||||
@@ -88,7 +91,9 @@ case class CreateHoodieTableAsSelectCommand(
|
|||||||
if (!sparkSession.sessionState.catalog.tableExists(tableIdentWithDB)) {
|
if (!sparkSession.sessionState.catalog.tableExists(tableIdentWithDB)) {
|
||||||
// Create the table
|
// Create the table
|
||||||
val createTableCommand = CreateHoodieTableCommand(tableWithSchema, mode == SaveMode.Ignore)
|
val createTableCommand = CreateHoodieTableCommand(tableWithSchema, mode == SaveMode.Ignore)
|
||||||
createTableCommand.createTableInCatalog(sparkSession, checkPathForManagedTable = false)
|
val path = getTableLocation(table, sparkSession)
|
||||||
|
val (finalSchema, _, tableSqlOptions) = createTableCommand.parseSchemaAndConfigs(sparkSession, path, ctas = true)
|
||||||
|
createTableCommand.createTableInCatalog(sparkSession, finalSchema, tableSqlOptions)
|
||||||
}
|
}
|
||||||
} else { // failed to insert data, clear table path
|
} else { // failed to insert data, clear table path
|
||||||
clearTablePath(tablePath, hadoopConf)
|
clearTablePath(tablePath, hadoopConf)
|
||||||
|
|||||||
@@ -19,13 +19,17 @@ package org.apache.spark.sql.hudi.command
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.Path
|
import org.apache.hadoop.fs.Path
|
||||||
|
|
||||||
|
import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
|
||||||
|
import org.apache.hudi.HoodieWriterUtils._
|
||||||
import org.apache.hudi.common.model.HoodieFileFormat
|
import org.apache.hudi.common.model.HoodieFileFormat
|
||||||
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
|
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
|
||||||
import org.apache.hudi.common.util.ValidationUtils
|
|
||||||
import org.apache.hudi.hadoop.HoodieParquetInputFormat
|
import org.apache.hudi.hadoop.HoodieParquetInputFormat
|
||||||
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat
|
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat
|
||||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils
|
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils
|
||||||
import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
|
import org.apache.hudi.keygen.ComplexKeyGenerator
|
||||||
|
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||||
|
|
||||||
import org.apache.spark.internal.Logging
|
import org.apache.spark.internal.Logging
|
||||||
import org.apache.spark.sql.avro.SchemaConverters
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
import org.apache.spark.sql.catalyst.TableIdentifier
|
import org.apache.spark.sql.catalyst.TableIdentifier
|
||||||
@@ -36,19 +40,17 @@ import org.apache.spark.sql.hive.HiveClientUtils
|
|||||||
import org.apache.spark.sql.hive.HiveExternalCatalog._
|
import org.apache.spark.sql.hive.HiveExternalCatalog._
|
||||||
import org.apache.spark.sql.hudi.HoodieOptionConfig
|
import org.apache.spark.sql.hudi.HoodieOptionConfig
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
||||||
import org.apache.spark.sql.hudi.command.CreateHoodieTableCommand.{initTableIfNeed, isEmptyPath}
|
import org.apache.spark.sql.hudi.command.CreateHoodieTableCommand.checkTableConfigEqual
|
||||||
import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
|
import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.{StructField, StructType}
|
||||||
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
|
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
|
||||||
import org.apache.spark.{SPARK_VERSION, SparkConf}
|
import org.apache.spark.{SPARK_VERSION, SparkConf}
|
||||||
import java.util.{Locale, Properties}
|
|
||||||
|
|
||||||
import org.apache.hudi.exception.HoodieException
|
import java.util.{Locale, Properties}
|
||||||
import org.apache.hudi.keygen.ComplexKeyGenerator
|
|
||||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.collection.mutable
|
import scala.collection.mutable
|
||||||
|
import scala.util.control.NonFatal
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Command for create hoodie table.
|
* Command for create hoodie table.
|
||||||
@@ -56,9 +58,11 @@ import scala.collection.mutable
|
|||||||
case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean)
|
case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean)
|
||||||
extends RunnableCommand with SparkAdapterSupport {
|
extends RunnableCommand with SparkAdapterSupport {
|
||||||
|
|
||||||
override def run(sparkSession: SparkSession): Seq[Row] = {
|
val tableName = formatName(table.identifier.table)
|
||||||
val tableName = table.identifier.unquotedString
|
|
||||||
|
|
||||||
|
val tblProperties = table.storage.properties ++ table.properties
|
||||||
|
|
||||||
|
override def run(sparkSession: SparkSession): Seq[Row] = {
|
||||||
val tableIsExists = sparkSession.sessionState.catalog.tableExists(table.identifier)
|
val tableIsExists = sparkSession.sessionState.catalog.tableExists(table.identifier)
|
||||||
if (tableIsExists) {
|
if (tableIsExists) {
|
||||||
if (ignoreIfExists) {
|
if (ignoreIfExists) {
|
||||||
@@ -66,64 +70,95 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
return Seq.empty[Row]
|
return Seq.empty[Row]
|
||||||
// scalastyle:on
|
// scalastyle:on
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException(s"Table $tableName already exists.")
|
throw new IllegalArgumentException(s"Table ${table.identifier.unquotedString} already exists.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Create table in the catalog
|
|
||||||
val createTable = createTableInCatalog(sparkSession)
|
// get schema with meta fields, table config if hudi table exists, options including
|
||||||
|
// table configs and properties of the catalog table
|
||||||
|
val path = getTableLocation(table, sparkSession)
|
||||||
|
val (finalSchema, existingTableConfig, tableSqlOptions) = parseSchemaAndConfigs(sparkSession, path)
|
||||||
|
|
||||||
// Init the hoodie.properties
|
// Init the hoodie.properties
|
||||||
initTableIfNeed(sparkSession, createTable)
|
initTableIfNeed(sparkSession, path, finalSchema, existingTableConfig, tableSqlOptions)
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Create table in the catalog
|
||||||
|
createTableInCatalog(sparkSession, finalSchema, tableSqlOptions)
|
||||||
|
} catch {
|
||||||
|
case NonFatal(e) =>
|
||||||
|
logWarning(s"Failed to create catalog table in metastore: ${e.getMessage}")
|
||||||
|
}
|
||||||
|
|
||||||
Seq.empty[Row]
|
Seq.empty[Row]
|
||||||
}
|
}
|
||||||
|
|
||||||
def createTableInCatalog(sparkSession: SparkSession,
|
def parseSchemaAndConfigs(sparkSession: SparkSession, path: String, ctas: Boolean = false)
|
||||||
checkPathForManagedTable: Boolean = true): CatalogTable = {
|
: (StructType, Map[String, String], Map[String, String]) = {
|
||||||
|
val resolver = sparkSession.sessionState.conf.resolver
|
||||||
|
val conf = sparkSession.sessionState.newHadoopConf
|
||||||
|
// if CTAS, we treat the table we just created as nonexistent
|
||||||
|
val isTableExists = if (ctas) false else tableExistsInPath(path, conf)
|
||||||
|
var existingTableConfig = Map.empty[String, String]
|
||||||
|
val sqlOptions = HoodieOptionConfig.withDefaultSqlOptions(tblProperties)
|
||||||
|
val catalogTableProps = HoodieOptionConfig.mappingSqlOptionToTableConfig(tblProperties)
|
||||||
|
|
||||||
|
// get final schema and parameters
|
||||||
|
val (finalSchema, tableSqlOptions) = (table.tableType, isTableExists) match {
|
||||||
|
case (CatalogTableType.EXTERNAL, true) =>
|
||||||
|
// If this is an external table & the table has already exists in the location,
|
||||||
|
// load the schema from the table meta.
|
||||||
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(path)
|
||||||
|
.setConf(conf)
|
||||||
|
.build()
|
||||||
|
val tableSchema = getTableSqlSchema(metaClient)
|
||||||
|
existingTableConfig = metaClient.getTableConfig.getProps.asScala.toMap
|
||||||
|
validateTableConfig(sparkSession, catalogTableProps, convertMapToHoodieConfig(existingTableConfig))
|
||||||
|
|
||||||
|
val options = extraTableConfig(sparkSession, isTableExists, existingTableConfig) ++
|
||||||
|
sqlOptions ++ HoodieOptionConfig.mappingTableConfigToSqlOption(existingTableConfig)
|
||||||
|
|
||||||
|
val userSpecifiedSchema = table.schema
|
||||||
|
val schema = if (userSpecifiedSchema.isEmpty && tableSchema.isDefined) {
|
||||||
|
tableSchema.get
|
||||||
|
} else if (userSpecifiedSchema.nonEmpty) {
|
||||||
|
userSpecifiedSchema
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException(s"Missing schema for Create Table: $tableName")
|
||||||
|
}
|
||||||
|
|
||||||
|
(addMetaFields(schema), options)
|
||||||
|
|
||||||
|
case (_, false) =>
|
||||||
|
assert(table.schema.nonEmpty, s"Missing schema for Create Table: $tableName")
|
||||||
|
val schema = table.schema
|
||||||
|
val options = extraTableConfig(sparkSession, isTableExists = false) ++ sqlOptions
|
||||||
|
(addMetaFields(schema), options)
|
||||||
|
|
||||||
|
case (CatalogTableType.MANAGED, true) =>
|
||||||
|
throw new AnalysisException(s"Can not create the managed table('$tableName')" +
|
||||||
|
s". The associated location('$path') already exists.")
|
||||||
|
}
|
||||||
|
HoodieOptionConfig.validateTable(sparkSession, finalSchema, tableSqlOptions)
|
||||||
|
|
||||||
|
val dataSchema = finalSchema.filterNot { f =>
|
||||||
|
table.partitionColumnNames.exists(resolver(_, f.name))
|
||||||
|
}
|
||||||
|
verifyDataSchema(table.identifier, table.tableType, dataSchema)
|
||||||
|
|
||||||
|
(finalSchema, existingTableConfig, tableSqlOptions)
|
||||||
|
}
|
||||||
|
|
||||||
|
def createTableInCatalog(sparkSession: SparkSession, finalSchema: StructType,
|
||||||
|
options: Map[String, String]): Unit = {
|
||||||
assert(table.tableType != CatalogTableType.VIEW)
|
assert(table.tableType != CatalogTableType.VIEW)
|
||||||
assert(table.provider.isDefined)
|
assert(table.provider.isDefined)
|
||||||
val sessionState = sparkSession.sessionState
|
val sessionState = sparkSession.sessionState
|
||||||
val tableName = table.identifier.unquotedString
|
|
||||||
val path = getTableLocation(table, sparkSession)
|
val path = getTableLocation(table, sparkSession)
|
||||||
val conf = sparkSession.sessionState.newHadoopConf()
|
val conf = sparkSession.sessionState.newHadoopConf()
|
||||||
val isTableExists = tableExistsInPath(path, conf)
|
|
||||||
// Get the schema & table options
|
|
||||||
val (newSchema, tableOptions) = if (table.tableType == CatalogTableType.EXTERNAL &&
|
|
||||||
isTableExists) {
|
|
||||||
// If this is an external table & the table has already exists in the location,
|
|
||||||
// load the schema from the table meta.
|
|
||||||
val metaClient = HoodieTableMetaClient.builder()
|
|
||||||
.setBasePath(path)
|
|
||||||
.setConf(conf)
|
|
||||||
.build()
|
|
||||||
val tableSchema = getTableSqlSchema(metaClient)
|
|
||||||
|
|
||||||
// Get options from the external table and append with the options in ddl.
|
val tableType = HoodieOptionConfig.getTableType(options)
|
||||||
val originTableConfig = HoodieOptionConfig.mappingTableConfigToSqlOption(
|
|
||||||
metaClient.getTableConfig.getProps.asScala.toMap)
|
|
||||||
val extraConfig = extraTableConfig(sparkSession, isTableExists, originTableConfig)
|
|
||||||
val options = originTableConfig ++ table.storage.properties ++ extraConfig
|
|
||||||
|
|
||||||
val userSpecifiedSchema = table.schema
|
|
||||||
if (userSpecifiedSchema.isEmpty && tableSchema.isDefined) {
|
|
||||||
(addMetaFields(tableSchema.get), options)
|
|
||||||
} else if (userSpecifiedSchema.nonEmpty) {
|
|
||||||
(addMetaFields(userSpecifiedSchema), options)
|
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException(s"Missing schema for Create Table: $tableName")
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
assert(table.schema.nonEmpty, s"Missing schema for Create Table: $tableName")
|
|
||||||
// SPARK-19724: the default location of a managed table should be non-existent or empty.
|
|
||||||
if (checkPathForManagedTable && table.tableType == CatalogTableType.MANAGED
|
|
||||||
&& !isEmptyPath(path, conf)) {
|
|
||||||
throw new AnalysisException(s"Can not create the managed table('$tableName')" +
|
|
||||||
s". The associated location('$path') already exists.")
|
|
||||||
}
|
|
||||||
// Add the meta fields to the schema if this is a managed table or an empty external table.
|
|
||||||
val options = table.storage.properties ++ extraTableConfig(sparkSession, false)
|
|
||||||
(addMetaFields(table.schema), options)
|
|
||||||
}
|
|
||||||
|
|
||||||
val tableType = HoodieOptionConfig.getTableType(table.storage.properties)
|
|
||||||
val inputFormat = tableType match {
|
val inputFormat = tableType match {
|
||||||
case DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL =>
|
case DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL =>
|
||||||
classOf[HoodieParquetInputFormat].getCanonicalName
|
classOf[HoodieParquetInputFormat].getCanonicalName
|
||||||
@@ -134,31 +169,39 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
val outputFormat = HoodieInputFormatUtils.getOutputFormatClassName(HoodieFileFormat.PARQUET)
|
val outputFormat = HoodieInputFormatUtils.getOutputFormatClassName(HoodieFileFormat.PARQUET)
|
||||||
val serdeFormat = HoodieInputFormatUtils.getSerDeClassName(HoodieFileFormat.PARQUET)
|
val serdeFormat = HoodieInputFormatUtils.getSerDeClassName(HoodieFileFormat.PARQUET)
|
||||||
|
|
||||||
val newStorage = new CatalogStorageFormat(Some(new Path(path).toUri),
|
// only parameters irrelevant to hudi can be set to storage.properties
|
||||||
Some(inputFormat), Some(outputFormat), Some(serdeFormat),
|
val storageProperties = HoodieOptionConfig.deleteHooideOptions(options)
|
||||||
table.storage.compressed, tableOptions + ("path" -> path))
|
val newStorage = new CatalogStorageFormat(
|
||||||
|
Some(new Path(path).toUri),
|
||||||
|
Some(inputFormat),
|
||||||
|
Some(outputFormat),
|
||||||
|
Some(serdeFormat),
|
||||||
|
table.storage.compressed,
|
||||||
|
storageProperties + ("path" -> path))
|
||||||
|
|
||||||
val newDatabaseName = formatName(table.identifier.database
|
val newDatabaseName = formatName(table.identifier.database
|
||||||
.getOrElse(sessionState.catalog.getCurrentDatabase))
|
.getOrElse(sessionState.catalog.getCurrentDatabase))
|
||||||
val newTableName = formatName(table.identifier.table)
|
|
||||||
|
|
||||||
val newTableIdentifier = table.identifier
|
val newTableIdentifier = table.identifier
|
||||||
.copy(table = newTableName, database = Some(newDatabaseName))
|
.copy(table = tableName, database = Some(newDatabaseName))
|
||||||
|
|
||||||
val newTable = table.copy(identifier = newTableIdentifier,
|
// append pk, preCombineKey, type to the properties of table
|
||||||
schema = newSchema, storage = newStorage, createVersion = SPARK_VERSION)
|
val newTblProperties = table.storage.properties ++ table.properties ++ HoodieOptionConfig.extractSqlOptions(options)
|
||||||
// validate the table
|
val newTable = table.copy(
|
||||||
validateTable(newTable)
|
identifier = newTableIdentifier,
|
||||||
|
schema = finalSchema,
|
||||||
|
storage = newStorage,
|
||||||
|
createVersion = SPARK_VERSION,
|
||||||
|
properties = newTblProperties
|
||||||
|
)
|
||||||
|
|
||||||
// Create table in the catalog
|
// Create table in the catalog
|
||||||
val enableHive = isEnableHive(sparkSession)
|
val enableHive = isEnableHive(sparkSession)
|
||||||
if (enableHive) {
|
if (enableHive) {
|
||||||
createHiveDataSourceTable(newTable, sparkSession)
|
createHiveDataSourceTable(newTable, sparkSession)
|
||||||
} else {
|
} else {
|
||||||
sessionState.catalog.createTable(newTable, ignoreIfExists = false,
|
sessionState.catalog.createTable(newTable, ignoreIfExists = false, validateLocation = false)
|
||||||
validateLocation = checkPathForManagedTable)
|
|
||||||
}
|
}
|
||||||
newTable
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -170,8 +213,6 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
* @param sparkSession
|
* @param sparkSession
|
||||||
*/
|
*/
|
||||||
private def createHiveDataSourceTable(table: CatalogTable, sparkSession: SparkSession): Unit = {
|
private def createHiveDataSourceTable(table: CatalogTable, sparkSession: SparkSession): Unit = {
|
||||||
// check schema
|
|
||||||
verifyDataSchema(table.identifier, table.tableType, table.schema)
|
|
||||||
val dbName = table.identifier.database.get
|
val dbName = table.identifier.database.get
|
||||||
// check database
|
// check database
|
||||||
val dbExists = sparkSession.sessionState.catalog.databaseExists(dbName)
|
val dbExists = sparkSession.sessionState.catalog.databaseExists(dbName)
|
||||||
@@ -186,7 +227,7 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
val dataSourceProps = tableMetaToTableProps(sparkSession.sparkContext.conf,
|
val dataSourceProps = tableMetaToTableProps(sparkSession.sparkContext.conf,
|
||||||
table, table.schema)
|
table, table.schema)
|
||||||
|
|
||||||
val tableWithDataSourceProps = table.copy(properties = dataSourceProps)
|
val tableWithDataSourceProps = table.copy(properties = dataSourceProps ++ table.properties)
|
||||||
val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf,
|
val client = HiveClientUtils.newClientForMetadata(sparkSession.sparkContext.conf,
|
||||||
sparkSession.sessionState.newHadoopConf())
|
sparkSession.sessionState.newHadoopConf())
|
||||||
// create hive table.
|
// create hive table.
|
||||||
@@ -198,9 +239,8 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#verifyDataSchema
|
// This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#verifyDataSchema
|
||||||
private def verifyDataSchema(tableName: TableIdentifier,
|
private def verifyDataSchema(tableName: TableIdentifier, tableType: CatalogTableType,
|
||||||
tableType: CatalogTableType,
|
dataSchema: Seq[StructField]): Unit = {
|
||||||
dataSchema: StructType): Unit = {
|
|
||||||
if (tableType != CatalogTableType.VIEW) {
|
if (tableType != CatalogTableType.VIEW) {
|
||||||
val invalidChars = Seq(",", ":", ";")
|
val invalidChars = Seq(",", ":", ";")
|
||||||
def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f =>
|
def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f =>
|
||||||
@@ -230,10 +270,10 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#tableMetaToTableProps
|
// This code is forked from org.apache.spark.sql.hive.HiveExternalCatalog#tableMetaToTableProps
|
||||||
private def tableMetaToTableProps( sparkConf: SparkConf,
|
private def tableMetaToTableProps(sparkConf: SparkConf, table: CatalogTable,
|
||||||
table: CatalogTable,
|
schema: StructType): Map[String, String] = {
|
||||||
schema: StructType): Map[String, String] = {
|
|
||||||
val partitionColumns = table.partitionColumnNames
|
val partitionColumns = table.partitionColumnNames
|
||||||
val bucketSpec = table.bucketSpec
|
val bucketSpec = table.bucketSpec
|
||||||
|
|
||||||
@@ -280,24 +320,6 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
properties.toMap
|
properties.toMap
|
||||||
}
|
}
|
||||||
|
|
||||||
private def validateTable(table: CatalogTable): Unit = {
|
|
||||||
val options = table.storage.properties
|
|
||||||
// validate the pk if it exist in the table.
|
|
||||||
HoodieOptionConfig.getPrimaryColumns(options).foreach(pk => table.schema.fieldIndex(pk))
|
|
||||||
// validate the version column if it exist in the table.
|
|
||||||
HoodieOptionConfig.getPreCombineField(options).foreach(v => table.schema.fieldIndex(v))
|
|
||||||
// validate the partition columns
|
|
||||||
table.partitionColumnNames.foreach(p => table.schema.fieldIndex(p))
|
|
||||||
// validate table type
|
|
||||||
options.get(HoodieOptionConfig.SQL_KEY_TABLE_TYPE.sqlKeyName).foreach { tableType =>
|
|
||||||
ValidationUtils.checkArgument(
|
|
||||||
tableType.equalsIgnoreCase(HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_COW) ||
|
|
||||||
tableType.equalsIgnoreCase(HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_MOR),
|
|
||||||
s"'type' must be '${HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_COW}' or " +
|
|
||||||
s"'${HoodieOptionConfig.SQL_VALUE_TABLE_TYPE_MOR}'")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def extraTableConfig(sparkSession: SparkSession, isTableExists: Boolean,
|
def extraTableConfig(sparkSession: SparkSession, isTableExists: Boolean,
|
||||||
originTableConfig: Map[String, String] = Map.empty): Map[String, String] = {
|
originTableConfig: Map[String, String] = Map.empty): Map[String, String] = {
|
||||||
val extraConfig = mutable.Map.empty[String, String]
|
val extraConfig = mutable.Map.empty[String, String]
|
||||||
@@ -322,10 +344,7 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
extraConfig(HoodieTableConfig.URL_ENCODE_PARTITIONING.key) = HoodieTableConfig.URL_ENCODE_PARTITIONING.defaultValue()
|
extraConfig(HoodieTableConfig.URL_ENCODE_PARTITIONING.key) = HoodieTableConfig.URL_ENCODE_PARTITIONING.defaultValue()
|
||||||
}
|
}
|
||||||
|
|
||||||
val primaryColumns = HoodieOptionConfig.getPrimaryColumns(originTableConfig ++ table.storage.properties)
|
if (originTableConfig.contains(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key)) {
|
||||||
if (primaryColumns.isEmpty) {
|
|
||||||
extraConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key) = classOf[UuidKeyGenerator].getCanonicalName
|
|
||||||
} else if (originTableConfig.contains(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key)) {
|
|
||||||
extraConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key) =
|
extraConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key) =
|
||||||
HoodieSparkKeyGeneratorFactory.convertToSparkKeyGenerator(
|
HoodieSparkKeyGeneratorFactory.convertToSparkKeyGenerator(
|
||||||
originTableConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key))
|
originTableConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key))
|
||||||
@@ -334,31 +353,20 @@ case class CreateHoodieTableCommand(table: CatalogTable, ignoreIfExists: Boolean
|
|||||||
}
|
}
|
||||||
extraConfig.toMap
|
extraConfig.toMap
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
object CreateHoodieTableCommand extends Logging {
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Init the hoodie.properties.
|
* Init the hoodie.properties.
|
||||||
*/
|
*/
|
||||||
def initTableIfNeed(sparkSession: SparkSession, table: CatalogTable): Unit = {
|
def initTableIfNeed(sparkSession: SparkSession,
|
||||||
val location = getTableLocation(table, sparkSession)
|
location: String,
|
||||||
|
schema: StructType,
|
||||||
|
originTableConfig: Map[String, String],
|
||||||
|
sqlOptions: Map[String, String]): Unit = {
|
||||||
|
|
||||||
val conf = sparkSession.sessionState.newHadoopConf()
|
|
||||||
// Init the hoodie table
|
|
||||||
val originTableConfig = if (tableExistsInPath(location, conf)) {
|
|
||||||
val metaClient = HoodieTableMetaClient.builder()
|
|
||||||
.setBasePath(location)
|
|
||||||
.setConf(conf)
|
|
||||||
.build()
|
|
||||||
metaClient.getTableConfig.getProps.asScala.toMap
|
|
||||||
} else {
|
|
||||||
Map.empty[String, String]
|
|
||||||
}
|
|
||||||
|
|
||||||
val tableName = table.identifier.table
|
|
||||||
logInfo(s"Init hoodie.properties for $tableName")
|
logInfo(s"Init hoodie.properties for $tableName")
|
||||||
val tableOptions = HoodieOptionConfig.mappingSqlOptionToTableConfig(table.storage.properties)
|
val conf = sparkSession.sessionState.newHadoopConf()
|
||||||
|
|
||||||
|
val tableOptions = HoodieOptionConfig.mappingSqlOptionToTableConfig(sqlOptions)
|
||||||
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.PRECOMBINE_FIELD.key)
|
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.PRECOMBINE_FIELD.key)
|
||||||
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.PARTITION_FIELDS.key)
|
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.PARTITION_FIELDS.key)
|
||||||
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.RECORDKEY_FIELDS.key)
|
checkTableConfigEqual(originTableConfig, tableOptions, HoodieTableConfig.RECORDKEY_FIELDS.key)
|
||||||
@@ -372,10 +380,13 @@ object CreateHoodieTableCommand extends Logging {
|
|||||||
HoodieTableMetaClient.withPropertyBuilder()
|
HoodieTableMetaClient.withPropertyBuilder()
|
||||||
.fromProperties(properties)
|
.fromProperties(properties)
|
||||||
.setTableName(tableName)
|
.setTableName(tableName)
|
||||||
.setTableCreateSchema(SchemaConverters.toAvroType(table.schema).toString())
|
.setTableCreateSchema(SchemaConverters.toAvroType(schema).toString())
|
||||||
.setPartitionFields(table.partitionColumnNames.mkString(","))
|
.setPartitionFields(table.partitionColumnNames.mkString(","))
|
||||||
.initTable(conf, location)
|
.initTable(conf, location)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
object CreateHoodieTableCommand extends Logging {
|
||||||
|
|
||||||
def checkTableConfigEqual(originTableConfig: Map[String, String],
|
def checkTableConfigEqual(originTableConfig: Map[String, String],
|
||||||
newTableConfig: Map[String, String], configKey: String): Unit = {
|
newTableConfig: Map[String, String], configKey: String): Unit = {
|
||||||
|
|||||||
@@ -23,11 +23,12 @@ import org.apache.hudi.config.HoodieWriteConfig
|
|||||||
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
||||||
import org.apache.hudi.hive.ddl.HiveSyncMode
|
import org.apache.hudi.hive.ddl.HiveSyncMode
|
||||||
import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
|
import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
|
||||||
|
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable
|
import org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable
|
||||||
import org.apache.spark.sql.execution.command.RunnableCommand
|
import org.apache.spark.sql.execution.command.RunnableCommand
|
||||||
import org.apache.spark.sql.hudi.HoodieOptionConfig
|
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends RunnableCommand
|
case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends RunnableCommand
|
||||||
with SparkAdapterSupport {
|
with SparkAdapterSupport {
|
||||||
@@ -56,8 +57,8 @@ case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends Runnab
|
|||||||
}
|
}
|
||||||
|
|
||||||
private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = {
|
private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = {
|
||||||
val targetTable = sparkSession.sessionState.catalog
|
val targetTable = sparkSession.sessionState.catalog.getTableMetadata(tableId)
|
||||||
.getTableMetadata(tableId)
|
val tblProperties = targetTable.storage.properties ++ targetTable.properties
|
||||||
val path = getTableLocation(targetTable, sparkSession)
|
val path = getTableLocation(targetTable, sparkSession)
|
||||||
val conf = sparkSession.sessionState.newHadoopConf()
|
val conf = sparkSession.sessionState.newHadoopConf()
|
||||||
val metaClient = HoodieTableMetaClient.builder()
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
@@ -65,23 +66,27 @@ case class DeleteHoodieTableCommand(deleteTable: DeleteFromTable) extends Runnab
|
|||||||
.setConf(conf)
|
.setConf(conf)
|
||||||
.build()
|
.build()
|
||||||
val tableConfig = metaClient.getTableConfig
|
val tableConfig = metaClient.getTableConfig
|
||||||
val primaryColumns = HoodieOptionConfig.getPrimaryColumns(targetTable.storage.properties)
|
val tableSchema = getTableSqlSchema(metaClient).get
|
||||||
|
val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase)
|
||||||
|
val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name)))
|
||||||
|
val primaryColumns = tableConfig.getRecordKeyFields.get()
|
||||||
|
|
||||||
assert(primaryColumns.nonEmpty,
|
assert(primaryColumns.nonEmpty,
|
||||||
s"There are no primary key defined in table $tableId, cannot execute delete operator")
|
s"There are no primary key defined in table $tableId, cannot execute delete operator")
|
||||||
withSparkConf(sparkSession, targetTable.storage.properties) {
|
withSparkConf(sparkSession, tblProperties) {
|
||||||
Map(
|
Map(
|
||||||
"path" -> path,
|
"path" -> path,
|
||||||
TBL_NAME.key -> tableId.table,
|
TBL_NAME.key -> tableId.table,
|
||||||
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
||||||
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
||||||
KEYGENERATOR_CLASS_NAME.key -> tableConfig.getKeyGeneratorClassName,
|
KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
|
||||||
|
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName,
|
||||||
OPERATION.key -> DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL,
|
OPERATION.key -> DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL,
|
||||||
PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","),
|
PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp,
|
||||||
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
||||||
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
||||||
HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200",
|
HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200",
|
||||||
SqlKeyGenerator.PARTITION_SCHEMA -> targetTable.partitionSchema.toDDL
|
SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hudi.command
|
|||||||
|
|
||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.avro.generic.{GenericRecord, IndexedRecord}
|
import org.apache.avro.generic.{GenericRecord, IndexedRecord}
|
||||||
|
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecord}
|
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecord}
|
||||||
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
|
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
|
||||||
@@ -30,7 +31,8 @@ import org.apache.hudi.hive.MultiPartKeysValueExtractor
|
|||||||
import org.apache.hudi.hive.ddl.HiveSyncMode
|
import org.apache.hudi.hive.ddl.HiveSyncMode
|
||||||
import org.apache.hudi.keygen.ComplexKeyGenerator
|
import org.apache.hudi.keygen.ComplexKeyGenerator
|
||||||
import org.apache.hudi.sql.InsertMode
|
import org.apache.hudi.sql.InsertMode
|
||||||
import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter, HoodieWriterUtils}
|
import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter}
|
||||||
|
|
||||||
import org.apache.spark.internal.Logging
|
import org.apache.spark.internal.Logging
|
||||||
import org.apache.spark.sql.catalyst.catalog.CatalogTable
|
import org.apache.spark.sql.catalyst.catalog.CatalogTable
|
||||||
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
|
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
|
||||||
@@ -40,10 +42,13 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
|
|||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
||||||
import org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlUtils}
|
import org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlUtils}
|
||||||
import org.apache.spark.sql.internal.SQLConf
|
import org.apache.spark.sql.internal.SQLConf
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
|
import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
|
||||||
|
|
||||||
import java.util.Properties
|
import java.util.Properties
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Command for insert into hoodie table.
|
* Command for insert into hoodie table.
|
||||||
*/
|
*/
|
||||||
@@ -194,46 +199,39 @@ object InsertIntoHoodieTableCommand extends Logging {
|
|||||||
s"[${insertPartitions.keys.mkString(" " )}]" +
|
s"[${insertPartitions.keys.mkString(" " )}]" +
|
||||||
s" not equal to the defined partition in table[${table.partitionColumnNames.mkString(",")}]")
|
s" not equal to the defined partition in table[${table.partitionColumnNames.mkString(",")}]")
|
||||||
}
|
}
|
||||||
val options = table.storage.properties ++ extraOptions
|
|
||||||
val parameters = withSparkConf(sparkSession, options)()
|
|
||||||
|
|
||||||
val tableType = parameters.getOrElse(TABLE_TYPE.key, TABLE_TYPE.defaultValue)
|
|
||||||
val primaryColumns = HoodieOptionConfig.getPrimaryColumns(options)
|
|
||||||
val partitionFields = table.partitionColumnNames.mkString(",")
|
|
||||||
|
|
||||||
val path = getTableLocation(table, sparkSession)
|
val path = getTableLocation(table, sparkSession)
|
||||||
val conf = sparkSession.sessionState.newHadoopConf()
|
val conf = sparkSession.sessionState.newHadoopConf()
|
||||||
val isTableExists = tableExistsInPath(path, conf)
|
val isTableExists = tableExistsInPath(path, conf)
|
||||||
val tableConfig = if (isTableExists) {
|
val (tableConfig, tableSchema) = if (isTableExists) {
|
||||||
HoodieTableMetaClient.builder()
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
.setBasePath(path)
|
.setBasePath(path)
|
||||||
.setConf(conf)
|
.setConf(conf)
|
||||||
.build()
|
.build()
|
||||||
.getTableConfig
|
(metaClient.getTableConfig, getTableSqlSchema(metaClient).get)
|
||||||
} else {
|
} else {
|
||||||
null
|
(new HoodieTableConfig(), table.schema)
|
||||||
}
|
}
|
||||||
val hiveStylePartitioningEnable = if (null == tableConfig || null == tableConfig.getHiveStylePartitioningEnable) {
|
val partitionColumns = tableConfig.getPartitionFieldProp
|
||||||
"true"
|
val partitionSchema = if (null == partitionColumns || partitionColumns.isEmpty) {
|
||||||
|
table.partitionSchema
|
||||||
} else {
|
} else {
|
||||||
tableConfig.getHiveStylePartitioningEnable
|
StructType(tableSchema.filter(f => partitionColumns.contains(f.name)))
|
||||||
}
|
|
||||||
val urlEncodePartitioning = if (null == tableConfig || null == tableConfig.getUrlEncodePartitoning) {
|
|
||||||
"false"
|
|
||||||
} else {
|
|
||||||
tableConfig.getUrlEncodePartitoning
|
|
||||||
}
|
|
||||||
val keyGeneratorClassName = if (null == tableConfig || null == tableConfig.getKeyGeneratorClassName) {
|
|
||||||
if (primaryColumns.nonEmpty) {
|
|
||||||
classOf[ComplexKeyGenerator].getCanonicalName
|
|
||||||
} else {
|
|
||||||
classOf[UuidKeyGenerator].getCanonicalName
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
tableConfig.getKeyGeneratorClassName
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val tableSchema = table.schema
|
val options = table.storage.properties ++ table.properties ++ tableConfig.getProps.asScala.toMap ++ extraOptions
|
||||||
|
val parameters = withSparkConf(sparkSession, options)()
|
||||||
|
|
||||||
|
val tableName = Option(tableConfig.getTableName).getOrElse(table.identifier.table)
|
||||||
|
val tableType = Option(tableConfig.getTableType.name).getOrElse(TABLE_TYPE.defaultValue)
|
||||||
|
val primaryColumns = tableConfig.getRecordKeyFields.orElse(HoodieOptionConfig.getPrimaryColumns(options))
|
||||||
|
val preCombineColumn = Option(tableConfig.getPreCombineField)
|
||||||
|
.getOrElse(HoodieOptionConfig.getPreCombineField(options).getOrElse(""))
|
||||||
|
val partitionFields = Option(tableConfig.getPartitionFieldProp)
|
||||||
|
.getOrElse(table.partitionColumnNames.mkString(","))
|
||||||
|
val hiveStylePartitioningEnable = Option(tableConfig.getHiveStylePartitioningEnable).getOrElse("true")
|
||||||
|
val urlEncodePartitioning = Option(tableConfig.getUrlEncodePartitoning).getOrElse("false")
|
||||||
|
val keyGeneratorClassName = Option(tableConfig.getKeyGeneratorClassName)
|
||||||
|
.getOrElse(classOf[ComplexKeyGenerator].getCanonicalName)
|
||||||
|
|
||||||
val dropDuplicate = sparkSession.conf
|
val dropDuplicate = sparkSession.conf
|
||||||
.getOption(INSERT_DROP_DUPS.key)
|
.getOption(INSERT_DROP_DUPS.key)
|
||||||
@@ -242,35 +240,33 @@ object InsertIntoHoodieTableCommand extends Logging {
|
|||||||
|
|
||||||
val enableBulkInsert = parameters.getOrElse(DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key,
|
val enableBulkInsert = parameters.getOrElse(DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key,
|
||||||
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.defaultValue()).toBoolean
|
DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.defaultValue()).toBoolean
|
||||||
|
val hasPrecombineColumn = preCombineColumn.nonEmpty
|
||||||
val isPartitionedTable = table.partitionColumnNames.nonEmpty
|
val isPartitionedTable = table.partitionColumnNames.nonEmpty
|
||||||
val isPrimaryKeyTable = primaryColumns.nonEmpty
|
|
||||||
val insertMode = InsertMode.of(parameters.getOrElse(DataSourceWriteOptions.SQL_INSERT_MODE.key,
|
val insertMode = InsertMode.of(parameters.getOrElse(DataSourceWriteOptions.SQL_INSERT_MODE.key,
|
||||||
DataSourceWriteOptions.SQL_INSERT_MODE.defaultValue()))
|
DataSourceWriteOptions.SQL_INSERT_MODE.defaultValue()))
|
||||||
val isNonStrictMode = insertMode == InsertMode.NON_STRICT
|
val isNonStrictMode = insertMode == InsertMode.NON_STRICT
|
||||||
|
|
||||||
val operation =
|
val operation =
|
||||||
(isPrimaryKeyTable, enableBulkInsert, isOverwrite, dropDuplicate) match {
|
(enableBulkInsert, isOverwrite, dropDuplicate, isNonStrictMode, isPartitionedTable) match {
|
||||||
case (true, true, _, _) if !isNonStrictMode =>
|
case (true, _, _, false, _) =>
|
||||||
throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.")
|
throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.")
|
||||||
case (_, true, true, _) if isPartitionedTable =>
|
case (true, true, _, _, true) =>
|
||||||
throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.")
|
throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.")
|
||||||
case (_, true, _, true) =>
|
case (true, _, true, _, _) =>
|
||||||
throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." +
|
throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." +
|
||||||
s" Please disable $INSERT_DROP_DUPS and try again.")
|
s" Please disable $INSERT_DROP_DUPS and try again.")
|
||||||
// if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table.
|
// if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table.
|
||||||
case (_, true, true, _) if !isPartitionedTable => BULK_INSERT_OPERATION_OPT_VAL
|
case (true, true, _, _, false) => BULK_INSERT_OPERATION_OPT_VAL
|
||||||
// insert overwrite partition
|
|
||||||
case (_, _, true, _) if isPartitionedTable => INSERT_OVERWRITE_OPERATION_OPT_VAL
|
|
||||||
// insert overwrite table
|
// insert overwrite table
|
||||||
case (_, _, true, _) if !isPartitionedTable => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL
|
case (false, true, _, _, false) => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL
|
||||||
// if it is pk table and the dropDuplicate has disable, use the upsert operation for strict and upsert mode.
|
// insert overwrite partition
|
||||||
case (true, false, false, false) if !isNonStrictMode => UPSERT_OPERATION_OPT_VAL
|
case (_, true, _, _, true) => INSERT_OVERWRITE_OPERATION_OPT_VAL
|
||||||
// if enableBulkInsert is true and the table is non-primaryKeyed, use the bulk insert operation
|
// disable dropDuplicate, and provide preCombineKey, use the upsert operation for strict and upsert mode.
|
||||||
case (false, true, _, _) => BULK_INSERT_OPERATION_OPT_VAL
|
case (false, false, false, false, _) if hasPrecombineColumn => UPSERT_OPERATION_OPT_VAL
|
||||||
// if table is pk table and has enableBulkInsert use bulk insert for non-strict mode.
|
// if table is pk table and has enableBulkInsert use bulk insert for non-strict mode.
|
||||||
case (true, true, _, _) if isNonStrictMode => BULK_INSERT_OPERATION_OPT_VAL
|
case (true, _, _, true, _) => BULK_INSERT_OPERATION_OPT_VAL
|
||||||
// for the rest case, use the insert operation
|
// for the rest case, use the insert operation
|
||||||
case (_, _, _, _) => INSERT_OPERATION_OPT_VAL
|
case _ => INSERT_OPERATION_OPT_VAL
|
||||||
}
|
}
|
||||||
|
|
||||||
val payloadClassName = if (operation == UPSERT_OPERATION_OPT_VAL &&
|
val payloadClassName = if (operation == UPSERT_OPERATION_OPT_VAL &&
|
||||||
@@ -288,17 +284,18 @@ object InsertIntoHoodieTableCommand extends Logging {
|
|||||||
Map(
|
Map(
|
||||||
"path" -> path,
|
"path" -> path,
|
||||||
TABLE_TYPE.key -> tableType,
|
TABLE_TYPE.key -> tableType,
|
||||||
TBL_NAME.key -> table.identifier.table,
|
TBL_NAME.key -> tableName,
|
||||||
PRECOMBINE_FIELD.key -> tableSchema.fields.last.name,
|
PRECOMBINE_FIELD.key -> preCombineColumn,
|
||||||
OPERATION.key -> operation,
|
OPERATION.key -> operation,
|
||||||
HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable,
|
HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable,
|
||||||
URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning,
|
URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning,
|
||||||
KEYGENERATOR_CLASS_NAME.key -> keyGeneratorClassName,
|
KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
|
||||||
|
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> keyGeneratorClassName,
|
||||||
RECORDKEY_FIELD.key -> primaryColumns.mkString(","),
|
RECORDKEY_FIELD.key -> primaryColumns.mkString(","),
|
||||||
PARTITIONPATH_FIELD.key -> partitionFields,
|
PARTITIONPATH_FIELD.key -> partitionFields,
|
||||||
PAYLOAD_CLASS_NAME.key -> payloadClassName,
|
PAYLOAD_CLASS_NAME.key -> payloadClassName,
|
||||||
ENABLE_ROW_WRITER.key -> enableBulkInsert.toString,
|
ENABLE_ROW_WRITER.key -> enableBulkInsert.toString,
|
||||||
HoodieWriteConfig.COMBINE_BEFORE_INSERT.key -> isPrimaryKeyTable.toString,
|
HoodieWriteConfig.COMBINE_BEFORE_INSERT.key -> String.valueOf(hasPrecombineColumn),
|
||||||
META_SYNC_ENABLED.key -> enableHive.toString,
|
META_SYNC_ENABLED.key -> enableHive.toString,
|
||||||
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
||||||
HIVE_USE_JDBC.key -> "false",
|
HIVE_USE_JDBC.key -> "false",
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hudi.command
|
|||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
|
import org.apache.hudi.common.util.StringUtils
|
||||||
import org.apache.hudi.config.HoodieWriteConfig
|
import org.apache.hudi.config.HoodieWriteConfig
|
||||||
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
||||||
import org.apache.hudi.hive.MultiPartKeysValueExtractor
|
import org.apache.hudi.hive.MultiPartKeysValueExtractor
|
||||||
@@ -80,8 +81,9 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab
|
|||||||
private lazy val targetTable =
|
private lazy val targetTable =
|
||||||
sparkSession.sessionState.catalog.getTableMetadata(targetTableIdentify)
|
sparkSession.sessionState.catalog.getTableMetadata(targetTableIdentify)
|
||||||
|
|
||||||
private lazy val targetTableType =
|
private lazy val tblProperties = targetTable.storage.properties ++ targetTable.properties
|
||||||
HoodieOptionConfig.getTableType(targetTable.storage.properties)
|
|
||||||
|
private lazy val targetTableType = HoodieOptionConfig.getTableType(tblProperties)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -124,7 +126,7 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab
|
|||||||
assert(updateActions.size <= 1, s"Only support one updateAction currently, current update action count is: ${updateActions.size}")
|
assert(updateActions.size <= 1, s"Only support one updateAction currently, current update action count is: ${updateActions.size}")
|
||||||
|
|
||||||
val updateAction = updateActions.headOption
|
val updateAction = updateActions.headOption
|
||||||
HoodieOptionConfig.getPreCombineField(targetTable.storage.properties).map(preCombineField => {
|
HoodieOptionConfig.getPreCombineField(tblProperties).map(preCombineField => {
|
||||||
val sourcePreCombineField =
|
val sourcePreCombineField =
|
||||||
updateAction.map(u => u.assignments.filter {
|
updateAction.map(u => u.assignments.filter {
|
||||||
case Assignment(key: AttributeReference, _) => key.name.equalsIgnoreCase(preCombineField)
|
case Assignment(key: AttributeReference, _) => key.name.equalsIgnoreCase(preCombineField)
|
||||||
@@ -242,8 +244,13 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab
|
|||||||
// Append the table schema to the parameters. In the case of merge into, the schema of sourceDF
|
// Append the table schema to the parameters. In the case of merge into, the schema of sourceDF
|
||||||
// may be different from the target table, because the are transform logical in the update or
|
// may be different from the target table, because the are transform logical in the update or
|
||||||
// insert actions.
|
// insert actions.
|
||||||
|
val operation = if (StringUtils.isNullOrEmpty(parameters.getOrElse(PRECOMBINE_FIELD.key, ""))) {
|
||||||
|
INSERT_OPERATION_OPT_VAL
|
||||||
|
} else {
|
||||||
|
UPSERT_OPERATION_OPT_VAL
|
||||||
|
}
|
||||||
var writeParams = parameters +
|
var writeParams = parameters +
|
||||||
(OPERATION.key -> UPSERT_OPERATION_OPT_VAL) +
|
(OPERATION.key -> operation) +
|
||||||
(HoodieWriteConfig.WRITE_SCHEMA.key -> getTableSchema.toString) +
|
(HoodieWriteConfig.WRITE_SCHEMA.key -> getTableSchema.toString) +
|
||||||
(DataSourceWriteOptions.TABLE_TYPE.key -> targetTableType)
|
(DataSourceWriteOptions.TABLE_TYPE.key -> targetTableType)
|
||||||
|
|
||||||
@@ -436,38 +443,38 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Runnab
|
|||||||
.setConf(conf)
|
.setConf(conf)
|
||||||
.build()
|
.build()
|
||||||
val tableConfig = metaClient.getTableConfig
|
val tableConfig = metaClient.getTableConfig
|
||||||
val options = targetTable.storage.properties
|
val tableSchema = getTableSqlSchema(metaClient).get
|
||||||
val definedPk = HoodieOptionConfig.getPrimaryColumns(options)
|
val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase)
|
||||||
// TODO Currently the mergeEqualConditionKeys must be the same the primary key.
|
val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name)))
|
||||||
if (targetKey2SourceExpression.keySet != definedPk.toSet) {
|
val options = tblProperties
|
||||||
throw new IllegalArgumentException(s"Merge Key[${targetKey2SourceExpression.keySet.mkString(",")}] is not" +
|
val preCombineColumn = Option(tableConfig.getPreCombineField).getOrElse("")
|
||||||
s" Equal to the defined primary key[${definedPk.mkString(",")}] in table $targetTableName")
|
|
||||||
}
|
|
||||||
// Enable the hive sync by default if spark have enable the hive metastore.
|
// Enable the hive sync by default if spark have enable the hive metastore.
|
||||||
val enableHive = isEnableHive(sparkSession)
|
val enableHive = isEnableHive(sparkSession)
|
||||||
withSparkConf(sparkSession, options) {
|
withSparkConf(sparkSession, options) {
|
||||||
Map(
|
Map(
|
||||||
"path" -> path,
|
"path" -> path,
|
||||||
RECORDKEY_FIELD.key -> targetKey2SourceExpression.keySet.mkString(","),
|
RECORDKEY_FIELD.key -> tableConfig.getRecordKeyFieldProp,
|
||||||
PRECOMBINE_FIELD.key -> targetKey2SourceExpression.keySet.head, // set a default preCombine field
|
PRECOMBINE_FIELD.key -> preCombineColumn,
|
||||||
TBL_NAME.key -> targetTableName,
|
TBL_NAME.key -> targetTableName,
|
||||||
PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","),
|
PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp,
|
||||||
PAYLOAD_CLASS_NAME.key -> classOf[ExpressionPayload].getCanonicalName,
|
PAYLOAD_CLASS_NAME.key -> classOf[ExpressionPayload].getCanonicalName,
|
||||||
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
||||||
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
||||||
KEYGENERATOR_CLASS_NAME.key -> tableConfig.getKeyGeneratorClassName,
|
KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
|
||||||
|
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName,
|
||||||
META_SYNC_ENABLED.key -> enableHive.toString,
|
META_SYNC_ENABLED.key -> enableHive.toString,
|
||||||
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
||||||
HIVE_USE_JDBC.key -> "false",
|
HIVE_USE_JDBC.key -> "false",
|
||||||
HIVE_DATABASE.key -> targetTableDb,
|
HIVE_DATABASE.key -> targetTableDb,
|
||||||
HIVE_TABLE.key -> targetTableName,
|
HIVE_TABLE.key -> targetTableName,
|
||||||
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
||||||
HIVE_PARTITION_FIELDS.key -> targetTable.partitionColumnNames.mkString(","),
|
HIVE_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp,
|
||||||
HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName,
|
HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName,
|
||||||
HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", // set the default parallelism to 200 for sql
|
HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", // set the default parallelism to 200 for sql
|
||||||
HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200",
|
HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200",
|
||||||
HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200",
|
HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200",
|
||||||
SqlKeyGenerator.PARTITION_SCHEMA -> targetTable.partitionSchema.toDDL
|
SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,20 +18,20 @@
|
|||||||
package org.apache.spark.sql.hudi.command
|
package org.apache.spark.sql.hudi.command
|
||||||
|
|
||||||
import org.apache.hudi.DataSourceWriteOptions._
|
import org.apache.hudi.DataSourceWriteOptions._
|
||||||
|
import org.apache.hudi.SparkAdapterSupport
|
||||||
import org.apache.hudi.common.model.HoodieRecord
|
import org.apache.hudi.common.model.HoodieRecord
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
import org.apache.hudi.config.HoodieWriteConfig
|
import org.apache.hudi.config.HoodieWriteConfig
|
||||||
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
|
||||||
import org.apache.hudi.hive.MultiPartKeysValueExtractor
|
import org.apache.hudi.hive.MultiPartKeysValueExtractor
|
||||||
import org.apache.hudi.hive.ddl.HiveSyncMode
|
import org.apache.hudi.hive.ddl.HiveSyncMode
|
||||||
import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
|
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
|
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.{Assignment, UpdateTable}
|
import org.apache.spark.sql.catalyst.plans.logical.{Assignment, UpdateTable}
|
||||||
import org.apache.spark.sql.execution.command.RunnableCommand
|
import org.apache.spark.sql.execution.command.RunnableCommand
|
||||||
import org.apache.spark.sql.hudi.HoodieOptionConfig
|
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
||||||
import org.apache.spark.sql.types.StructField
|
import org.apache.spark.sql.types.{StructField, StructType}
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
@@ -83,8 +83,8 @@ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends RunnableCo
|
|||||||
}
|
}
|
||||||
|
|
||||||
private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = {
|
private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = {
|
||||||
val targetTable = sparkSession.sessionState.catalog
|
val targetTable = sparkSession.sessionState.catalog.getTableMetadata(tableId)
|
||||||
.getTableMetadata(tableId)
|
val tblProperties = targetTable.storage.properties ++ targetTable.properties
|
||||||
val path = getTableLocation(targetTable, sparkSession)
|
val path = getTableLocation(targetTable, sparkSession)
|
||||||
val conf = sparkSession.sessionState.newHadoopConf()
|
val conf = sparkSession.sessionState.newHadoopConf()
|
||||||
val metaClient = HoodieTableMetaClient.builder()
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
@@ -92,32 +92,37 @@ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends RunnableCo
|
|||||||
.setConf(conf)
|
.setConf(conf)
|
||||||
.build()
|
.build()
|
||||||
val tableConfig = metaClient.getTableConfig
|
val tableConfig = metaClient.getTableConfig
|
||||||
val primaryColumns = HoodieOptionConfig.getPrimaryColumns(targetTable.storage.properties)
|
val tableSchema = getTableSqlSchema(metaClient).get
|
||||||
|
val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase)
|
||||||
|
val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name)))
|
||||||
|
val primaryColumns = tableConfig.getRecordKeyFields.get()
|
||||||
|
val preCombineColumn = Option(tableConfig.getPreCombineField).getOrElse("")
|
||||||
assert(primaryColumns.nonEmpty,
|
assert(primaryColumns.nonEmpty,
|
||||||
s"There are no primary key in table $tableId, cannot execute update operator")
|
s"There are no primary key in table $tableId, cannot execute update operator")
|
||||||
val enableHive = isEnableHive(sparkSession)
|
val enableHive = isEnableHive(sparkSession)
|
||||||
withSparkConf(sparkSession, targetTable.storage.properties) {
|
|
||||||
|
withSparkConf(sparkSession, tblProperties) {
|
||||||
Map(
|
Map(
|
||||||
"path" -> path,
|
"path" -> path,
|
||||||
RECORDKEY_FIELD.key -> primaryColumns.mkString(","),
|
RECORDKEY_FIELD.key -> primaryColumns.mkString(","),
|
||||||
PRECOMBINE_FIELD.key -> primaryColumns.head, //set the default preCombine field.
|
PRECOMBINE_FIELD.key -> preCombineColumn,
|
||||||
TBL_NAME.key -> tableId.table,
|
TBL_NAME.key -> tableId.table,
|
||||||
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
|
||||||
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitoning,
|
||||||
KEYGENERATOR_CLASS_NAME.key -> tableConfig.getKeyGeneratorClassName,
|
KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
|
||||||
OPERATION.key -> DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL,
|
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName,
|
||||||
PARTITIONPATH_FIELD.key -> targetTable.partitionColumnNames.mkString(","),
|
OPERATION.key -> UPSERT_OPERATION_OPT_VAL,
|
||||||
|
PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp,
|
||||||
META_SYNC_ENABLED.key -> enableHive.toString,
|
META_SYNC_ENABLED.key -> enableHive.toString,
|
||||||
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(),
|
||||||
HIVE_USE_JDBC.key -> "false",
|
HIVE_USE_JDBC.key -> "false",
|
||||||
HIVE_DATABASE.key -> tableId.database.getOrElse("default"),
|
HIVE_DATABASE.key -> tableId.database.getOrElse("default"),
|
||||||
HIVE_TABLE.key -> tableId.table,
|
HIVE_TABLE.key -> tableId.table,
|
||||||
HIVE_PARTITION_FIELDS.key -> targetTable.partitionColumnNames.mkString(","),
|
HIVE_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp,
|
||||||
HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName,
|
HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName,
|
||||||
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true",
|
||||||
HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200",
|
HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200",
|
||||||
SqlKeyGenerator.PARTITION_SCHEMA -> targetTable.partitionSchema.toDDL
|
SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -206,7 +206,7 @@ public class HoodieJavaApp {
|
|||||||
.option(DataSourceWriteOptions.OPERATION().key(), "delete")
|
.option(DataSourceWriteOptions.OPERATION().key(), "delete")
|
||||||
.option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key")
|
.option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key")
|
||||||
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition")
|
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition")
|
||||||
.option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "_row_key")
|
.option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp")
|
||||||
.option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(),
|
.option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(),
|
||||||
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
nonPartitionedTable ? NonpartitionedKeyGenerator.class.getCanonicalName()
|
||||||
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
|
: SimpleKeyGenerator.class.getCanonicalName()) // Add Key Extractor
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ set hoodie.delete.shuffle.parallelism = 1;
|
|||||||
|
|
||||||
# CTAS
|
# CTAS
|
||||||
|
|
||||||
create table h0 using hudi options(type = '${tableType}')
|
create table h0 using hudi options(type = '${tableType}', primaryKey = 'id')
|
||||||
as select 1 as id, 'a1' as name, 10 as price;
|
as select 1 as id, 'a1' as name, 10 as price;
|
||||||
+----------+
|
+----------+
|
||||||
| ok |
|
| ok |
|
||||||
@@ -30,7 +30,7 @@ select id, name, price from h0;
|
|||||||
+-----------+
|
+-----------+
|
||||||
|
|
||||||
create table h0_p using hudi partitioned by(dt)
|
create table h0_p using hudi partitioned by(dt)
|
||||||
options(type = '${tableType}')
|
options(type = '${tableType}', primaryKey = 'id')
|
||||||
as select cast('2021-05-07 00:00:00' as timestamp) as dt,
|
as select cast('2021-05-07 00:00:00' as timestamp) as dt,
|
||||||
1 as id, 'a1' as name, 10 as price;
|
1 as id, 'a1' as name, 10 as price;
|
||||||
+----------+
|
+----------+
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig
|
||||||
|
import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator}
|
||||||
|
|
||||||
|
import org.apache.spark.sql.hudi.command.SqlKeyGenerator
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions.assertTrue
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
class HoodieSparkSqlWriterSuite2 {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testGetOriginKeyGenerator(): Unit = {
|
||||||
|
// for dataframe write
|
||||||
|
val m1 = Map(
|
||||||
|
HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key -> classOf[ComplexKeyGenerator].getName
|
||||||
|
)
|
||||||
|
val kg1 = HoodieWriterUtils.getOriginKeyGenerator(m1)
|
||||||
|
assertTrue(kg1 == classOf[ComplexKeyGenerator].getName)
|
||||||
|
|
||||||
|
// for sql write
|
||||||
|
val m2 = Map(
|
||||||
|
HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getName,
|
||||||
|
SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> classOf[SimpleKeyGenerator].getName
|
||||||
|
)
|
||||||
|
val kg2 = HoodieWriterUtils.getOriginKeyGenerator(m2)
|
||||||
|
assertTrue(kg2 == classOf[SimpleKeyGenerator].getName)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -102,7 +102,10 @@ class TestDataSourceForBootstrap {
|
|||||||
.save(srcPath)
|
.save(srcPath)
|
||||||
|
|
||||||
// Perform bootstrap
|
// Perform bootstrap
|
||||||
val commitInstantTime1 = runMetadataBootstrapAndVerifyCommit(DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL)
|
val commitInstantTime1 = runMetadataBootstrapAndVerifyCommit(
|
||||||
|
DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
|
||||||
|
extraOpts = Map(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator")
|
||||||
|
)
|
||||||
|
|
||||||
// Read bootstrapped table and verify count
|
// Read bootstrapped table and verify count
|
||||||
var hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
|
var hoodieROViewDF1 = spark.read.format("hudi").load(basePath + "/*")
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class TestAlterTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '$tablePath'
|
| location '$tablePath'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -127,7 +127,7 @@ class TestAlterTable extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$partitionedTable'
|
| location '${tmp.getCanonicalPath}/$partitionedTable'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
| )
|
| )
|
||||||
| using hudi
|
| using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -77,7 +77,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
| options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|)
|
|)
|
||||||
@@ -105,7 +105,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
| )
|
| )
|
||||||
| using hudi
|
| using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -151,7 +151,7 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
| options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|)
|
|)
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class TestCompactionTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -82,7 +82,7 @@ class TestCompactionTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -62,6 +62,53 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
)(table.schema.fields)
|
)(table.schema.fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test("Test Create Hoodie Table With Options") {
|
||||||
|
val tableName = generateTableName
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
| create table $tableName (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long,
|
||||||
|
| dt string
|
||||||
|
| ) using hudi
|
||||||
|
| partitioned by (dt)
|
||||||
|
| options (
|
||||||
|
| primaryKey = 'id',
|
||||||
|
| preCombineField = 'ts'
|
||||||
|
| )
|
||||||
|
""".stripMargin)
|
||||||
|
val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
|
||||||
|
assertResult(table.properties("type"))("cow")
|
||||||
|
assertResult(table.properties("primaryKey"))("id")
|
||||||
|
assertResult(table.properties("preCombineField"))("ts")
|
||||||
|
assertResult(tableName)(table.identifier.table)
|
||||||
|
assertResult("hudi")(table.provider.get)
|
||||||
|
assertResult(CatalogTableType.MANAGED)(table.tableType)
|
||||||
|
assertResult(
|
||||||
|
HoodieRecord.HOODIE_META_COLUMNS.asScala.map(StructField(_, StringType))
|
||||||
|
++ Seq(
|
||||||
|
StructField("id", IntegerType),
|
||||||
|
StructField("name", StringType),
|
||||||
|
StructField("price", DoubleType),
|
||||||
|
StructField("ts", LongType),
|
||||||
|
StructField("dt", StringType))
|
||||||
|
)(table.schema.fields)
|
||||||
|
|
||||||
|
val tablePath = table.storage.properties("path")
|
||||||
|
val metaClient = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(tablePath)
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap
|
||||||
|
assertResult(true)(tableConfig.contains(HoodieTableConfig.CREATE_SCHEMA.key))
|
||||||
|
assertResult("dt")(tableConfig(HoodieTableConfig.PARTITION_FIELDS.key))
|
||||||
|
assertResult("id")(tableConfig(HoodieTableConfig.RECORDKEY_FIELDS.key))
|
||||||
|
assertResult("ts")(tableConfig(HoodieTableConfig.PRECOMBINE_FIELD.key))
|
||||||
|
assertResult(classOf[ComplexKeyGenerator].getCanonicalName)(tableConfig(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME.key))
|
||||||
|
}
|
||||||
|
|
||||||
test("Test Create External Hoodie Table") {
|
test("Test Create External Hoodie Table") {
|
||||||
withTempDir { tmp =>
|
withTempDir { tmp =>
|
||||||
// Test create cow table.
|
// Test create cow table.
|
||||||
@@ -74,7 +121,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id,name',
|
| primaryKey = 'id,name',
|
||||||
| type = 'cow'
|
| type = 'cow'
|
||||||
| )
|
| )
|
||||||
@@ -93,8 +140,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
StructField("price", DoubleType),
|
StructField("price", DoubleType),
|
||||||
StructField("ts", LongType))
|
StructField("ts", LongType))
|
||||||
)(table.schema.fields)
|
)(table.schema.fields)
|
||||||
assertResult(table.storage.properties("type"))("cow")
|
assertResult(table.properties("type"))("cow")
|
||||||
assertResult(table.storage.properties("primaryKey"))("id,name")
|
assertResult(table.properties("primaryKey"))("id,name")
|
||||||
|
|
||||||
spark.sql(s"drop table $tableName")
|
spark.sql(s"drop table $tableName")
|
||||||
// Test create mor partitioned table
|
// Test create mor partitioned table
|
||||||
@@ -108,15 +155,15 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| type = 'mor'
|
| type = 'mor'
|
||||||
| )
|
| )
|
||||||
| location '${tmp.getCanonicalPath}/h0'
|
| location '${tmp.getCanonicalPath}/h0'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
|
val table2 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
|
||||||
assertResult(table2.storage.properties("type"))("mor")
|
assertResult(table2.properties("type"))("mor")
|
||||||
assertResult(table2.storage.properties("primaryKey"))("id")
|
assertResult(table2.properties("primaryKey"))("id")
|
||||||
assertResult(Seq("dt"))(table2.partitionColumnNames)
|
assertResult(Seq("dt"))(table2.partitionColumnNames)
|
||||||
assertResult(classOf[HoodieParquetRealtimeInputFormat].getCanonicalName)(table2.storage.inputFormat.get)
|
assertResult(classOf[HoodieParquetRealtimeInputFormat].getCanonicalName)(table2.storage.inputFormat.get)
|
||||||
|
|
||||||
@@ -129,8 +176,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
|location '${tmp.getCanonicalPath}/h0'
|
|location '${tmp.getCanonicalPath}/h0'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
val table3 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName3))
|
val table3 = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName3))
|
||||||
assertResult(table3.storage.properties("type"))("mor")
|
assertResult(table3.properties("type"))("mor")
|
||||||
assertResult(table3.storage.properties("primaryKey"))("id")
|
assertResult(table3.properties("primaryKey"))("id")
|
||||||
assertResult(
|
assertResult(
|
||||||
HoodieRecord.HOODIE_META_COLUMNS.asScala.map(StructField(_, StringType))
|
HoodieRecord.HOODIE_META_COLUMNS.asScala.map(StructField(_, StringType))
|
||||||
++ Seq(
|
++ Seq(
|
||||||
@@ -156,7 +203,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id1',
|
| primaryKey = 'id1',
|
||||||
| type = 'cow'
|
| type = 'cow'
|
||||||
| )
|
| )
|
||||||
@@ -173,7 +220,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts1',
|
| preCombineField = 'ts1',
|
||||||
| type = 'cow'
|
| type = 'cow'
|
||||||
@@ -191,7 +238,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts',
|
| preCombineField = 'ts',
|
||||||
| type = 'cow1'
|
| type = 'cow1'
|
||||||
@@ -208,7 +255,8 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
val tableName1 = generateTableName
|
val tableName1 = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName1 using hudi
|
| create table $tableName1 using hudi
|
||||||
|
| tblproperties(primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tableName1'
|
| location '${tmp.getCanonicalPath}/$tableName1'
|
||||||
| AS
|
| AS
|
||||||
| select 1 as id, 'a1' as name, 10 as price, 1000 as ts
|
| select 1 as id, 'a1' as name, 10 as price, 1000 as ts
|
||||||
@@ -223,6 +271,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
s"""
|
s"""
|
||||||
| create table $tableName2 using hudi
|
| create table $tableName2 using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
|
| tblproperties(primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tableName2'
|
| location '${tmp.getCanonicalPath}/$tableName2'
|
||||||
| AS
|
| AS
|
||||||
| select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt
|
| select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt
|
||||||
@@ -240,7 +289,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
s"""
|
s"""
|
||||||
| create table $tableName3 using hudi
|
| create table $tableName3 using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| options(primaryKey = 'id')
|
| tblproperties(primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||||
| AS
|
| AS
|
||||||
| select null as id, 'a1' as name, 10 as price, '2021-05-07' as dt
|
| select null as id, 'a1' as name, 10 as price, '2021-05-07' as dt
|
||||||
@@ -252,6 +301,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
s"""
|
s"""
|
||||||
| create table $tableName3 using hudi
|
| create table $tableName3 using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
|
| tblproperties(primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tableName3'
|
| location '${tmp.getCanonicalPath}/$tableName3'
|
||||||
| AS
|
| AS
|
||||||
| select cast('2021-05-06 00:00:00' as timestamp) as dt, 1 as id, 'a1' as name, 10 as
|
| select cast('2021-05-06 00:00:00' as timestamp) as dt, 1 as id, 'a1' as name, 10 as
|
||||||
@@ -267,6 +317,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
s"""
|
s"""
|
||||||
| create table $tableName4 using hudi
|
| create table $tableName4 using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
|
| tblproperties(primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tableName4'
|
| location '${tmp.getCanonicalPath}/$tableName4'
|
||||||
| AS
|
| AS
|
||||||
| select cast('2021-05-06' as date) as dt, 1 as id, 'a1' as name, 10 as
|
| select cast('2021-05-06' as date) as dt, 1 as id, 'a1' as name, 10 as
|
||||||
@@ -303,7 +354,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
| options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|)
|
|)
|
||||||
@@ -380,7 +431,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
| options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|)
|
|)
|
||||||
@@ -455,7 +506,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
| options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|)
|
|)
|
||||||
@@ -514,6 +565,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| name string,
|
| name string,
|
||||||
| price double
|
| price double
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
|tblproperties(primaryKey = 'id')
|
||||||
|""".stripMargin
|
|""".stripMargin
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -527,6 +579,7 @@ class TestCreateTable extends TestHoodieSqlBase {
|
|||||||
| name string,
|
| name string,
|
||||||
| price double
|
| price double
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
|tblproperties(primaryKey = 'id')
|
||||||
|""".stripMargin
|
|""".stripMargin
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ class TestDeleteTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|
|||||||
@@ -0,0 +1,184 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, OverwriteWithLatestAvroPayload}
|
||||||
|
import org.apache.hudi.common.table.HoodieTableConfig
|
||||||
|
import org.apache.hudi.testutils.HoodieClientTestBase
|
||||||
|
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.types._
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions.assertTrue
|
||||||
|
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||||
|
|
||||||
|
import org.scalatest.Matchers.intercept
|
||||||
|
|
||||||
|
class TestHoodieOptionConfig extends HoodieClientTestBase {
|
||||||
|
|
||||||
|
var spark: SparkSession = _
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Setup method running before each test.
|
||||||
|
*/
|
||||||
|
@BeforeEach override def setUp() {
|
||||||
|
initSparkContexts()
|
||||||
|
spark = sqlContext.sparkSession
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testWithDefaultSqlOptions(): Unit = {
|
||||||
|
val ops1 = Map("primaryKey" -> "id")
|
||||||
|
val with1 = HoodieOptionConfig.withDefaultSqlOptions(ops1)
|
||||||
|
assertTrue(with1.size == 3)
|
||||||
|
assertTrue(with1("primaryKey") == "id")
|
||||||
|
assertTrue(with1("type") == "cow")
|
||||||
|
assertTrue(with1("payloadClass") == classOf[DefaultHoodieRecordPayload].getName)
|
||||||
|
|
||||||
|
val ops2 = Map("primaryKey" -> "id",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "mor",
|
||||||
|
"payloadClass" -> classOf[OverwriteWithLatestAvroPayload].getName
|
||||||
|
)
|
||||||
|
val with2 = HoodieOptionConfig.withDefaultSqlOptions(ops2)
|
||||||
|
assertTrue(ops2 == with2)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testMappingSqlOptionToTableConfig(): Unit = {
|
||||||
|
val sqlOptions = Map("primaryKey" -> "id,addr",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "mor",
|
||||||
|
"hoodie.index.type" -> "INMEMORY",
|
||||||
|
"hoodie.compact.inline" -> "true"
|
||||||
|
)
|
||||||
|
val tableConfigs = HoodieOptionConfig.mappingSqlOptionToTableConfig(sqlOptions)
|
||||||
|
|
||||||
|
assertTrue(tableConfigs.size == 5)
|
||||||
|
assertTrue(tableConfigs(HoodieTableConfig.RECORDKEY_FIELDS.key) == "id,addr")
|
||||||
|
assertTrue(tableConfigs(HoodieTableConfig.PRECOMBINE_FIELD.key) == "timestamp")
|
||||||
|
assertTrue(tableConfigs(HoodieTableConfig.TYPE.key) == "MERGE_ON_READ")
|
||||||
|
assertTrue(tableConfigs("hoodie.index.type") == "INMEMORY")
|
||||||
|
assertTrue(tableConfigs("hoodie.compact.inline") == "true")
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testDeleteHooideOptions(): Unit = {
|
||||||
|
val sqlOptions = Map("primaryKey" -> "id,addr",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "mor",
|
||||||
|
"hoodie.index.type" -> "INMEMORY",
|
||||||
|
"hoodie.compact.inline" -> "true",
|
||||||
|
"key123" -> "value456"
|
||||||
|
)
|
||||||
|
val tableConfigs = HoodieOptionConfig.deleteHooideOptions(sqlOptions)
|
||||||
|
assertTrue(tableConfigs.size == 1)
|
||||||
|
assertTrue(tableConfigs("key123") == "value456")
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testExtractSqlOptions(): Unit = {
|
||||||
|
val sqlOptions = Map("primaryKey" -> "id,addr",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "mor",
|
||||||
|
"hoodie.index.type" -> "INMEMORY",
|
||||||
|
"hoodie.compact.inline" -> "true",
|
||||||
|
"key123" -> "value456"
|
||||||
|
)
|
||||||
|
val tableConfigs = HoodieOptionConfig.extractSqlOptions(sqlOptions)
|
||||||
|
assertTrue(tableConfigs.size == 3)
|
||||||
|
assertTrue(tableConfigs.keySet == Set("primaryKey", "preCombineField", "type"))
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
def testValidateTable(): Unit = {
|
||||||
|
val baseSqlOptions = Map(
|
||||||
|
"hoodie.datasource.write.hive_style_partitioning" -> "true",
|
||||||
|
"hoodie.datasource.write.partitionpath.urlencode" -> "false",
|
||||||
|
"hoodie.table.keygenerator.class" -> "org.apache.hudi.keygen.ComplexKeyGenerator"
|
||||||
|
)
|
||||||
|
|
||||||
|
val schema = StructType(
|
||||||
|
Seq(StructField("id", IntegerType, true),
|
||||||
|
StructField("name", StringType, true),
|
||||||
|
StructField("timestamp", TimestampType, true),
|
||||||
|
StructField("dt", StringType, true))
|
||||||
|
)
|
||||||
|
|
||||||
|
// miss primaryKey parameter
|
||||||
|
val sqlOptions1 = baseSqlOptions ++ Map(
|
||||||
|
"type" -> "mor"
|
||||||
|
)
|
||||||
|
|
||||||
|
val e1 = intercept[IllegalArgumentException] {
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions1)
|
||||||
|
}
|
||||||
|
assertTrue(e1.getMessage.contains("No `primaryKey` is specified."))
|
||||||
|
|
||||||
|
// primary field not found
|
||||||
|
val sqlOptions2 = baseSqlOptions ++ Map(
|
||||||
|
"primaryKey" -> "xxx",
|
||||||
|
"type" -> "mor"
|
||||||
|
)
|
||||||
|
val e2 = intercept[IllegalArgumentException] {
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions2)
|
||||||
|
}
|
||||||
|
assertTrue(e2.getMessage.contains("Can't find primary key"))
|
||||||
|
|
||||||
|
// preCombine field not found
|
||||||
|
val sqlOptions3 = baseSqlOptions ++ Map(
|
||||||
|
"primaryKey" -> "id",
|
||||||
|
"preCombineField" -> "ts",
|
||||||
|
"type" -> "mor"
|
||||||
|
)
|
||||||
|
val e3 = intercept[IllegalArgumentException] {
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions3)
|
||||||
|
}
|
||||||
|
assertTrue(e3.getMessage.contains("Can't find precombine key"))
|
||||||
|
|
||||||
|
// miss type parameter
|
||||||
|
val sqlOptions4 = baseSqlOptions ++ Map(
|
||||||
|
"primaryKey" -> "id",
|
||||||
|
"preCombineField" -> "timestamp"
|
||||||
|
)
|
||||||
|
val e4 = intercept[IllegalArgumentException] {
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions4)
|
||||||
|
}
|
||||||
|
assertTrue(e4.getMessage.contains("No `type` is specified."))
|
||||||
|
|
||||||
|
// type is invalid
|
||||||
|
val sqlOptions5 = baseSqlOptions ++ Map(
|
||||||
|
"primaryKey" -> "id",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "abc"
|
||||||
|
)
|
||||||
|
val e5 = intercept[IllegalArgumentException] {
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions5)
|
||||||
|
}
|
||||||
|
assertTrue(e5.getMessage.contains("'type' must be 'cow' or 'mor'"))
|
||||||
|
|
||||||
|
// right options and schema
|
||||||
|
val sqlOptions6 = baseSqlOptions ++ Map(
|
||||||
|
"primaryKey" -> "id",
|
||||||
|
"preCombineField" -> "timestamp",
|
||||||
|
"type" -> "cow"
|
||||||
|
)
|
||||||
|
HoodieOptionConfig.validateTable(spark, schema, sqlOptions6)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
@@ -36,6 +36,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long,
|
| ts long,
|
||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
@@ -75,7 +76,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'cow',
|
| type = 'cow',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -115,7 +116,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName2'
|
| location '${tmp.getCanonicalPath}/$tableName2'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -146,6 +147,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long,
|
| ts long,
|
||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
@@ -191,6 +193,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}/$tblNonPartition'
|
| location '${tmp.getCanonicalPath}/$tblNonPartition'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
spark.sql(s"insert into $tblNonPartition select 1, 'a1', 10, 1000")
|
spark.sql(s"insert into $tblNonPartition select 1, 'a1', 10, 1000")
|
||||||
@@ -245,6 +248,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| dt $partitionType
|
| dt $partitionType
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
@@ -273,6 +277,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| name string,
|
| name string,
|
||||||
| price double
|
| price double
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
|
|
||||||
@@ -293,6 +298,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
checkException(s"insert into $tableName partition(dt = '2021-06-20')" +
|
checkException(s"insert into $tableName partition(dt = '2021-06-20')" +
|
||||||
@@ -305,7 +311,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
" count: 3,columns: (1,a1,10)"
|
" count: 3,columns: (1,a1,10)"
|
||||||
)
|
)
|
||||||
spark.sql("set hoodie.sql.bulk.insert.enable = true")
|
spark.sql("set hoodie.sql.bulk.insert.enable = true")
|
||||||
spark.sql("set hoodie.sql.insert.mode= strict")
|
spark.sql("set hoodie.sql.insert.mode = strict")
|
||||||
|
|
||||||
val tableName2 = generateTableName
|
val tableName2 = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
@@ -316,7 +322,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -325,6 +331,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
"Table with primaryKey can not use bulk insert in strict mode."
|
"Table with primaryKey can not use bulk insert in strict mode."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
spark.sql("set hoodie.sql.insert.mode = non-strict")
|
||||||
val tableName3 = generateTableName
|
val tableName3 = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
@@ -334,16 +341,18 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
checkException(s"insert overwrite table $tableName3 values(1, 'a1', 10, '2021-07-18')")(
|
checkException(s"insert overwrite table $tableName3 values(1, 'a1', 10, '2021-07-18')")(
|
||||||
"Insert Overwrite Partition can not use bulk insert."
|
"Insert Overwrite Partition can not use bulk insert."
|
||||||
)
|
)
|
||||||
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||||
spark.sql("set hoodie.sql.insert.mode= upsert")
|
spark.sql("set hoodie.sql.insert.mode = upsert")
|
||||||
}
|
}
|
||||||
|
|
||||||
test("Test bulk insert") {
|
test("Test bulk insert") {
|
||||||
|
spark.sql("set hoodie.sql.insert.mode = non-strict")
|
||||||
withTempDir { tmp =>
|
withTempDir { tmp =>
|
||||||
Seq("cow", "mor").foreach {tableType =>
|
Seq("cow", "mor").foreach {tableType =>
|
||||||
// Test bulk insert for single partition
|
// Test bulk insert for single partition
|
||||||
@@ -356,8 +365,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType'
|
| type = '$tableType',
|
||||||
|
| primaryKey = 'id'
|
||||||
| )
|
| )
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
@@ -391,8 +401,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| dt string,
|
| dt string,
|
||||||
| hh string
|
| hh string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType'
|
| type = '$tableType',
|
||||||
|
| primaryKey = 'id'
|
||||||
| )
|
| )
|
||||||
| partitioned by (dt, hh)
|
| partitioned by (dt, hh)
|
||||||
| location '${tmp.getCanonicalPath}/$tableMultiPartition'
|
| location '${tmp.getCanonicalPath}/$tableMultiPartition'
|
||||||
@@ -423,8 +434,9 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| name string,
|
| name string,
|
||||||
| price double
|
| price double
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType'
|
| type = '$tableType',
|
||||||
|
| primaryKey = 'id'
|
||||||
| )
|
| )
|
||||||
| location '${tmp.getCanonicalPath}/$nonPartitionedTable'
|
| location '${tmp.getCanonicalPath}/$nonPartitionedTable'
|
||||||
""".stripMargin)
|
""".stripMargin)
|
||||||
@@ -445,7 +457,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
s"""
|
s"""
|
||||||
|create table $tableName2
|
|create table $tableName2
|
||||||
|using hudi
|
|using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id'
|
| primaryKey = 'id'
|
||||||
|)
|
|)
|
||||||
@@ -459,9 +471,11 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
spark.sql("set hoodie.sql.insert.mode = upsert")
|
||||||
}
|
}
|
||||||
|
|
||||||
test("Test combine before insert") {
|
test("Test combine before insert") {
|
||||||
|
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||||
withTempDir{tmp =>
|
withTempDir{tmp =>
|
||||||
val tableName = generateTableName
|
val tableName = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
@@ -473,7 +487,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -495,6 +509,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
test("Test insert pk-table") {
|
test("Test insert pk-table") {
|
||||||
|
spark.sql("set hoodie.sql.bulk.insert.enable = false")
|
||||||
withTempDir{tmp =>
|
withTempDir{tmp =>
|
||||||
val tableName = generateTableName
|
val tableName = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
@@ -506,7 +521,7 @@ class TestInsertTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ class TestMergeIntoLogOnlyTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| preCombineField = 'ts',
|
| preCombineField = 'ts',
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -137,7 +137,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$targetTable'
|
| location '${tmp.getCanonicalPath}/$targetTable'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -203,7 +203,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long,
|
| ts long,
|
||||||
| dt string
|
| dt string
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -313,7 +313,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| dt string
|
| dt string
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id'
|
| primaryKey = 'id'
|
||||||
| )
|
| )
|
||||||
@@ -369,7 +369,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| v long,
|
| v long,
|
||||||
| dt string
|
| dt string
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'v'
|
| preCombineField = 'v'
|
||||||
@@ -439,7 +439,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| _ts long
|
| _ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type ='$tableType',
|
| type ='$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
@@ -457,7 +457,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| _ts long
|
| _ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type ='$tableType',
|
| type ='$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
@@ -553,7 +553,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| c $dataType
|
| c $dataType
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'c'
|
| preCombineField = 'c'
|
||||||
| )
|
| )
|
||||||
@@ -604,7 +604,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}'
|
| location '${tmp.getCanonicalPath}'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| preCombineField = 'ts',
|
| preCombineField = 'ts',
|
||||||
@@ -665,7 +665,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -711,7 +711,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
| ts long,
|
| ts long,
|
||||||
| dt string
|
| dt string
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -145,7 +145,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
spark.sql(
|
spark.sql(
|
||||||
s"""
|
s"""
|
||||||
|create table $tableName using hudi
|
|create table $tableName using hudi
|
||||||
|options(primaryKey = 'id')
|
|tblproperties(primaryKey = 'id')
|
||||||
|location '${tmp.getCanonicalPath}'
|
|location '${tmp.getCanonicalPath}'
|
||||||
|as
|
|as
|
||||||
|select 1 as id, 'a1' as name
|
|select 1 as id, 'a1' as name
|
||||||
@@ -187,7 +187,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
| m_value map<string, string>,
|
| m_value map<string, string>,
|
||||||
| ts long
|
| ts long
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
@@ -251,7 +251,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -333,7 +333,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -376,7 +376,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey ='id',
|
| primaryKey ='id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| _ts long
|
| _ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type ='$tableType',
|
| type ='$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
@@ -60,7 +60,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| name string,
|
| name string,
|
||||||
| price double
|
| price double
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type ='$tableType',
|
| type ='$tableType',
|
||||||
| primaryKey = 'id'
|
| primaryKey = 'id'
|
||||||
|)
|
|)
|
||||||
@@ -92,7 +92,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| _ts long
|
| _ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type = 'cow',
|
| type = 'cow',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
@@ -117,7 +117,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| _ts long
|
| _ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options(
|
|tblproperties(
|
||||||
| type = 'mor',
|
| type = 'mor',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
|||||||
| price double,
|
| price double,
|
||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
|options (
|
|tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
)
|
)
|
||||||
@@ -59,7 +59,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
|||||||
| dt string
|
| dt string
|
||||||
) using hudi
|
) using hudi
|
||||||
| partitioned by (dt)
|
| partitioned by (dt)
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
@@ -109,7 +109,7 @@ class TestShowPartitions extends TestHoodieSqlBase {
|
|||||||
| day string
|
| day string
|
||||||
| ) using hudi
|
| ) using hudi
|
||||||
| partitioned by (year, month, day)
|
| partitioned by (year, month, day)
|
||||||
| options (
|
| tblproperties (
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
| )
|
| )
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ class TestUpdateTable extends TestHoodieSqlBase {
|
|||||||
| ts long
|
| ts long
|
||||||
|) using hudi
|
|) using hudi
|
||||||
| location '${tmp.getCanonicalPath}/$tableName'
|
| location '${tmp.getCanonicalPath}/$tableName'
|
||||||
| options (
|
| tblproperties (
|
||||||
| type = '$tableType',
|
| type = '$tableType',
|
||||||
| primaryKey = 'id',
|
| primaryKey = 'id',
|
||||||
| preCombineField = 'ts'
|
| preCombineField = 'ts'
|
||||||
|
|||||||
Reference in New Issue
Block a user