1
0

[HUDI-1526] Translate the api partitionBy in spark datasource to hoodie.datasource.write.partitionpath.field (#2431)

This commit is contained in:
teeyog
2021-02-11 01:07:54 +08:00
committed by GitHub
parent a2f85d90de
commit 26da4f5462
3 changed files with 208 additions and 9 deletions

View File

@@ -23,9 +23,11 @@ import org.apache.hudi.common.model.WriteOperationType
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.hive.HiveSyncTool
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
import org.apache.hudi.keygen.SimpleKeyGenerator
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config
import org.apache.hudi.keygen.{CustomKeyGenerator, SimpleKeyGenerator}
import org.apache.hudi.keygen.constant.KeyGeneratorOptions
import org.apache.log4j.LogManager
import org.apache.spark.sql.execution.datasources.{DataSourceUtils => SparkDataSourceUtils}
/**
* List of options that can be passed to the Hoodie datasource,
@@ -192,6 +194,42 @@ object DataSourceWriteOptions {
}
}
/**
 * Translates Spark datasource parameters into their Hudi counterparts.
 *
 * If `optParams` carries the partition columns recorded by the DataFrameWriter `partitionBy`
 * api (under `SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY`), they are decoded, joined with
 * "," and stored under `PARTITIONPATH_FIELD_OPT_KEY`, replacing any value already present for
 * that key. Without that entry the parameters are returned as they were passed in.
 *
 * @param optParams Parameters to be translated
 * @return Parameters after translation
 */
def translateSqlOptions(optParams: Map[String, String]): Map[String, String] = {
  optParams.get(SparkDataSourceUtils.PARTITIONING_COLUMNS_KEY) match {
    case None =>
      // partitionBy was not used, nothing to translate
      optParams
    case Some(encodedColumns) =>
      val columns = SparkDataSourceUtils.decodePartitioningColumns(encodedColumns)
      val keyGenClassName = optParams.getOrElse(
        DataSourceWriteOptions.KEYGENERATOR_CLASS_OPT_KEY,
        DataSourceWriteOptions.DEFAULT_KEYGENERATOR_CLASS_OPT_VAL)
      // CustomKeyGenerator is the only key generator that needs special treatment: it expects
      // entries such as "field1:PartitionKeyType1,field2:PartitionKeyType2", so a call like
      // partitionBy("p1", "p2:SIMPLE", "p3:TIMESTAMP") may mix typed and untyped columns.
      val usesCustomKeyGenerator = keyGenClassName == classOf[CustomKeyGenerator].getName
      val partitionFields =
        if (usesCustomKeyGenerator) {
          // columns given without an explicit type fall back to SIMPLE
          columns.map(column => if (column.contains(":")) column else s"$column:SIMPLE")
        } else {
          columns
        }
      optParams ++ Map(PARTITIONPATH_FIELD_OPT_KEY -> partitionFields.mkString(","))
  }
}
/**
* Hive table name, to register the table into.