[HUDI-4124] Add valid check in Spark Datasource configs (#5637)
Co-authored-by: wangzixuan.wzxuan <wangzixuan.wzxuan@bytedance.com>
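This change threads withValidValues(...) through the Spark datasource ConfigProperty definitions so that unsupported option values are rejected up front. As a rough standalone sketch of the whitelist idea only (SimpleConfig and checkValue are invented here for illustration; Hudi's actual ConfigProperty API differs):

    // Illustrative whitelist-validated config property; NOT Hudi's ConfigProperty.
    case class SimpleConfig(key: String, default: String, validValues: Set[String] = Set.empty) {
      def withValidValues(values: String*): SimpleConfig = copy(validValues = values.toSet)

      // Throws IllegalArgumentException for values outside the whitelist.
      def checkValue(value: String): String = {
        require(validValues.isEmpty || validValues.contains(value),
          s"Config '$key' must be one of ${validValues.mkString("[", ", ", "]")}, got '$value'")
        value
      }
    }

    val queryType = SimpleConfig("hoodie.datasource.query.type", "snapshot")
      .withValidValues("snapshot", "read_optimized", "incremental")
    queryType.checkValue("incremental") // ok
    // queryType.checkValue("bogus")    // would throw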
@@ -56,6 +56,7 @@ object DataSourceReadOptions {
     .key("hoodie.datasource.query.type")
     .defaultValue(QUERY_TYPE_SNAPSHOT_OPT_VAL)
     .withAlternatives("hoodie.datasource.view.type")
+    .withValidValues(QUERY_TYPE_SNAPSHOT_OPT_VAL, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_INCREMENTAL_OPT_VAL)
     .withDocumentation("Whether data needs to be read, in incremental mode (new data since an instantTime) " +
       "(or) Read Optimized mode (obtain latest view, based on base files) (or) Snapshot mode " +
       "(obtain latest view, by merging base and (if any) log files)")
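For context (usage, not part of the diff): the whitelisted constants map to the strings "snapshot", "read_optimized" and "incremental", so a misspelled query type now fails fast at config time. A read sketch, with the table path and begin instant made up for illustration:

    import org.apache.spark.sql.SparkSession

    // Sketch only: path and instant time are hypothetical.
    val spark = SparkSession.builder()
      .appName("hudi-read-example")
      .master("local[*]")
      .getOrCreate()

    val incrementalDF = spark.read
      .format("hudi")
      // Must be one of: snapshot | read_optimized | incremental.
      .option("hoodie.datasource.query.type", "incremental")
      .option("hoodie.datasource.read.begin.instanttime", "20220501000000")
      .load("/tmp/hudi_trips_cow")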
@@ -65,6 +66,7 @@ object DataSourceReadOptions {
   val REALTIME_MERGE: ConfigProperty[String] = ConfigProperty
     .key("hoodie.datasource.merge.type")
     .defaultValue(REALTIME_PAYLOAD_COMBINE_OPT_VAL)
+    .withValidValues(REALTIME_SKIP_MERGE_OPT_VAL, REALTIME_PAYLOAD_COMBINE_OPT_VAL)
     .withDocumentation("For Snapshot query on merge on read table, control whether we invoke the record " +
       s"payload implementation to merge (${REALTIME_PAYLOAD_COMBINE_OPT_VAL}) or skip merging altogether" +
       s"${REALTIME_SKIP_MERGE_OPT_VAL}")
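Similarly for the merge type: REALTIME_PAYLOAD_COMBINE_OPT_VAL and REALTIME_SKIP_MERGE_OPT_VAL correspond to "payload_combine" (the default) and "skip_merge". A snapshot-query sketch on a merge-on-read table, reusing the spark session above (path hypothetical):

    // skip_merge skips the record payload merge for speed; payload_combine
    // (default) merges base and log records per key at read time.
    val snapshotDF = spark.read
      .format("hudi")
      .option("hoodie.datasource.query.type", "snapshot")
      .option("hoodie.datasource.merge.type", "skip_merge")
      .load("/tmp/hudi_trips_mor")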
@@ -210,6 +212,23 @@ object DataSourceWriteOptions {
   val OPERATION: ConfigProperty[String] = ConfigProperty
     .key("hoodie.datasource.write.operation")
     .defaultValue(UPSERT_OPERATION_OPT_VAL)
+    .withValidValues(
+      WriteOperationType.INSERT.value,
+      WriteOperationType.INSERT_PREPPED.value,
+      WriteOperationType.UPSERT.value,
+      WriteOperationType.UPSERT_PREPPED.value,
+      WriteOperationType.BULK_INSERT.value,
+      WriteOperationType.BULK_INSERT_PREPPED.value,
+      WriteOperationType.DELETE.value,
+      WriteOperationType.BOOTSTRAP.value,
+      WriteOperationType.INSERT_OVERWRITE.value,
+      WriteOperationType.CLUSTER.value,
+      WriteOperationType.DELETE_PARTITION.value,
+      WriteOperationType.INSERT_OVERWRITE_TABLE.value,
+      WriteOperationType.COMPACT.value,
+      WriteOperationType.INSERT.value,
+      WriteOperationType.ALTER_SCHEMA.value
+    )
     .withDocumentation("Whether to do upsert, insert or bulkinsert for the write operation. " +
       "Use bulkinsert to load new data into a table, and there on use upsert/insert. " +
       "bulk insert uses a disk based write path to scale to load large inputs without need to cache it.")
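The WriteOperationType values above serialize to lowercase strings such as "insert", "upsert" and "bulk_insert". Following the documentation string, a common pattern is a bulk_insert for the initial load and upserts thereafter; a sketch, assuming inputDF/updatesDF are prepared DataFrames and the record key and partition path options are set elsewhere:

    import org.apache.spark.sql.SaveMode

    // Initial load: bulk_insert uses a disk-based write path that scales
    // to large inputs without caching them.
    inputDF.write
      .format("hudi")
      .option("hoodie.datasource.write.operation", "bulk_insert")
      .option("hoodie.table.name", "trips")
      .mode(SaveMode.Overwrite)
      .save("/tmp/hudi_trips_cow")

    // Subsequent writes: upsert (the default) merges changes into existing records.
    updatesDF.write
      .format("hudi")
      .option("hoodie.datasource.write.operation", "upsert")
      .option("hoodie.table.name", "trips")
      .mode(SaveMode.Append)
      .save("/tmp/hudi_trips_cow")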
@@ -220,6 +239,7 @@ object DataSourceWriteOptions {
   val TABLE_TYPE: ConfigProperty[String] = ConfigProperty
     .key("hoodie.datasource.write.table.type")
     .defaultValue(COW_TABLE_TYPE_OPT_VAL)
+    .withValidValues(COW_TABLE_TYPE_OPT_VAL, MOR_TABLE_TYPE_OPT_VAL)
     .withAlternatives("hoodie.datasource.write.storage.type")
     .withDocumentation("The table type for the underlying data, for this write. This can't change between writes.")
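COW_TABLE_TYPE_OPT_VAL and MOR_TABLE_TYPE_OPT_VAL are "COPY_ON_WRITE" and "MERGE_ON_READ"; with the check in place, any other string is rejected up front. A write sketch, continuing the assumptions above:

    // The table type is fixed at the first write and must be repeated
    // unchanged on every later write to the same table.
    inputDF.write
      .format("hudi")
      .option("hoodie.datasource.write.table.type", "MERGE_ON_READ") // or "COPY_ON_WRITE" (default)
      .option("hoodie.table.name", "trips_mor")
      .mode(SaveMode.Overwrite)
      .save("/tmp/hudi_trips_mor")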
@@ -308,7 +328,8 @@ object DataSourceWriteOptions {
       Option.of(classOf[NonpartitionedKeyGenerator].getName)
     } else {
       val numOfPartFields = p.getString(PARTITIONPATH_FIELD).split(",").length
-      if (numOfPartFields == 1) {
+      val numOfRecordKeyFields = p.getString(RECORDKEY_FIELD).split(",").length
+      if (numOfPartFields == 1 && numOfRecordKeyFields == 1) {
         Option.of(classOf[SimpleKeyGenerator].getName)
       } else {
         Option.of(classOf[ComplexKeyGenerator].getName)
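The hunk above tightens key-generator inference: SimpleKeyGenerator is chosen only when there is exactly one partition-path field and exactly one record-key field; otherwise ComplexKeyGenerator is used. A standalone restatement of that rule (function name and signature invented for illustration, not Hudi's API):

    // Mirrors the inference logic in the hunk above.
    def inferKeyGenClass(recordKeyFields: String, partitionPathFields: String): String = {
      val numOfPartFields = partitionPathFields.split(",").length
      val numOfRecordKeyFields = recordKeyFields.split(",").length
      if (numOfPartFields == 1 && numOfRecordKeyFields == 1)
        "org.apache.hudi.keygen.SimpleKeyGenerator"
      else
        "org.apache.hudi.keygen.ComplexKeyGenerator"
    }

    // inferKeyGenClass("uuid", "region")    -> ...SimpleKeyGenerator
    // inferKeyGenClass("uuid,ts", "region") -> ...ComplexKeyGenerator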