1
0

[HUDI-3383] Sync column comments while syncing a hive table (#4960)

Desc: Add a Hive sync config (hoodie.datasource.hive_sync.sync_comment). This config defaults to false.
When syncing a data source to Hudi, if column comments have been added to the source Avro schema and sync_comment is true, the column comments are also synced to the Hive table.
This commit is contained in:
MrSleeping123
2022-03-10 09:44:39 +08:00
committed by GitHub
parent 548000b0d6
commit 8859b48b2a
13 changed files with 467 additions and 14 deletions

View File

@@ -555,6 +555,11 @@ object DataSourceWriteOptions {
.withDocumentation("Whether sync hive metastore bucket specification when using bucket index." +
"The specification is 'CLUSTERED BY (trace_id) SORTED BY (trace_id ASC) INTO 65536 BUCKETS'")
// Hive sync - column comments. Opt-in: the default value is the string "false",
// so column comments are only synced when this key is explicitly set to "true".
val HIVE_SYNC_COMMENT: ConfigProperty[String] = ConfigProperty
.key("hoodie.datasource.hive_sync.sync_comment")
.defaultValue("false")
.withDocumentation("Whether to sync the table column comments while syncing the table.")
// Async Compaction - Enabled by default for MOR
val ASYNC_COMPACT_ENABLE: ConfigProperty[String] = ConfigProperty
.key("hoodie.datasource.compaction.async.enable")

View File

@@ -601,6 +601,7 @@ object HoodieSparkSqlWriter {
hiveSyncConfig.serdeProperties = hoodieConfig.getString(HIVE_TABLE_SERDE_PROPERTIES)
hiveSyncConfig.tableProperties = hoodieConfig.getString(HIVE_TABLE_PROPERTIES)
hiveSyncConfig.sparkVersion = SPARK_VERSION
hiveSyncConfig.syncComment = hoodieConfig.getStringOrDefault(HIVE_SYNC_COMMENT).toBoolean
hiveSyncConfig
}