1
0

[HUDI-3383] Sync column comments while syncing a hive table (#4960)

Desc: Add a Hive sync config (hoodie.datasource.hive_sync.sync_comment). This config defaults to false.
When syncing a data source to Hudi, column comments are added to the source Avro schema; if sync_comment is true, those column comments are also synced to the Hive table.
This commit is contained in:
MrSleeping123
2022-03-10 09:44:39 +08:00
committed by GitHub
parent 548000b0d6
commit 8859b48b2a
13 changed files with 467 additions and 14 deletions

View File

@@ -161,4 +161,220 @@ class TestAvroConversionUtils extends FunSuite with Matchers {
assert(avroSchema.equals(expectedAvroSchema))
}
test("test convertStructTypeToAvroSchema with Nested StructField comment") {
  // Builds a StructType whose fields carry comments at several nesting levels
  // (top-level, nested struct, map value struct, array element struct) and checks
  // that convertStructTypeToAvroSchema produces the expected Avro schema, with the
  // comments surfaced as Avro field "doc" attributes.
  val mapType = DataTypes.createMapType(StringType, new StructType().add("mapKey", "string", false, "mapKeyComment").add("mapVal", "integer", true))
  val arrayType = ArrayType(new StructType().add("arrayKey", "string", false).add("arrayVal", "integer", true, "arrayValComment"))
  val innerStruct = new StructType().add("innerKey","string",false, "innerKeyComment").add("value", "long", true, "valueComment")
  val struct = new StructType().add("key", "string", false).add("version", "string", true, "versionComment")
    .add("data1",innerStruct,false).add("data2",innerStruct,true)
    .add("nullableMap", mapType, true).add("map",mapType,false)
    .add("nullableArray", arrayType, true).add("array",arrayType,false)
  val avroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(struct, "SchemaName", "SchemaNS")
  // Plain (non-interpolated) literal: the JSON contains nothing to substitute.
  // The document must be exactly one JSON object; a stray trailing brace is
  // tolerated only by lenient parsers.
  val expectedSchemaStr = """
{
"type": "record",
"name": "SchemaName",
"namespace": "SchemaNS",
"fields": [
{
"name": "key",
"type": "string"
},
{
"name": "version",
"type": [
"null",
"string"
],
"doc": "versionComment",
"default": null
},
{
"name": "data1",
"type": {
"type": "record",
"name": "data1",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "innerKey",
"type": "string",
"doc": "innerKeyComment"
},
{
"name": "value",
"type": [
"null",
"long"
],
"doc": "valueComment",
"default": null
}
]
}
},
{
"name": "data2",
"type": [
"null",
{
"type": "record",
"name": "data2",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "innerKey",
"type": "string",
"doc": "innerKeyComment"
},
{
"name": "value",
"type": [
"null",
"long"
],
"doc": "valueComment",
"default": null
}
]
}
],
"default": null
},
{
"name": "nullableMap",
"type": [
"null",
{
"type": "map",
"values": [
{
"type": "record",
"name": "nullableMap",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "mapKey",
"type": "string",
"doc": "mapKeyComment"
},
{
"name": "mapVal",
"type": [
"null",
"int"
],
"default": null
}
]
},
"null"
]
}
],
"default": null
},
{
"name": "map",
"type": {
"type": "map",
"values": [
{
"type": "record",
"name": "map",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "mapKey",
"type": "string",
"doc": "mapKeyComment"
},
{
"name": "mapVal",
"type": [
"null",
"int"
],
"default": null
}
]
},
"null"
]
}
},
{
"name": "nullableArray",
"type": [
"null",
{
"type": "array",
"items": [
{
"type": "record",
"name": "nullableArray",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "arrayKey",
"type": "string"
},
{
"name": "arrayVal",
"type": [
"null",
"int"
],
"doc": "arrayValComment",
"default": null
}
]
},
"null"
]
}
],
"default": null
},
{
"name": "array",
"type": {
"type": "array",
"items": [
{
"type": "record",
"name": "array",
"namespace": "SchemaNS.SchemaName",
"fields": [
{
"name": "arrayKey",
"type": "string"
},
{
"name": "arrayVal",
"type": [
"null",
"int"
],
"doc": "arrayValComment",
"default": null
}
]
},
"null"
]
}
}
]
}
"""
  val expectedAvroSchema = new Schema.Parser().parse(expectedSchemaStr)
  // Structural comparison (names, types, nullability, defaults).
  assert(avroSchema.equals(expectedAvroSchema))
  // Avro's Schema.Field equality does not take `doc` into account, so the check
  // above cannot detect a dropped comment. Spot-check one top-level and one
  // nested field explicitly; expected values mirror the JSON above.
  assert(avroSchema.getField("version").doc() == "versionComment")
  assert(avroSchema.getField("data1").schema().getField("innerKey").doc() == "innerKeyComment")
}
}