1
0

[HUDI-713] Fix conversion of Spark array of struct type to Avro schema (#1406)

Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
wenningd
2020-03-30 15:52:15 -07:00
committed by GitHub
parent dbc9acd23a
commit ce0a4c64d0
6 changed files with 75 additions and 17 deletions

View File

@@ -27,7 +27,6 @@ import org.apache.avro.{LogicalTypes, Schema}
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.GenericData.{Fixed, Record}
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
import org.apache.hudi.AvroConversionUtils.getNewRecordNamespace
import org.apache.spark.sql.Row
import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
import org.apache.spark.sql.catalyst.expressions.GenericRow
@@ -303,7 +302,7 @@ object AvroConversionHelper {
avroSchema,
elementType,
structName,
getNewRecordNamespace(elementType, recordNamespace, structName))
recordNamespace)
(item: Any) => {
if (item == null) {
null
@@ -324,7 +323,7 @@ object AvroConversionHelper {
avroSchema,
valueType,
structName,
getNewRecordNamespace(valueType, recordNamespace, structName))
recordNamespace)
(item: Any) => {
if (item == null) {
null
@@ -338,12 +337,13 @@ object AvroConversionHelper {
}
case structType: StructType =>
val schema: Schema = SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
val childNameSpace = if (recordNamespace != "") s"$recordNamespace.$structName" else structName
val fieldConverters = structType.fields.map(field =>
createConverterToAvro(
avroSchema,
field.dataType,
field.name,
getNewRecordNamespace(field.dataType, recordNamespace, structName)))
childNameSpace))
(item: Any) => {
if (item == null) {
null

View File

@@ -71,16 +71,6 @@ object AvroConversionUtils {
}
}
/**
 * Computes the record namespace to use for an element nested inside a record.
 *
 * A struct element opens a new namespace level named after the element; every other
 * element type stays in the namespace of its enclosing record.
 *
 * @param elementDataType        Spark data type of the element
 * @param currentRecordNamespace namespace of the enclosing record; may be the empty string
 * @param elementName            name of the element
 * @return `currentRecordNamespace.elementName` for a struct element (just `elementName`
 *         when the current namespace is empty), otherwise `currentRecordNamespace`
 */
def getNewRecordNamespace(elementDataType: DataType,
                          currentRecordNamespace: String,
                          elementName: String): String = {
  elementDataType match {
    // An empty parent namespace must not yield a leading dot (".elementName").
    case StructType(_) if currentRecordNamespace == "" => elementName
    case StructType(_) => s"$currentRecordNamespace.$elementName"
    case _ => currentRecordNamespace
  }
}
def convertStructTypeToAvroSchema(structType: StructType,
structName: String,
recordNamespace: String): Schema = {