[HUDI-713] Fix conversion of Spark array of struct type to Avro schema (#1406)
Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
@@ -27,7 +27,6 @@ import org.apache.avro.{LogicalTypes, Schema}
|
||||
import org.apache.avro.Schema.Type._
|
||||
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
||||
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
|
||||
import org.apache.hudi.AvroConversionUtils.getNewRecordNamespace
|
||||
import org.apache.spark.sql.Row
|
||||
import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
||||
@@ -303,7 +302,7 @@ object AvroConversionHelper {
|
||||
avroSchema,
|
||||
elementType,
|
||||
structName,
|
||||
getNewRecordNamespace(elementType, recordNamespace, structName))
|
||||
recordNamespace)
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
@@ -324,7 +323,7 @@ object AvroConversionHelper {
|
||||
avroSchema,
|
||||
valueType,
|
||||
structName,
|
||||
getNewRecordNamespace(valueType, recordNamespace, structName))
|
||||
recordNamespace)
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
@@ -338,12 +337,13 @@ object AvroConversionHelper {
|
||||
}
|
||||
case structType: StructType =>
|
||||
val schema: Schema = SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
|
||||
val childNameSpace = if (recordNamespace != "") s"$recordNamespace.$structName" else structName
|
||||
val fieldConverters = structType.fields.map(field =>
|
||||
createConverterToAvro(
|
||||
avroSchema,
|
||||
field.dataType,
|
||||
field.name,
|
||||
getNewRecordNamespace(field.dataType, recordNamespace, structName)))
|
||||
childNameSpace))
|
||||
(item: Any) => {
|
||||
if (item == null) {
|
||||
null
|
||||
|
||||
@@ -71,16 +71,6 @@ object AvroConversionUtils {
|
||||
}
|
||||
}
|
||||
|
||||
def getNewRecordNamespace(elementDataType: DataType,
|
||||
currentRecordNamespace: String,
|
||||
elementName: String): String = {
|
||||
|
||||
elementDataType match {
|
||||
case StructType(_) => s"$currentRecordNamespace.$elementName"
|
||||
case _ => currentRecordNamespace
|
||||
}
|
||||
}
|
||||
|
||||
def convertStructTypeToAvroSchema(structType: StructType,
|
||||
structName: String,
|
||||
recordNamespace: String): Schema = {
|
||||
|
||||
Reference in New Issue
Block a user