[HUDI-3664] Fixing Column Stats Index composition (#5181)
Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
@@ -17,7 +17,7 @@
|
||||
|
||||
package org.apache.spark.sql
|
||||
|
||||
import org.apache.spark.HoodieSparkTypeUtils.isCastPreservingOrdering
|
||||
import HoodieSparkTypeUtils.isCastPreservingOrdering
|
||||
import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper}
|
||||
|
||||
object HoodieSpark2CatalystExpressionUtils extends HoodieCatalystExpressionUtils {
|
||||
|
||||
@@ -178,6 +178,11 @@ class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable:
|
||||
val numFields = st.length
|
||||
(getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields))
|
||||
|
||||
case (st: StructType, UNION) =>
|
||||
val unionConverter = newUnionConverter(st, avroType)
|
||||
val numFields = st.length
|
||||
(getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields))
|
||||
|
||||
case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType =>
|
||||
val valueConverter = newConverter(
|
||||
vt, resolveNullableType(avroType.getValueType, valueContainsNull))
|
||||
@@ -205,8 +210,7 @@ class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable:
|
||||
}
|
||||
}
|
||||
|
||||
private def newStructConverter(
|
||||
catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = {
|
||||
private def newStructConverter(catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = {
|
||||
if (avroStruct.getType != RECORD || avroStruct.getFields.size() != catalystStruct.length) {
|
||||
throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " +
|
||||
s"Avro type $avroStruct.")
|
||||
@@ -229,14 +233,58 @@ class AvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable:
|
||||
result
|
||||
}
|
||||
|
||||
private def newUnionConverter(catalystStruct: StructType, avroUnion: Schema): InternalRow => Any = {
|
||||
if (avroUnion.getType != UNION || !canMapUnion(catalystStruct, avroUnion)) {
|
||||
throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " +
|
||||
s"Avro type $avroUnion.")
|
||||
}
|
||||
val nullable = avroUnion.getTypes.size() > 0 && avroUnion.getTypes.get(0).getType == Type.NULL
|
||||
val avroInnerTypes = if (nullable) {
|
||||
avroUnion.getTypes.asScala.tail
|
||||
} else {
|
||||
avroUnion.getTypes.asScala
|
||||
}
|
||||
val fieldConverters = catalystStruct.zip(avroInnerTypes).map {
|
||||
case (f1, f2) => newConverter(f1.dataType, f2)
|
||||
}
|
||||
val numFields = catalystStruct.length
|
||||
(row: InternalRow) =>
|
||||
var i = 0
|
||||
var result: Any = null
|
||||
while (i < numFields) {
|
||||
if (!row.isNullAt(i)) {
|
||||
if (result != null) {
|
||||
throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " +
|
||||
s"Avro union $avroUnion. Record has more than one optional values set")
|
||||
}
|
||||
result = fieldConverters(i).apply(row, i)
|
||||
}
|
||||
i += 1
|
||||
}
|
||||
if (!nullable && result == null) {
|
||||
throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " +
|
||||
s"Avro union $avroUnion. Record has no values set, while should have exactly one")
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = {
|
||||
(avroStruct.getTypes.size() > 0 &&
|
||||
avroStruct.getTypes.get(0).getType == Type.NULL &&
|
||||
avroStruct.getTypes.size() - 1 == catalystStruct.length) || avroStruct.getTypes.size() == catalystStruct.length
|
||||
}
|
||||
|
||||
private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = {
|
||||
if (nullable && avroType.getType != NULL) {
|
||||
// avro uses union to represent nullable type.
|
||||
// Avro uses union to represent nullable type.
|
||||
val fields = avroType.getTypes.asScala
|
||||
assert(fields.length == 2)
|
||||
val actualType = fields.filter(_.getType != Type.NULL)
|
||||
assert(actualType.length == 1)
|
||||
actualType.head
|
||||
if (fields.length == 2 && actualType.length == 1) {
|
||||
actualType.head
|
||||
} else {
|
||||
// This is just a normal union, not used to designate nullability
|
||||
avroType
|
||||
}
|
||||
} else {
|
||||
avroType
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user