1
0

[HUDI-3664] Fixing Column Stats Index composition (#5181)

Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
Alexey Kudinkin
2022-04-02 17:15:52 -07:00
committed by GitHub
parent 74eb09be9b
commit cc3737be50
52 changed files with 1776 additions and 749 deletions

View File

@@ -18,7 +18,7 @@
package org.apache.spark.sql
import org.apache.spark.HoodieSparkTypeUtils.isCastPreservingOrdering
import HoodieSparkTypeUtils.isCastPreservingOrdering
import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, BitwiseOr, Cast, DateAdd, DateDiff, DateFormatClass, DateSub, Divide, Exp, Expm1, Expression, FromUTCTimestamp, FromUnixTime, Log, Log10, Log1p, Log2, Lower, Multiply, ParseToDate, ParseToTimestamp, ShiftLeft, ShiftRight, ToUTCTimestamp, ToUnixTimestamp, Upper}
object HoodieSpark3_1CatalystExpressionUtils extends HoodieCatalystExpressionUtils {

View File

@@ -196,6 +196,11 @@ private[sql] class AvroSerializer(rootCatalystType: DataType,
val numFields = st.length
(getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields))
case (st: StructType, UNION) =>
val unionConverter = newUnionConverter(st, avroType)
val numFields = st.length
(getter, ordinal) => unionConverter(getter.getStruct(ordinal, numFields))
case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType =>
val valueConverter = newConverter(
vt, resolveNullableType(avroType.getValueType, valueContainsNull))
@@ -223,8 +228,7 @@ private[sql] class AvroSerializer(rootCatalystType: DataType,
}
}
private def newStructConverter(
catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = {
private def newStructConverter(catalystStruct: StructType, avroStruct: Schema): InternalRow => Record = {
if (avroStruct.getType != RECORD || avroStruct.getFields.size() != catalystStruct.length) {
throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " +
s"Avro type $avroStruct.")
@@ -258,6 +262,47 @@ private[sql] class AvroSerializer(rootCatalystType: DataType,
result
}
/**
 * Builds a converter that serializes a Catalyst row into the value of an Avro union.
 *
 * The fields of `catalystStruct` are paired, in order, with the branches of `avroUnion`;
 * when the union's first branch is NULL that branch is skipped and the union is treated as
 * nullable. At conversion time the single non-null field of the row is converted with the
 * converter of its paired branch.
 *
 * @param catalystStruct Catalyst struct whose fields stand for the union branches
 * @param avroUnion      Avro schema to serialize into; must be a UNION accepted by `canMapUnion`
 * @return a function converting a row into the value of its populated field, or null when no
 *         field is populated and the union is nullable
 * @throws IncompatibleSchemaException if the schema is not a mappable UNION, or (at conversion
 *                                     time) if a second field is found populated after a non-null
 *                                     value was already produced, or if no value was produced
 *                                     for a non-nullable union
 */
private def newUnionConverter(catalystStruct: StructType, avroUnion: Schema): InternalRow => Any = {
  // `&&` short-circuits, so canMapUnion is only consulted for genuine UNION schemas
  val mappable = avroUnion.getType == UNION && canMapUnion(catalystStruct, avroUnion)
  if (!mappable) {
    throw new IncompatibleSchemaException(s"Cannot convert Catalyst type $catalystStruct to " +
      s"Avro type $avroUnion.")
  }

  val branchTypes = avroUnion.getTypes.asScala
  // A leading NULL branch marks the union as nullable and is not backed by a struct field
  val nullable = branchTypes.nonEmpty && branchTypes.head.getType == Type.NULL
  val avroInnerTypes = if (nullable) branchTypes.tail else branchTypes

  val fieldConverters = catalystStruct.zip(avroInnerTypes).map { case (field, branchType) =>
    newConverter(field.dataType, branchType)
  }
  val numFields = catalystStruct.length

  (row: InternalRow) => {
    var converted: Any = null
    var ordinal = 0
    // Plain while-loop: this closure is invoked once per serialized row
    while (ordinal < numFields) {
      val populated = !row.isNullAt(ordinal)
      if (populated && converted != null) {
        throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " +
          s"Avro union $avroUnion. Record has more than one optional values set")
      }
      if (populated) {
        converted = fieldConverters(ordinal).apply(row, ordinal)
      }
      ordinal += 1
    }

    if (converted == null && !nullable) {
      throw new IncompatibleSchemaException(s"Cannot convert Catalyst record $catalystStruct to " +
        s"Avro union $avroUnion. Record has no values set, while should have exactly one")
    }
    converted
  }
}
/**
 * Checks whether a Catalyst struct can be mapped onto an Avro union, one struct field per
 * union branch.
 *
 * A union whose first branch is NULL is treated as nullable: the leading NULL is not backed by
 * a struct field, so the struct must have exactly one field fewer than the union has branches.
 * Any other union must have exactly as many branches as the struct has fields.
 *
 * @param catalystStruct Catalyst struct whose fields stand for the union branches
 * @param avroStruct     Avro schema to map onto; the visible caller verifies it is a UNION
 *                       before calling (presumably `getTypes` is only valid on unions)
 * @return true if the struct's field count equals the number of mappable union branches
 */
private def canMapUnion(catalystStruct: StructType, avroStruct: Schema): Boolean = {
  val branches = avroStruct.getTypes
  val leadingNull = branches.size() > 0 && branches.get(0).getType == Type.NULL
  // Must mirror newUnionConverter, which drops a leading NULL branch before pairing branches
  // with struct fields. Accepting `size == length` for such a union would leave the last struct
  // field without a converter and fail with an index error at serialization time.
  val mappableBranches = if (leadingNull) branches.size() - 1 else branches.size()
  mappableBranches == catalystStruct.length
}
/**
* Resolve a possibly nullable Avro Type.
*
@@ -285,12 +330,12 @@ private[sql] class AvroSerializer(rootCatalystType: DataType,
if (avroType.getType == Type.UNION) {
val fields = avroType.getTypes.asScala
val actualType = fields.filter(_.getType != Type.NULL)
if (fields.length != 2 || actualType.length != 1) {
throw new UnsupportedAvroTypeException(
s"Unsupported Avro UNION type $avroType: Only UNION of a null type and a non-null " +
"type is supported")
if (fields.length == 2 && actualType.length == 1) {
(true, actualType.head)
} else {
// This is just a normal union, not used to designate nullability
(false, avroType)
}
(true, actualType.head)
} else {
(false, avroType)
}