[HUDI-3664] Fixing Column Stats Index composition (#5181)
Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
@@ -122,17 +122,195 @@
|
||||
"doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type",
|
||||
"name": "minValue",
|
||||
"type": [
|
||||
// Those types should be aligned with Parquet `Statistics` impl
|
||||
// making sure that we implement semantics consistently across file formats
|
||||
//
|
||||
// NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
|
||||
// into one of the following types, making sure that their corresponding
|
||||
// ordering is preserved
|
||||
"null",
|
||||
"string"
|
||||
]
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "BooleanWrapper",
|
||||
"doc": "A record wrapping boolean type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "boolean",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "IntWrapper",
|
||||
"doc": "A record wrapping int type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "int",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "LongWrapper",
|
||||
"doc": "A record wrapping long type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "long",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "FloatWrapper",
|
||||
"doc": "A record wrapping float type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "float",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "DoubleWrapper",
|
||||
"doc": "A record wrapping double type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "double",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "BytesWrapper",
|
||||
"doc": "A record wrapping bytes type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "bytes",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "StringWrapper",
|
||||
"doc": "A record wrapping string type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": "string",
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "DateWrapper",
|
||||
"doc": "A record wrapping Date logical type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": {
|
||||
"type": "int"
|
||||
// NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
|
||||
// rely on logical types to do proper encoding of the native Java types,
|
||||
// and therefore have to encode statistics manually
|
||||
//"logicalType": "date"
|
||||
},
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "DecimalWrapper",
|
||||
"doc": "A record wrapping Decimal logical type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": {
|
||||
"type": "bytes",
|
||||
"logicalType": "decimal",
|
||||
// NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should
|
||||
// be enough for almost all possible use cases
|
||||
"precision": 30,
|
||||
"scale": 15
|
||||
},
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "TimeMicrosWrapper",
|
||||
"doc": "A record wrapping Time-micros logical type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": {
|
||||
"type": "long",
|
||||
"logicalType": "time-micros"
|
||||
},
|
||||
"name": "value"
|
||||
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"namespace": "org.apache.hudi.avro.model",
|
||||
"type": "record",
|
||||
"name": "TimestampMicrosWrapper",
|
||||
"doc": "A record wrapping Timestamp-micros logical type to be able to be used it w/in Avro's Union",
|
||||
"fields": [
|
||||
{
|
||||
"type": {
|
||||
"type": "long"
|
||||
// NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
|
||||
// rely on logical types to do proper encoding of the native Java types,
|
||||
// and therefore have to encode statistics manually
|
||||
//"logicalType": "timestamp-micros"
|
||||
},
|
||||
"name": "value"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
"doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type",
|
||||
"name": "maxValue",
|
||||
"type": [
|
||||
// Those types should be aligned with Parquet `Statistics` impl
|
||||
// making sure that we implement semantics consistently across file formats
|
||||
//
|
||||
// NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
|
||||
// into one of the following types, making sure that their corresponding
|
||||
// ordering is preserved
|
||||
"null",
|
||||
"string"
|
||||
]
|
||||
"org.apache.hudi.avro.model.BooleanWrapper",
|
||||
"org.apache.hudi.avro.model.IntWrapper",
|
||||
"org.apache.hudi.avro.model.LongWrapper",
|
||||
"org.apache.hudi.avro.model.FloatWrapper",
|
||||
"org.apache.hudi.avro.model.DoubleWrapper",
|
||||
"org.apache.hudi.avro.model.BytesWrapper",
|
||||
"org.apache.hudi.avro.model.StringWrapper",
|
||||
"org.apache.hudi.avro.model.DateWrapper",
|
||||
"org.apache.hudi.avro.model.DecimalWrapper",
|
||||
"org.apache.hudi.avro.model.TimeMicrosWrapper",
|
||||
"org.apache.hudi.avro.model.TimestampMicrosWrapper"
|
||||
],
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
"doc": "Total count of values",
|
||||
|
||||
Reference in New Issue
Block a user