359 lines
18 KiB
JSON
359 lines
18 KiB
JSON
/*
|
|
* Licensed to the Apache Software Foundation (ASF) under one
|
|
* or more contributor license agreements. See the NOTICE file
|
|
* distributed with this work for additional information
|
|
* regarding copyright ownership. The ASF licenses this file
|
|
* to you under the Apache License, Version 2.0 (the
|
|
* "License"); you may not use this file except in compliance
|
|
* with the License. You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "HoodieMetadataRecord",
|
|
"doc": "A record saved within the Metadata Table",
|
|
"fields": [
|
|
{
|
|
"name": "key",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"name": "type",
|
|
"doc": "Type of the metadata record",
|
|
"type": "int"
|
|
},
|
|
{
|
|
"doc": "Contains information about partitions and files within the dataset",
|
|
"name": "filesystemMetadata",
|
|
"type": [
|
|
"null",
|
|
{
|
|
"type": "map",
|
|
"values": {
|
|
"type": "record",
|
|
"name": "HoodieMetadataFileInfo",
|
|
"fields": [
|
|
{
|
|
"name": "size",
|
|
"type": "long",
|
|
"doc": "Size of the file"
|
|
},
|
|
{
|
|
"name": "isDeleted",
|
|
"type": "boolean",
|
|
"doc": "True if this file has been deleted"
|
|
}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"doc": "Metadata Index of bloom filters for all data files in the user table",
|
|
"name": "BloomFilterMetadata",
|
|
"type": [
|
|
"null",
|
|
{
|
|
"doc": "Data file bloom filter details",
|
|
"name": "HoodieMetadataBloomFilter",
|
|
"type": "record",
|
|
"fields": [
|
|
{
|
|
"doc": "Bloom filter type code",
|
|
"name": "type",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"doc": "Instant timestamp when this metadata was created/updated",
|
|
"name": "timestamp",
|
|
"type": "string"
|
|
},
|
|
{
|
|
"doc": "Bloom filter binary byte array",
|
|
"name": "bloomFilter",
|
|
"type": "bytes"
|
|
},
|
|
{
|
|
"doc": "Bloom filter entry valid/deleted flag",
|
|
"name": "isDeleted",
|
|
"type": "boolean"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"default" : null
|
|
},
|
|
{
|
|
"doc": "Metadata Index of column statistics for all data files in the user table",
|
|
"name": "ColumnStatsMetadata",
|
|
"type": [
|
|
"null",
|
|
{
|
|
"doc": "Data file column statistics",
|
|
"name": "HoodieMetadataColumnStats",
|
|
"type": "record",
|
|
"fields": [
|
|
{
|
|
"doc": "File name for which this column statistics applies",
|
|
"name": "fileName",
|
|
"type": [
|
|
"null",
|
|
"string"
|
|
]
|
|
},
|
|
{
|
|
"doc": "Column name for which this column statistics applies",
|
|
"name": "columnName",
|
|
"type": [
|
|
"null",
|
|
"string"
|
|
],
|
|
"default" : null
|
|
},
|
|
{
|
|
"doc": "Minimum value in the range. Based on user data table schema, we can convert this to the appropriate type",
|
|
"name": "minValue",
|
|
"type": [
|
|
// Those types should be aligned with Parquet `Statistics` impl
|
|
// making sure that we implement consistent semantics across file formats
|
|
//
|
|
// NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
|
|
// into one of the following types, making sure that their corresponding
|
|
// ordering is preserved
|
|
"null",
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "BooleanWrapper",
|
|
"doc": "A record wrapping boolean type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "boolean",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "IntWrapper",
|
|
"doc": "A record wrapping int type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "int",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "LongWrapper",
|
|
"doc": "A record wrapping long type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "long",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "FloatWrapper",
|
|
"doc": "A record wrapping float type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "float",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "DoubleWrapper",
|
|
"doc": "A record wrapping double type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "double",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "BytesWrapper",
|
|
"doc": "A record wrapping bytes type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "bytes",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "StringWrapper",
|
|
"doc": "A record wrapping string type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": "string",
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "DateWrapper",
|
|
"doc": "A record wrapping Date logical type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": {
|
|
"type": "int"
|
|
// NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
|
|
// rely on logical types to do proper encoding of the native Java types,
|
|
// and therefore have to encode statistics manually
|
|
//"logicalType": "date"
|
|
},
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "DecimalWrapper",
|
|
"doc": "A record wrapping Decimal logical type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": {
|
|
"type": "bytes",
|
|
"logicalType": "decimal",
|
|
// NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should
|
|
// be enough for almost any possible use-case
|
|
"precision": 30,
|
|
"scale": 15
|
|
},
|
|
"name": "value"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "TimeMicrosWrapper",
|
|
"doc": "A record wrapping Time-micros logical type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": {
|
|
"type": "long",
|
|
"logicalType": "time-micros"
|
|
},
|
|
"name": "value"
|
|
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"namespace": "org.apache.hudi.avro.model",
|
|
"type": "record",
|
|
"name": "TimestampMicrosWrapper",
|
|
"doc": "A record wrapping Timestamp-micros logical type so that it can be used within Avro's Union",
|
|
"fields": [
|
|
{
|
|
"type": {
|
|
"type": "long"
|
|
// NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
|
|
// rely on logical types to do proper encoding of the native Java types,
|
|
// and therefore have to encode statistics manually
|
|
//"logicalType": "timestamp-micros"
|
|
},
|
|
"name": "value"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"default": null
|
|
},
|
|
{
|
|
"doc": "Maximum value in the range. Based on user data table schema, we can convert it to the appropriate type",
|
|
"name": "maxValue",
|
|
"type": [
|
|
// Those types should be aligned with Parquet `Statistics` impl
|
|
// making sure that we implement consistent semantics across file formats
|
|
//
|
|
// NOTE: Other logical types (decimal, date, timestamp, etc) will be converted
|
|
// into one of the following types, making sure that their corresponding
|
|
// ordering is preserved
|
|
"null",
|
|
"org.apache.hudi.avro.model.BooleanWrapper",
|
|
"org.apache.hudi.avro.model.IntWrapper",
|
|
"org.apache.hudi.avro.model.LongWrapper",
|
|
"org.apache.hudi.avro.model.FloatWrapper",
|
|
"org.apache.hudi.avro.model.DoubleWrapper",
|
|
"org.apache.hudi.avro.model.BytesWrapper",
|
|
"org.apache.hudi.avro.model.StringWrapper",
|
|
"org.apache.hudi.avro.model.DateWrapper",
|
|
"org.apache.hudi.avro.model.DecimalWrapper",
|
|
"org.apache.hudi.avro.model.TimeMicrosWrapper",
|
|
"org.apache.hudi.avro.model.TimestampMicrosWrapper"
|
|
],
|
|
"default": null
|
|
},
|
|
{
|
|
"doc": "Total count of values",
|
|
"name": "valueCount",
|
|
"type": [
|
|
"null",
|
|
"long"
|
|
]
|
|
},
|
|
{
|
|
"doc": "Total count of null values",
|
|
"name": "nullCount",
|
|
"type": [
|
|
"null",
|
|
"long"
|
|
]
|
|
},
|
|
{
|
|
"doc": "Total storage size on disk",
|
|
"name": "totalSize",
|
|
"type": [
|
|
"null",
|
|
"long"
|
|
]
|
|
},
|
|
{
|
|
"doc": "Total uncompressed storage size on disk",
|
|
"name": "totalUncompressedSize",
|
|
"type": [
|
|
"null",
|
|
"long"
|
|
]
|
|
},
|
|
{
|
|
"doc": "Column range entry valid/deleted flag",
|
|
"name": "isDeleted",
|
|
"type": "boolean"
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"default" : null
|
|
}
|
|
]
|
|
}
|