/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Avro schema of a single Metadata Table record.
//
// Every record carries a string `key` and an int `type`, followed by three
// nullable payload sections:
//   - filesystemMetadata:  map of HoodieMetadataFileInfo (size / isDeleted)
//   - BloomFilterMetadata: HoodieMetadataBloomFilter record
//   - ColumnStatsMetadata: HoodieMetadataColumnStats record
//
// NOTE: Every nullable union in this schema lists "null" as its first branch
//       and declares "default": null. Keep both properties when adding new
//       optional fields, so that a reader using this schema can still resolve
//       data written without them.
{
  "namespace": "org.apache.hudi.avro.model",
  "type": "record",
  "name": "HoodieMetadataRecord",
  "doc": "A record saved within the Metadata Table",
  "fields": [
    {
      "name": "key",
      "type": "string"
    },
    {
      "name": "type",
      "doc": "Type of the metadata record",
      "type": "int"
    },
    {
      "doc": "Contains information about partitions and files within the dataset",
      "name": "filesystemMetadata",
      "type": [
        "null",
        {
          "type": "map",
          "values": {
            "type": "record",
            "name": "HoodieMetadataFileInfo",
            "fields": [
              {
                "name": "size",
                "type": "long",
                "doc": "Size of the file"
              },
              {
                "name": "isDeleted",
                "type": "boolean",
                "doc": "True if this file has been deleted"
              }
            ]
          }
        }
      ],
      "default": null
    },
    {
      "doc": "Metadata Index of bloom filters for all data files in the user table",
      "name": "BloomFilterMetadata",
      "type": [
        "null",
        {
          "doc": "Data file bloom filter details",
          "name": "HoodieMetadataBloomFilter",
          "type": "record",
          "fields": [
            {
              "doc": "Bloom filter type code",
              "name": "type",
              "type": "string"
            },
            {
              "doc": "Instant timestamp when this metadata was created/updated",
              "name": "timestamp",
              "type": "string"
            },
            {
              "doc": "Bloom filter binary byte array",
              "name": "bloomFilter",
              "type": "bytes"
            },
            {
              "doc": "Bloom filter entry valid/deleted flag",
              "name": "isDeleted",
              "type": "boolean"
            }
          ]
        }
      ],
      "default": null
    },
    {
      "doc": "Metadata Index of column statistics for all data files in the user table",
      "name": "ColumnStatsMetadata",
      "type": [
        "null",
        {
          "doc": "Data file column statistics",
          "name": "HoodieMetadataColumnStats",
          "type": "record",
          "fields": [
            {
              "doc": "File name for which this column statistics applies",
              "name": "fileName",
              "type": [
                "null",
                "string"
              ],
              "default": null
            },
            {
              "doc": "Column name for which this column statistics applies",
              "name": "columnName",
              "type": [
                "null",
                "string"
              ],
              "default": null
            },
            {
              "doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type",
              "name": "minValue",
              "type": [
                // These types should stay aligned with the Parquet `Statistics` implementation,
                // so that the statistics keep consistent semantics across file formats
                //
                // NOTE: Other logical types (decimal, date, timestamp, etc.) will be converted
                //       into one of the following types, making sure that their corresponding
                //       ordering is preserved
                //
                // The wrapper records are defined here, at their first use; the "maxValue"
                // union refers to them by their fully-qualified names
                "null",
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "BooleanWrapper",
                  "doc": "A record wrapping boolean type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "boolean",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "IntWrapper",
                  "doc": "A record wrapping int type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "int",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "LongWrapper",
                  "doc": "A record wrapping long type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "long",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "FloatWrapper",
                  "doc": "A record wrapping float type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "float",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "DoubleWrapper",
                  "doc": "A record wrapping double type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "double",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "BytesWrapper",
                  "doc": "A record wrapping bytes type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "bytes",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "StringWrapper",
                  "doc": "A record wrapping string type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": "string",
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "DateWrapper",
                  "doc": "A record wrapping Date logical type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": {
                        "type": "int"
                        // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
                        //       rely on logical types to do proper encoding of the native Java types,
                        //       and hence have to encode the statistic manually
                        //"logicalType": "date"
                      },
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "DecimalWrapper",
                  "doc": "A record wrapping Decimal logical type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": {
                        "type": "bytes",
                        "logicalType": "decimal",
                        // NOTE: This is equivalent to Spark's [[DoubleDecimal]] and should
                        //       be enough for almost any possible use-case
                        "precision": 30,
                        "scale": 15
                      },
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "TimeMicrosWrapper",
                  "doc": "A record wrapping Time-micros logical type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": {
                        "type": "long",
                        "logicalType": "time-micros"
                      },
                      "name": "value"
                    }
                  ]
                },
                {
                  "namespace": "org.apache.hudi.avro.model",
                  "type": "record",
                  "name": "TimestampMicrosWrapper",
                  "doc": "A record wrapping Timestamp-micros logical type to be able to be used it w/in Avro's Union",
                  "fields": [
                    {
                      "type": {
                        "type": "long"
                        // NOTE: Due to breaking changes in code-gen b/w Avro 1.8.2 and 1.10, we can't
                        //       rely on logical types to do proper encoding of the native Java types,
                        //       and hence have to encode the statistic manually
                        //"logicalType": "timestamp-micros"
                      },
                      "name": "value"
                    }
                  ]
                }
              ],
              "default": null
            },
            {
              "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type",
              "name": "maxValue",
              "type": [
                // These types should stay aligned with the Parquet `Statistics` implementation,
                // so that the statistics keep consistent semantics across file formats
                //
                // NOTE: Other logical types (decimal, date, timestamp, etc.) will be converted
                //       into one of the following types, making sure that their corresponding
                //       ordering is preserved
                "null",
                "org.apache.hudi.avro.model.BooleanWrapper",
                "org.apache.hudi.avro.model.IntWrapper",
                "org.apache.hudi.avro.model.LongWrapper",
                "org.apache.hudi.avro.model.FloatWrapper",
                "org.apache.hudi.avro.model.DoubleWrapper",
                "org.apache.hudi.avro.model.BytesWrapper",
                "org.apache.hudi.avro.model.StringWrapper",
                "org.apache.hudi.avro.model.DateWrapper",
                "org.apache.hudi.avro.model.DecimalWrapper",
                "org.apache.hudi.avro.model.TimeMicrosWrapper",
                "org.apache.hudi.avro.model.TimestampMicrosWrapper"
              ],
              "default": null
            },
            {
              "doc": "Total count of values",
              "name": "valueCount",
              "type": [
                "null",
                "long"
              ],
              "default": null
            },
            {
              "doc": "Total count of null values",
              "name": "nullCount",
              "type": [
                "null",
                "long"
              ],
              "default": null
            },
            {
              "doc": "Total storage size on disk",
              "name": "totalSize",
              "type": [
                "null",
                "long"
              ],
              "default": null
            },
            {
              "doc": "Total uncompressed storage size on disk",
              "name": "totalUncompressedSize",
              "type": [
                "null",
                "long"
              ],
              "default": null
            },
            {
              "doc": "Column range entry valid/deleted flag",
              "name": "isDeleted",
              "type": "boolean"
            }
          ]
        }
      ],
      "default": null
    }
  ]
}