1
0

[HUDI-1828] Update unit tests to support ORC as the base file format (#3237)

This commit is contained in:
Jintao Guan
2021-07-14 09:05:42 -07:00
committed by GitHub
parent 93967404a7
commit 2debb9b3ed
14 changed files with 149 additions and 53 deletions

View File

@@ -42,6 +42,7 @@ public class HoodieAvroWriteSupport extends AvroWriteSupport {
// Key names for Hudi-specific entries stored in base file footers/metadata.
// NOTE(review): where the first three keys are written and read is not visible in this excerpt — confirm against the writer.
public static final String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
public static final String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
public static final String HOODIE_BLOOM_FILTER_TYPE_CODE = "hoodie_bloom_filter_type_code";
// ORC file metadata key; OrcUtils.readAvroSchema fetches the value stored under this key,
// decodes it as UTF-8 text and parses that text as the Avro schema.
public static final String HOODIE_AVRO_SCHEMA_METADATA_KEY = "orc.avro.schema";
public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
super(schema, avroSchema);

View File

@@ -19,6 +19,8 @@
package org.apache.hudi.common.util;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -46,6 +48,8 @@ import org.apache.orc.Reader.Options;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import static org.apache.hudi.avro.HoodieAvroWriteSupport.HOODIE_AVRO_SCHEMA_METADATA_KEY;
/**
* Utility functions for ORC files.
*/
@@ -202,8 +206,7 @@ public class OrcUtils extends BaseFileUtils {
footerVals.put(footerName, metadata.get(footerName));
} else if (required) {
throw new MetadataNotFoundException(
"Could not find index in ORC footer. Looked for key " + footerName + " in "
+ orcFilePath);
"Could not find index in ORC footer. Looked for key " + footerName + " in " + orcFilePath);
}
}
return footerVals;
@@ -216,8 +219,9 @@ public class OrcUtils extends BaseFileUtils {
public Schema readAvroSchema(Configuration conf, Path orcFilePath) {
try {
Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf));
TypeDescription orcSchema = reader.getSchema();
return AvroOrcUtils.createAvroSchema(orcSchema);
ByteBuffer schemaBuffer = reader.getMetadataValue(HOODIE_AVRO_SCHEMA_METADATA_KEY);
String schemaText = StandardCharsets.UTF_8.decode(schemaBuffer).toString();
return new Schema.Parser().parse(schemaText);
} catch (IOException io) {
throw new HoodieIOException("Unable to get Avro schema for ORC file:" + orcFilePath, io);
}