From 5388c7f7a3efb84516af71a2533e8953113b30da Mon Sep 17 00:00:00 2001 From: Balajee Nagasubramaniam <47542891+nbalajee@users.noreply.github.com> Date: Fri, 18 Dec 2020 03:18:52 -0800 Subject: [PATCH] [HUDI-1470] Use the latest writer schema, when reading from existing parquet files in the hudi-test-suite (#2344) --- .../integ/testsuite/reader/DFSHoodieDatasetInputReader.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java index 2bd507ca5..bc7803d9d 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSHoodieDatasetInputReader.java @@ -52,6 +52,7 @@ import org.apache.hudi.common.util.ParquetReaderIterator; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieMemoryConfig; import org.apache.parquet.avro.AvroParquetReader; +import org.apache.parquet.avro.AvroReadSupport; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -243,6 +244,9 @@ public class DFSHoodieDatasetInputReader extends DFSDeltaInputReader { private Iterator readParquetOrLogFiles(FileSlice fileSlice) throws IOException { if (fileSlice.getBaseFile().isPresent()) { + // Read the parquet files using the latest writer schema. + Schema schema = new Schema.Parser().parse(schemaStr); + AvroReadSupport.setAvroReadSchema(metaClient.getHadoopConf(), HoodieAvroUtils.addMetadataFields(schema)); Iterator itr = new ParquetReaderIterator(AvroParquetReader.builder(new Path(fileSlice.getBaseFile().get().getPath())).withConf(metaClient.getHadoopConf()).build());