[HUDI-431] Adding support for Parquet in MOR LogBlocks (#4333)
- Adding support for Parquet in MOR table log blocks

Co-authored-by: Sivabalan Narayanan <n.siva.b@gmail.com>
This commit is contained in:
@@ -163,6 +163,12 @@ public class TestHoodieRealtimeRecordReader {
|
||||
HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK);
|
||||
}
|
||||
|
||||
/**
 * Runs the shared {@code testReaderInternal} scenario with
 * {@code PARQUET_DATA_BLOCK} as the log block type, using the BITCASK
 * disk map type.
 *
 * NOTE(review): the two {@code false} arguments presumably correspond to
 * {@code isCompressionEnabled} and {@code partitioned}, as in the
 * three-argument overload of {@code testReaderInternal} -- confirm against
 * the four-argument overload, which is not visible here.
 *
 * @throws Exception propagated from {@code testReaderInternal}
 */
@Test
|
||||
public void testParquetInlineReader() throws Exception {
|
||||
testReaderInternal(ExternalSpillableMap.DiskMapType.BITCASK, false, false,
|
||||
HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK);
|
||||
}
|
||||
|
||||
private void testReaderInternal(ExternalSpillableMap.DiskMapType diskMapType,
|
||||
boolean isCompressionEnabled,
|
||||
boolean partitioned) throws Exception {
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi.hadoop.testutils;
|
||||
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.RawLocalFileSystem;
|
||||
import org.apache.hadoop.hbase.io.compress.Compression;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieFileFormat;
|
||||
@@ -33,6 +34,7 @@ import org.apache.hudi.common.table.log.block.HoodieCommandBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
|
||||
import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
import org.apache.hudi.hadoop.utils.HoodieHiveUtils;
|
||||
@@ -47,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
|
||||
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.parquet.avro.AvroParquetWriter;
|
||||
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
@@ -363,9 +366,14 @@ public class InputFormatTestUtil {
|
||||
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, newCommit);
|
||||
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString());
|
||||
HoodieDataBlock dataBlock = (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK)
|
||||
? new HoodieHFileDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD) :
|
||||
new HoodieAvroDataBlock(records, header);
|
||||
HoodieDataBlock dataBlock = null;
|
||||
if (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) {
|
||||
dataBlock = new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ);
|
||||
} else if (logBlockType == HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK) {
|
||||
dataBlock = new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP);
|
||||
} else {
|
||||
dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD);
|
||||
}
|
||||
writer.appendBlock(dataBlock);
|
||||
return writer;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user