1
0

[HUDI-684] Introduced abstraction for writing and reading different types of base file formats. (#1687)

Notable changes:
    1. HoodieFileWriter and HoodieFileReader abstractions for writer/reader side of a base file format
    2. HoodieDataBlock abstraction for creation specific data blocks for base file formats. (e.g. Parquet has HoodieAvroDataBlock)
    3. All hardocded references to Parquet / Parquet based classes have been abstracted to call methods which accept a base file format
    4. HiveSyncTool accepts the base file format as a CLI parameter
    5. HoodieDeltaStreamer accepts the base file format as a CLI parameter
    6. HoodieSparkSqlWriter accepts the base file format as a parameter
This commit is contained in:
Prashant Wason
2020-06-25 23:46:55 -07:00
committed by GitHub
parent 5e47673341
commit 2603cfb33e
55 changed files with 1086 additions and 466 deletions

View File

@@ -30,8 +30,8 @@ import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.HoodieLogFormat.Reader;
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieCorruptBlock;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType;
import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType;
@@ -118,8 +118,8 @@ public class HoodieLogFileCommand implements CommandMarker {
dummyInstantTimeCount++;
instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
}
if (n instanceof HoodieAvroDataBlock) {
recordCount = ((HoodieAvroDataBlock) n).getRecords().size();
if (n instanceof HoodieDataBlock) {
recordCount = ((HoodieDataBlock) n).getRecords().size();
}
}
if (commitCountAndMetadata.containsKey(instantTime)) {
@@ -215,8 +215,8 @@ public class HoodieLogFileCommand implements CommandMarker {
// read the avro blocks
while (reader.hasNext()) {
HoodieLogBlock n = reader.next();
if (n instanceof HoodieAvroDataBlock) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) n;
if (n instanceof HoodieDataBlock) {
HoodieDataBlock blk = (HoodieDataBlock) n;
List<IndexedRecord> records = blk.getRecords();
for (IndexedRecord record : records) {
if (allRecords.size() < limit) {