1
0

[HUDI-1445] Refactor AbstractHoodieLogRecordScanner to use Builder (#2313)

This commit is contained in:
Danny Chan
2020-12-10 20:02:02 +08:00
committed by GitHub
parent bd9cceccb5
commit 4bc45a391a
11 changed files with 431 additions and 82 deletions

View File

@@ -63,17 +63,18 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader
// NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
// but can return records for completed commits > the commit we are trying to read (if using
// readCommit() API)
return new HoodieMergedLogRecordScanner(
FSUtils.getFs(split.getPath().toString(), jobConf),
split.getBasePath(),
split.getDeltaLogPaths(),
usesCustomPayload ? getWriterSchema() : getReaderSchema(),
split.getMaxCommitTime(),
HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf),
Boolean.parseBoolean(jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
false,
jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
jobConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH));
return HoodieMergedLogRecordScanner.newBuilder()
.withFileSystem(FSUtils.getFs(split.getPath().toString(), jobConf))
.withBasePath(split.getBasePath())
.withLogFilePaths(split.getDeltaLogPaths())
.withReaderSchema(usesCustomPayload ? getWriterSchema() : getReaderSchema())
.withLatestInstantTime(split.getMaxCommitTime())
.withMaxMemorySizeInBytes(HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf))
.withReadBlocksLazily(Boolean.parseBoolean(jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)))
.withReverseReader(false)
.withBufferSize(jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE))
.withSpillableMapBasePath(jobConf.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH))
.build();
}
@Override

View File

@@ -77,15 +77,22 @@ class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader
Option.empty(), x -> x, new DefaultSizeEstimator<>());
// Consumer of this record reader
this.iterator = this.executor.getQueue().iterator();
this.logRecordScanner = new HoodieUnMergedLogRecordScanner(FSUtils.getFs(split.getPath().toString(), this.jobConf),
split.getBasePath(), split.getDeltaLogPaths(), getReaderSchema(), split.getMaxCommitTime(),
Boolean.parseBoolean(this.jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
false, this.jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE), record -> {
this.logRecordScanner = HoodieUnMergedLogRecordScanner.newBuilder()
.withFileSystem(FSUtils.getFs(split.getPath().toString(), this.jobConf))
.withBasePath(split.getBasePath())
.withLogFilePaths(split.getDeltaLogPaths())
.withReaderSchema(getReaderSchema())
.withLatestInstantTime(split.getMaxCommitTime())
.withReadBlocksLazily(Boolean.parseBoolean(this.jobConf.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)))
.withReverseReader(false)
.withBufferSize(this.jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE))
.withLogRecordScannerCallback(record -> {
// convert Hoodie log record to Hadoop AvroWritable and buffer
GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema()).get();
ArrayWritable aWritable = (ArrayWritable) HoodieRealtimeRecordReaderUtils.avroToArrayWritable(rec, getHiveSchema());
this.executor.getQueue().insertRecord(aWritable);
});
})
.build();
// Start reading and buffering
this.executor.startProducers();
}