1
0

[minor] Checks the data block type for archived timeline (#5106)

This commit is contained in:
Danny Chan
2022-03-24 14:10:43 +08:00
committed by GitHub
parent 52f0498330
commit a1c42fcc07
2 changed files with 14 additions and 39 deletions

View File

@@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.FileIOUtils;
@@ -248,16 +249,19 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
int instantsInPreviousFile = instantsInRange.size();
// Read the avro blocks
while (reader.hasNext()) {
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
// TODO If we can store additional metadata in datablock, we can skip parsing records
// (such as startTime, endTime of records in the block)
try (ClosableIterator<IndexedRecord> itr = blk.getRecordItr()) {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
// Filter blocks in desired time window
.filter(r -> commitsFilter.apply((GenericRecord) r))
.map(r -> readCommit((GenericRecord) r, loadInstantDetails))
.filter(c -> filter == null || filter.isInRange(c))
.forEach(instantsInRange::add);
HoodieLogBlock block = reader.next();
if (block instanceof HoodieAvroDataBlock) {
HoodieAvroDataBlock avroBlock = (HoodieAvroDataBlock) block;
// TODO If we can store additional metadata in datablock, we can skip parsing records
// (such as startTime, endTime of records in the block)
try (ClosableIterator<IndexedRecord> itr = avroBlock.getRecordItr()) {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true)
// Filter blocks in desired time window
.filter(r -> commitsFilter.apply((GenericRecord) r))
.map(r -> readCommit((GenericRecord) r, loadInstantDetails))
.filter(c -> filter == null || filter.isInRange(c))
.forEach(instantsInRange::add);
}
}
}