[HUDI-3760] Adding capability to fetch Metadata Records by prefix (#5208)
- Adding capability to fetch Metadata Records by key prefix so that Data Skipping can fetch only the Column Stats Index records pertaining to the columns being queried, instead of reading out the whole Index. - Fixed usages of HFileScanner in HFileReader: a few code paths use the cached scanner if available, while other code paths use their own HFileScanner w/ positional read. Brief change log - Rebasing ColumnStatsIndexSupport to rely on HoodieBackedTableMetadata in lieu of reading through Spark DS - Adding methods enabling key-prefix lookups to HoodieFileReader, HoodieHFileReader - Wiring key-prefix lookup through LogRecordScanner impls - Cleaning up HoodieHFileReader impl Co-authored-by: sivabalan <n.siva.b@gmail.com> Co-authored-by: Sagar Sumit <sagarsumit09@gmail.com>
This commit is contained in:
@@ -81,7 +81,7 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
// read the avro blocks
|
||||
while (reader.hasNext()) {
|
||||
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
|
||||
blk.getRecordItr().forEachRemaining(readRecords::add);
|
||||
blk.getRecordIterator().forEachRemaining(readRecords::add);
|
||||
}
|
||||
List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
|
||||
.filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION)
|
||||
@@ -155,7 +155,7 @@ public class ArchivedCommitsCommand implements CommandMarker {
|
||||
// read the avro blocks
|
||||
while (reader.hasNext()) {
|
||||
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
|
||||
recordItr.forEachRemaining(readRecords::add);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,7 +124,7 @@ public class ExportCommand implements CommandMarker {
|
||||
// read the avro blocks
|
||||
while (reader.hasNext() && copyCount < limit) {
|
||||
HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next();
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
|
||||
while (recordItr.hasNext()) {
|
||||
IndexedRecord ir = recordItr.next();
|
||||
// Archived instants are saved as Avro-encoded HoodieArchivedMetaEntry records. We need to get the
|
||||
|
||||
@@ -122,7 +122,7 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
instantTime = "dummy_instant_time_" + dummyInstantTimeCount;
|
||||
}
|
||||
if (n instanceof HoodieDataBlock) {
|
||||
try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordItr()) {
|
||||
try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordIterator()) {
|
||||
recordItr.forEachRemaining(r -> recordCount.incrementAndGet());
|
||||
}
|
||||
}
|
||||
@@ -236,7 +236,7 @@ public class HoodieLogFileCommand implements CommandMarker {
|
||||
HoodieLogBlock n = reader.next();
|
||||
if (n instanceof HoodieDataBlock) {
|
||||
HoodieDataBlock blk = (HoodieDataBlock) n;
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) {
|
||||
try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordIterator()) {
|
||||
recordItr.forEachRemaining(record -> {
|
||||
if (allRecords.size() < limit) {
|
||||
allRecords.add(record);
|
||||
|
||||
Reference in New Issue
Block a user