From 383d5edc169b79c4022f81a1580ab1bc2afebc30 Mon Sep 17 00:00:00 2001 From: Manoj Govindassamy Date: Fri, 3 Dec 2021 11:18:10 -0800 Subject: [PATCH] [HUDI-2894][HUDI-2905] Metadata table - avoiding key lookup failures on base files over S3 (#4185) - Fetching partition files or all partitions from the metadata table fails when run over S3. The metadata table uses the HFile format for its base files, and record lookup uses the HFile.Reader and HFileScanner interfaces to get records by partition keys. When the backing storage is S3, this record lookup from HFiles fails with an IOException, which in turn fails the caller's commit/update operations. - The metadata table looks up HFile records with positional read enabled so as to perform better for random lookups. However, over S3 this positional-read key lookup returns partial read sizes, causing the HFile scanner to throw an IOException. This doesn't happen over HDFS. Although the metadata table uses HFiles for random key lookups, positional read is not mandatory because we sort the keys when doing a lookup for multiple keys. - The fix is to disable HFile positional read for all HFile-scanner-based key lookups. 
--- .../hudi/common/bootstrap/index/HFileBootstrapIndex.java | 8 ++++---- .../org/apache/hudi/io/storage/HoodieHFileReader.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index d4a77b082..3700d01a6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -291,13 +291,13 @@ public class HFileBootstrapIndex extends BootstrapIndex { @Override public List getIndexedPartitionPaths() { - HFileScanner scanner = partitionIndexReader().getScanner(true, true); + HFileScanner scanner = partitionIndexReader().getScanner(true, false); return getAllKeys(scanner, HFileBootstrapIndex::getPartitionFromKey); } @Override public List getIndexedFileGroupIds() { - HFileScanner scanner = fileIdIndexReader().getScanner(true, true); + HFileScanner scanner = fileIdIndexReader().getScanner(true, false); return getAllKeys(scanner, HFileBootstrapIndex::getFileGroupFromKey); } @@ -319,7 +319,7 @@ public class HFileBootstrapIndex extends BootstrapIndex { @Override public List getSourceFileMappingForPartition(String partition) { try { - HFileScanner scanner = partitionIndexReader().getScanner(true, true); + HFileScanner scanner = partitionIndexReader().getScanner(true, false); KeyValue keyValue = new KeyValue(Bytes.toBytes(getPartitionKey(partition)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); if (scanner.seekTo(keyValue) == 0) { @@ -352,7 +352,7 @@ public class HFileBootstrapIndex extends BootstrapIndex { List fileGroupIds = new ArrayList<>(ids); Collections.sort(fileGroupIds); try { - HFileScanner scanner = fileIdIndexReader().getScanner(true, true); + HFileScanner scanner = 
fileIdIndexReader().getScanner(true, false); for (HoodieFileGroupId fileGroupId : fileGroupIds) { KeyValue keyValue = new KeyValue(Bytes.toBytes(getFileGroupKey(fileGroupId)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 7b80d1a58..e3e38eca8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -246,7 +246,7 @@ public class HoodieHFileReader implements HoodieFileRea synchronized (this) { if (keyScanner == null) { - keyScanner = reader.getScanner(false, true); + keyScanner = reader.getScanner(false, false); } if (keyScanner.seekTo(kv) == 0) {