Introduce config to control interval tree pruning
- turned on by default - Minor code refactoring/restructuring
This commit is contained in:
committed by
vinoth chandar
parent
7129dc5bb7
commit
ea20d47248
@@ -45,13 +45,12 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
|
|||||||
public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
|
public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
|
||||||
public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
|
public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
|
||||||
public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
|
public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
|
||||||
|
public static final String BLOOM_INDEX_TREE_BASED_FILTER_PROP = "hoodie.bloom.index.use.treebased.filter";
|
||||||
|
public static final String DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER = "true";
|
||||||
public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL =
|
public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL =
|
||||||
"hoodie.bloom.index.input.storage" + ".level";
|
"hoodie.bloom.index.input.storage" + ".level";
|
||||||
public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
|
public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
|
||||||
|
|
||||||
// ***** Bucketed Index Configs *****
|
|
||||||
public static final String BUCKETED_INDEX_NUM_BUCKETS_PROP = "hoodie.index.bucketed.numbuckets";
|
|
||||||
|
|
||||||
private HoodieIndexConfig(Properties props) {
|
private HoodieIndexConfig(Properties props) {
|
||||||
super(props);
|
super(props);
|
||||||
}
|
}
|
||||||
@@ -114,12 +113,11 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder numBucketsPerPartition(int numBuckets) {
|
public Builder bloomIndexTreebasedFilter(boolean useTreeFilter) {
|
||||||
props.setProperty(BUCKETED_INDEX_NUM_BUCKETS_PROP, String.valueOf(numBuckets));
|
props.setProperty(BLOOM_INDEX_TREE_BASED_FILTER_PROP, String.valueOf(useTreeFilter));
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Builder withBloomIndexInputStorageLevel(String level) {
|
public Builder withBloomIndexInputStorageLevel(String level) {
|
||||||
props.setProperty(BLOOM_INDEX_INPUT_STORAGE_LEVEL, level);
|
props.setProperty(BLOOM_INDEX_INPUT_STORAGE_LEVEL, level);
|
||||||
return this;
|
return this;
|
||||||
@@ -141,6 +139,8 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
|
|||||||
BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING);
|
BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING);
|
||||||
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL),
|
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL),
|
||||||
BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL);
|
BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL);
|
||||||
|
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_TREE_BASED_FILTER_PROP),
|
||||||
|
BLOOM_INDEX_TREE_BASED_FILTER_PROP, DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER);
|
||||||
// Throws IllegalArgumentException if the value set is not a known Hoodie Index Type
|
// Throws IllegalArgumentException if the value set is not a known Hoodie Index Type
|
||||||
HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP));
|
HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP));
|
||||||
return config;
|
return config;
|
||||||
|
|||||||
@@ -324,8 +324,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
|||||||
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP));
|
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP));
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getNumBucketsPerPartition() {
|
public boolean useBloomIndexTreebasedFilter() {
|
||||||
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP));
|
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_TREE_BASED_FILTER_PROP));
|
||||||
}
|
}
|
||||||
|
|
||||||
public StorageLevel getBloomIndexInputStorageLevel() {
|
public StorageLevel getBloomIndexInputStorageLevel() {
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ import com.uber.hoodie.config.HoodieWriteConfig;
|
|||||||
import com.uber.hoodie.exception.MetadataNotFoundException;
|
import com.uber.hoodie.exception.MetadataNotFoundException;
|
||||||
import com.uber.hoodie.index.HoodieIndex;
|
import com.uber.hoodie.index.HoodieIndex;
|
||||||
import com.uber.hoodie.table.HoodieTable;
|
import com.uber.hoodie.table.HoodieTable;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -304,9 +303,9 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
|||||||
JavaPairRDD<String, Tuple2<String, HoodieKey>> explodeRecordRDDWithFileComparisons(
|
JavaPairRDD<String, Tuple2<String, HoodieKey>> explodeRecordRDDWithFileComparisons(
|
||||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo,
|
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo,
|
||||||
JavaPairRDD<String, String> partitionRecordKeyPairRDD) {
|
JavaPairRDD<String, String> partitionRecordKeyPairRDD) {
|
||||||
IndexFileFilter indexFileFilter = config.getBloomIndexPruneByRanges()
|
IndexFileFilter indexFileFilter = config.useBloomIndexTreebasedFilter()
|
||||||
? new IntervalTreeBasedIndexFileFilter(partitionToFileIndexInfo)
|
? new IntervalTreeBasedIndexFileFilter(partitionToFileIndexInfo)
|
||||||
: new SimpleIndexFileFilter(partitionToFileIndexInfo);
|
: new ListBasedIndexFileFilter(partitionToFileIndexInfo);
|
||||||
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
|
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
|
||||||
String recordKey = partitionRecordKeyPair._2();
|
String recordKey = partitionRecordKeyPair._2();
|
||||||
String partitionPath = partitionRecordKeyPair._1();
|
String partitionPath = partitionRecordKeyPair._1();
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ package com.uber.hoodie.index.bloom;
|
|||||||
import com.uber.hoodie.common.BloomFilter;
|
import com.uber.hoodie.common.BloomFilter;
|
||||||
import com.uber.hoodie.common.model.HoodieKey;
|
import com.uber.hoodie.common.model.HoodieKey;
|
||||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||||
|
import com.uber.hoodie.common.util.HoodieTimer;
|
||||||
import com.uber.hoodie.common.util.ParquetUtils;
|
import com.uber.hoodie.common.util.ParquetUtils;
|
||||||
import com.uber.hoodie.exception.HoodieException;
|
import com.uber.hoodie.exception.HoodieException;
|
||||||
import com.uber.hoodie.exception.HoodieIndexException;
|
import com.uber.hoodie.exception.HoodieIndexException;
|
||||||
@@ -65,11 +66,12 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
try {
|
try {
|
||||||
// Load all rowKeys from the file, to double-confirm
|
// Load all rowKeys from the file, to double-confirm
|
||||||
if (!candidateRecordKeys.isEmpty()) {
|
if (!candidateRecordKeys.isEmpty()) {
|
||||||
|
HoodieTimer timer = new HoodieTimer().startTimer();
|
||||||
Set<String> fileRowKeys = ParquetUtils.filterParquetRowKeys(configuration, filePath,
|
Set<String> fileRowKeys = ParquetUtils.filterParquetRowKeys(configuration, filePath,
|
||||||
new HashSet<>(candidateRecordKeys));
|
new HashSet<>(candidateRecordKeys));
|
||||||
foundRecordKeys.addAll(fileRowKeys);
|
foundRecordKeys.addAll(fileRowKeys);
|
||||||
logger.info("After checking with row keys, we have " + foundRecordKeys.size()
|
logger.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath,
|
||||||
+ " results, for file " + filePath + " => " + foundRecordKeys);
|
timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size()));
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("Keys matching for file " + filePath + " => " + foundRecordKeys);
|
logger.debug("Keys matching for file " + filePath + " => " + foundRecordKeys);
|
||||||
}
|
}
|
||||||
@@ -98,6 +100,8 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
|
|
||||||
private String currentPartitionPath;
|
private String currentPartitionPath;
|
||||||
|
|
||||||
|
private long totalKeysChecked;
|
||||||
|
|
||||||
LazyKeyCheckIterator(
|
LazyKeyCheckIterator(
|
||||||
Iterator<Tuple2<String, Tuple2<String, HoodieKey>>> filePartitionRecordKeyTripletItr) {
|
Iterator<Tuple2<String, Tuple2<String, HoodieKey>>> filePartitionRecordKeyTripletItr) {
|
||||||
super(filePartitionRecordKeyTripletItr);
|
super(filePartitionRecordKeyTripletItr);
|
||||||
@@ -105,6 +109,7 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
candidateRecordKeys = new ArrayList<>();
|
candidateRecordKeys = new ArrayList<>();
|
||||||
bloomFilter = null;
|
bloomFilter = null;
|
||||||
currentPartitionPath = null;
|
currentPartitionPath = null;
|
||||||
|
totalKeysChecked = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -114,16 +119,42 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
private void initState(String fileName, String partitionPath) throws HoodieIndexException {
|
private void initState(String fileName, String partitionPath) throws HoodieIndexException {
|
||||||
try {
|
try {
|
||||||
Path filePath = new Path(basePath + "/" + partitionPath + "/" + fileName);
|
Path filePath = new Path(basePath + "/" + partitionPath + "/" + fileName);
|
||||||
bloomFilter = ParquetUtils
|
HoodieTimer timer = new HoodieTimer().startTimer();
|
||||||
.readBloomFilterFromParquetMetadata(metaClient.getHadoopConf(), filePath);
|
bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(metaClient.getHadoopConf(), filePath);
|
||||||
|
logger.info(String.format("Read bloom filter from %s/%s in %d ms", partitionPath, fileName, timer.endTimer()));
|
||||||
candidateRecordKeys = new ArrayList<>();
|
candidateRecordKeys = new ArrayList<>();
|
||||||
currentFile = fileName;
|
currentFile = fileName;
|
||||||
currentPartitionPath = partitionPath;
|
currentPartitionPath = partitionPath;
|
||||||
|
totalKeysChecked = 0;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new HoodieIndexException("Error checking candidate keys against file.", e);
|
throw new HoodieIndexException("Error checking candidate keys against file.", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check record key against bloom filter of current file & add to possible keys if needed
|
||||||
|
private void checkAndAddCandidates(String recordKey) {
|
||||||
|
if (bloomFilter.mightContain(recordKey)) {
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("Record key " + recordKey + " matches bloom filter in file " + currentPartitionPath
|
||||||
|
+ "/" + currentFile);
|
||||||
|
}
|
||||||
|
candidateRecordKeys.add(recordKey);
|
||||||
|
}
|
||||||
|
totalKeysChecked++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> checkAgainstCurrentFile() {
|
||||||
|
Path filePath = new Path(basePath + "/" + currentPartitionPath + "/" + currentFile);
|
||||||
|
if (logger.isDebugEnabled()) {
|
||||||
|
logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys);
|
||||||
|
}
|
||||||
|
List<String> matchingKeys = checkCandidatesAgainstFile(metaClient.getHadoopConf(), candidateRecordKeys, filePath);
|
||||||
|
logger.info(String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)",
|
||||||
|
totalKeysChecked, candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(),
|
||||||
|
matchingKeys.size()));
|
||||||
|
return matchingKeys;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected List<KeyLookupResult> computeNext() {
|
protected List<KeyLookupResult> computeNext() {
|
||||||
|
|
||||||
@@ -131,7 +162,6 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
try {
|
try {
|
||||||
// process one file in each go.
|
// process one file in each go.
|
||||||
while (inputItr.hasNext()) {
|
while (inputItr.hasNext()) {
|
||||||
|
|
||||||
Tuple2<String, Tuple2<String, HoodieKey>> currentTuple = inputItr.next();
|
Tuple2<String, Tuple2<String, HoodieKey>> currentTuple = inputItr.next();
|
||||||
String fileName = currentTuple._2._1;
|
String fileName = currentTuple._2._1;
|
||||||
String partitionPath = currentTuple._2._2.getPartitionPath();
|
String partitionPath = currentTuple._2._2.getPartitionPath();
|
||||||
@@ -142,53 +172,22 @@ public class HoodieBloomIndexCheckFunction implements
|
|||||||
initState(fileName, partitionPath);
|
initState(fileName, partitionPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if continue on current file)
|
// if continue on current file
|
||||||
if (fileName.equals(currentFile)) {
|
if (fileName.equals(currentFile)) {
|
||||||
// check record key against bloom filter of current file & add to possible keys if
|
checkAndAddCandidates(recordKey);
|
||||||
// needed
|
|
||||||
if (bloomFilter.mightContain(recordKey)) {
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("#1 Adding " + recordKey + " as candidate for file " + fileName);
|
|
||||||
}
|
|
||||||
candidateRecordKeys.add(recordKey);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// do the actual checking of file & break out
|
// do the actual checking of file & break out
|
||||||
Path filePath = new Path(basePath + "/" + currentPartitionPath + "/" + currentFile);
|
ret.add(new KeyLookupResult(currentFile, checkAgainstCurrentFile()));
|
||||||
logger.info(
|
|
||||||
"#1 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys
|
|
||||||
.size() + " for " + filePath);
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger
|
|
||||||
.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys);
|
|
||||||
}
|
|
||||||
ret.add(new KeyLookupResult(currentFile,
|
|
||||||
checkCandidatesAgainstFile(metaClient.getHadoopConf(), candidateRecordKeys, filePath)));
|
|
||||||
|
|
||||||
initState(fileName, partitionPath);
|
initState(fileName, partitionPath);
|
||||||
if (bloomFilter.mightContain(recordKey)) {
|
checkAndAddCandidates(recordKey);
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("#2 Adding " + recordKey + " as candidate for file " + fileName);
|
|
||||||
}
|
|
||||||
candidateRecordKeys.add(recordKey);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// handle case, where we ran out of input, close pending work, update return val
|
// handle case, where we ran out of input, close pending work, update return val
|
||||||
if (!inputItr.hasNext()) {
|
if (!inputItr.hasNext()) {
|
||||||
Path filePath = new Path(basePath + "/" + currentPartitionPath + "/" + currentFile);
|
ret.add(new KeyLookupResult(currentFile, checkAgainstCurrentFile()));
|
||||||
logger.info(
|
|
||||||
"#2 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys
|
|
||||||
.size() + " for " + filePath);
|
|
||||||
if (logger.isDebugEnabled()) {
|
|
||||||
logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys);
|
|
||||||
}
|
|
||||||
ret.add(new KeyLookupResult(currentFile,
|
|
||||||
checkCandidatesAgainstFile(metaClient.getHadoopConf(), candidateRecordKeys, filePath)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieException) {
|
if (e instanceof HoodieException) {
|
||||||
throw e;
|
throw e;
|
||||||
|
|||||||
@@ -26,11 +26,12 @@ import com.uber.hoodie.common.util.FSUtils;
|
|||||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||||
import com.uber.hoodie.exception.HoodieIOException;
|
import com.uber.hoodie.exception.HoodieIOException;
|
||||||
import com.uber.hoodie.table.HoodieTable;
|
import com.uber.hoodie.table.HoodieTable;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.spark.api.java.JavaPairRDD;
|
import org.apache.spark.api.java.JavaPairRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
@@ -85,7 +86,7 @@ public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
|
|
||||||
IndexFileFilter indexFileFilter = config.getBloomIndexPruneByRanges()
|
IndexFileFilter indexFileFilter = config.getBloomIndexPruneByRanges()
|
||||||
? new IntervalTreeBasedGlobalIndexFileFilter(partitionToFileIndexInfo)
|
? new IntervalTreeBasedGlobalIndexFileFilter(partitionToFileIndexInfo)
|
||||||
: new SimpleGlobalIndexFileFilter(partitionToFileIndexInfo);
|
: new ListBasedGlobalIndexFileFilter(partitionToFileIndexInfo);
|
||||||
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
|
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
|
||||||
String recordKey = partitionRecordKeyPair._2();
|
String recordKey = partitionRecordKeyPair._2();
|
||||||
String partitionPath = partitionRecordKeyPair._1();
|
String partitionPath = partitionRecordKeyPair._1();
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
package com.uber.hoodie.index.bloom;
|
package com.uber.hoodie.index.bloom;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -40,8 +41,8 @@ class IntervalTreeBasedGlobalIndexFileFilter implements IndexFileFilter {
|
|||||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}s
|
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}s
|
||||||
*/
|
*/
|
||||||
IntervalTreeBasedGlobalIndexFileFilter(final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
IntervalTreeBasedGlobalIndexFileFilter(final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
||||||
List<BloomIndexFileInfo> allIndexFiles = partitionToFileIndexInfo.values().stream().flatMap(
|
List<BloomIndexFileInfo> allIndexFiles = partitionToFileIndexInfo.values().stream().flatMap(Collection::stream)
|
||||||
indexFiles -> indexFiles.stream()).collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
// Note that the interval tree implementation doesn't have auto-balancing to ensure logN search time.
|
// Note that the interval tree implementation doesn't have auto-balancing to ensure logN search time.
|
||||||
// So, we are shuffling the input here hoping the tree will not have any skewness. If not, the tree could be skewed
|
// So, we are shuffling the input here hoping the tree will not have any skewness. If not, the tree could be skewed
|
||||||
// which could result in N search time instead of NlogN.
|
// which could result in N search time instead of NlogN.
|
||||||
@@ -60,9 +61,7 @@ class IntervalTreeBasedGlobalIndexFileFilter implements IndexFileFilter {
|
|||||||
public Set<String> getMatchingFiles(String partitionPath, String recordKey) {
|
public Set<String> getMatchingFiles(String partitionPath, String recordKey) {
|
||||||
Set<String> toReturn = new HashSet<>();
|
Set<String> toReturn = new HashSet<>();
|
||||||
toReturn.addAll(indexLookUpTree.getMatchingIndexFiles(recordKey));
|
toReturn.addAll(indexLookUpTree.getMatchingIndexFiles(recordKey));
|
||||||
filesWithNoRanges.forEach(indexFile -> {
|
toReturn.addAll(filesWithNoRanges);
|
||||||
toReturn.add(indexFile);
|
|
||||||
});
|
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ import java.util.Set;
|
|||||||
class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
|
class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
|
||||||
|
|
||||||
private final Map<String, KeyRangeLookupTree> partitionToFileIndexLookUpTree = new HashMap<>();
|
private final Map<String, KeyRangeLookupTree> partitionToFileIndexLookUpTree = new HashMap<>();
|
||||||
private final Map<String, Set<String>> filesWithNoRanges = new HashMap<>();
|
private final Map<String, Set<String>> partitionToFilesWithNoRanges = new HashMap<>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates {@link IntervalTreeBasedIndexFileFilter}
|
* Instantiates {@link IntervalTreeBasedIndexFileFilter}
|
||||||
@@ -51,10 +51,10 @@ class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
|
|||||||
lookUpTree.insert(new KeyRangeNode(indexFileInfo.getMinRecordKey(),
|
lookUpTree.insert(new KeyRangeNode(indexFileInfo.getMinRecordKey(),
|
||||||
indexFileInfo.getMaxRecordKey(), indexFileInfo.getFileName()));
|
indexFileInfo.getMaxRecordKey(), indexFileInfo.getFileName()));
|
||||||
} else {
|
} else {
|
||||||
if (!filesWithNoRanges.containsKey(partition)) {
|
if (!partitionToFilesWithNoRanges.containsKey(partition)) {
|
||||||
filesWithNoRanges.put(partition, new HashSet<>());
|
partitionToFilesWithNoRanges.put(partition, new HashSet<>());
|
||||||
}
|
}
|
||||||
filesWithNoRanges.get(partition).add(indexFileInfo.getFileName());
|
partitionToFilesWithNoRanges.get(partition).add(indexFileInfo.getFileName());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
partitionToFileIndexLookUpTree.put(partition, lookUpTree);
|
partitionToFileIndexLookUpTree.put(partition, lookUpTree);
|
||||||
@@ -64,18 +64,12 @@ class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
|
|||||||
@Override
|
@Override
|
||||||
public Set<String> getMatchingFiles(String partitionPath, String recordKey) {
|
public Set<String> getMatchingFiles(String partitionPath, String recordKey) {
|
||||||
Set<String> toReturn = new HashSet<>();
|
Set<String> toReturn = new HashSet<>();
|
||||||
if (partitionToFileIndexLookUpTree
|
// could be null, if there are no files in a given partition yet or if all index files have no ranges
|
||||||
.containsKey(partitionPath)) { // could be null, if there are no files in a given partition yet or if all
|
if (partitionToFileIndexLookUpTree.containsKey(partitionPath)) {
|
||||||
// index files has no ranges
|
toReturn.addAll(partitionToFileIndexLookUpTree.get(partitionPath).getMatchingIndexFiles(recordKey));
|
||||||
partitionToFileIndexLookUpTree.get(partitionPath).getMatchingIndexFiles(recordKey)
|
|
||||||
.forEach(matchingFileName -> {
|
|
||||||
toReturn.add(matchingFileName);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
if (filesWithNoRanges.containsKey(partitionPath)) {
|
if (partitionToFilesWithNoRanges.containsKey(partitionPath)) {
|
||||||
filesWithNoRanges.get(partitionPath).forEach(fileName -> {
|
toReturn.addAll(partitionToFilesWithNoRanges.get(partitionPath));
|
||||||
toReturn.add(fileName);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,14 +23,14 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
class SimpleGlobalIndexFileFilter extends SimpleIndexFileFilter {
|
class ListBasedGlobalIndexFileFilter extends ListBasedIndexFileFilter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates {@link SimpleGlobalIndexFileFilter}
|
* Instantiates {@link ListBasedGlobalIndexFileFilter}
|
||||||
*
|
*
|
||||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
||||||
*/
|
*/
|
||||||
SimpleGlobalIndexFileFilter(
|
ListBasedGlobalIndexFileFilter(
|
||||||
Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
||||||
super(partitionToFileIndexInfo);
|
super(partitionToFileIndexInfo);
|
||||||
}
|
}
|
||||||
@@ -27,16 +27,16 @@ import java.util.Set;
|
|||||||
* Simple implementation of {@link IndexFileFilter}. Sequentially goes through every index file in a given partition to
|
* Simple implementation of {@link IndexFileFilter}. Sequentially goes through every index file in a given partition to
|
||||||
* search for potential index files to be searched for a given record key.
|
* search for potential index files to be searched for a given record key.
|
||||||
*/
|
*/
|
||||||
class SimpleIndexFileFilter implements IndexFileFilter {
|
class ListBasedIndexFileFilter implements IndexFileFilter {
|
||||||
|
|
||||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo;
|
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Instantiates {@link SimpleIndexFileFilter}
|
* Instantiates {@link ListBasedIndexFileFilter}
|
||||||
*
|
*
|
||||||
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
|
||||||
*/
|
*/
|
||||||
SimpleIndexFileFilter(final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
ListBasedIndexFileFilter(final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
|
||||||
this.partitionToFileIndexInfo = partitionToFileIndexInfo;
|
this.partitionToFileIndexInfo = partitionToFileIndexInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user