1
0

Bucketized Bloom Filter checking

- Tackles the skew seen in sort-based partitioning/checking
 - Parameterized the HoodieBloomIndex test
 - Config to turn on/off (on by default)
 - Unit tests & also tested at scale
This commit is contained in:
Vinoth Chandar
2019-05-08 20:20:58 -07:00
committed by vinoth chandar
parent 4b27cc72bb
commit a0e62b7919
7 changed files with 466 additions and 159 deletions

View File

@@ -328,6 +328,14 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_TREE_BASED_FILTER_PROP));
}
/**
 * Reports whether bucketized checking is switched on for the bloom index.
 *
 * <p>The value stored under {@code HoodieIndexConfig.BLOOM_INDEX_BUCKETIZED_CHECKING_PROP}
 * is interpreted with {@link Boolean#parseBoolean(String)}, so anything other than a
 * case-insensitive {@code "true"} (including a {@code null} lookup result) yields
 * {@code false}.
 *
 * @return {@code true} only when the configured value parses as {@code true}
 */
public boolean useBloomIndexBucketizedChecking() {
  final String bucketizedCheckingValue =
      props.getProperty(HoodieIndexConfig.BLOOM_INDEX_BUCKETIZED_CHECKING_PROP);
  return Boolean.parseBoolean(bucketizedCheckingValue);
}
/**
 * Returns the integer configured under
 * {@code HoodieIndexConfig.BLOOM_INDEX_KEYS_PER_BUCKET_PROP}.
 *
 * <p>The raw value is parsed with {@link Integer#parseInt(String)}; no fallback is
 * applied here, so the property is expected to have been populated beforehand
 * (NOTE(review): presumably by the config builder's defaults — confirm).
 *
 * @return the configured value parsed as an {@code int}
 * @throws NumberFormatException if the looked-up value is {@code null} or is not a
 *     parsable integer
 */
public int getBloomIndexKeysPerBucket() {
  final String keysPerBucketValue =
      props.getProperty(HoodieIndexConfig.BLOOM_INDEX_KEYS_PER_BUCKET_PROP);
  return Integer.parseInt(keysPerBucketValue);
}
public StorageLevel getBloomIndexInputStorageLevel() {
return StorageLevel
.fromString(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_INPUT_STORAGE_LEVEL));