diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java index a84c57b11..9a9687a3d 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java @@ -44,6 +44,8 @@ public class HoodieIndexConfig extends DefaultHoodieConfig { public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true"; public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching"; public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true"; + public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = "hoodie.bloom.index.input.storage.level"; + public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER"; // ***** HBase Index Configs ***** public final static String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum"; @@ -143,6 +145,11 @@ public class HoodieIndexConfig extends DefaultHoodieConfig { return this; } + public Builder withBloomIndexInputStorageLevel(String level) { + props.setProperty(BLOOM_INDEX_INPUT_STORAGE_LEVEL, level); + return this; + } + public HoodieIndexConfig build() { HoodieIndexConfig config = new HoodieIndexConfig(props); setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), @@ -161,6 +168,8 @@ public class HoodieIndexConfig extends DefaultHoodieConfig { HBASE_GET_BATCH_SIZE_PROP, String.valueOf(DEFAULT_HBASE_BATCH_SIZE)); setDefaultOnCondition(props, !props.containsKey(HBASE_PUT_BATCH_SIZE_PROP), HBASE_PUT_BATCH_SIZE_PROP, String.valueOf(DEFAULT_HBASE_BATCH_SIZE)); + setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL), + BLOOM_INDEX_INPUT_STORAGE_LEVEL, DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL); // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); return config; diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java index c9ca6aa4b..168174c63 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java @@ -68,7 +68,7 @@ public class HoodieStorageConfig extends DefaultHoodieConfig { return this; } - public Builder limitFileSize(int maxFileSize) { + public Builder limitFileSize(long maxFileSize) { props.setProperty(PARQUET_FILE_MAX_BYTES, String.valueOf(maxFileSize)); return this; } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java index dfae2a082..30badf2a0 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java @@ -275,11 +275,15 @@ public class HoodieWriteConfig extends DefaultHoodieConfig { return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP)); } + public StorageLevel getBloomIndexInputStorageLevel() { + return StorageLevel.fromString(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_INPUT_STORAGE_LEVEL)); + } + /** * storage properties **/ - public int getParquetMaxFileSize() { - return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES)); + public long getParquetMaxFileSize() { + return Long.parseLong(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES)); } public int getParquetBlockSize() { diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java index bf452bc8f..73a07fc4c 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java @@ -75,7 +75,7 @@ public class HoodieBloomIndex extends HoodieIndex // Step 0: cache the input record RDD if (config.getBloomIndexUseCaching()) { - recordRDD.persist(StorageLevel.MEMORY_AND_DISK_SER()); + recordRDD.persist(config.getBloomIndexInputStorageLevel()); } // Step 1: Extract out thinner JavaPairRDD of (partitionPath, recordKey) diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java index d2af03047..f0aa544da 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java @@ -26,11 +26,11 @@ public class HoodieParquetConfig { private CompressionCodecName compressionCodecName; private int blockSize; private int pageSize; - private int maxFileSize; + private long maxFileSize; private Configuration hadoopConf; public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, - CompressionCodecName compressionCodecName, int blockSize, int pageSize, int maxFileSize, + CompressionCodecName compressionCodecName, int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf) { this.writeSupport = writeSupport; this.compressionCodecName = compressionCodecName; @@ -56,7 +56,7 @@ public class HoodieParquetConfig { return pageSize; } - public int getMaxFileSize() { + public long getMaxFileSize() { return maxFileSize; }