From fb45fc9cb9581abc40922ddcbee21dfc016d4edc Mon Sep 17 00:00:00 2001 From: Y Ethan Guo Date: Fri, 1 Apr 2022 20:14:07 -0700 Subject: [PATCH] [HUDI-3773] Fix parallelism used for metadata table bloom filter index (#5209) --- .../org/apache/hudi/config/HoodieWriteConfig.java | 4 ++++ .../metadata/HoodieBackedTableMetadataWriter.java | 2 +- .../hudi/common/config/HoodieMetadataConfig.java | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 813ccb7e9..23f1f386b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -1563,6 +1563,10 @@ public class HoodieWriteConfig extends HoodieConfig { return getMetadataConfig().getIndexingCheckTimeoutSeconds(); } + public int getMetadataBloomFilterIndexParallelism() { + return metadataConfig.getBloomFilterIndexParallelism(); + } + public int getColumnStatsIndexParallelism() { return metadataConfig.getColumnStatsIndexParallelism(); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 1a069c465..b64d8ec09 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -704,7 +704,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta private MetadataRecordsGenerationParams getRecordsGenerationParams() { return new MetadataRecordsGenerationParams( dataMetaClient, enabledPartitionTypes, 
dataWriteConfig.getBloomFilterType(), - dataWriteConfig.getBloomIndexParallelism(), + dataWriteConfig.getMetadataBloomFilterIndexParallelism(), dataWriteConfig.isMetadataColumnStatsIndexEnabled(), dataWriteConfig.getColumnStatsIndexParallelism(), StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()), diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 9dbdf6d64..d20f63bac 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -151,6 +151,12 @@ public final class HoodieMetadataConfig extends HoodieConfig { + "log files and read parallelism in the bloom filter index partition. The recommendation is to size the " + "file group count such that the base files are under 1GB."); + public static final ConfigProperty<Integer> BLOOM_FILTER_INDEX_PARALLELISM = ConfigProperty + .key(METADATA_PREFIX + ".index.bloom.filter.parallelism") + .defaultValue(200) + .sinceVersion("0.11.0") + .withDocumentation("Parallelism to use for generating bloom filter index in metadata table."); + public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty .key(METADATA_PREFIX + ".index.column.stats.enable") .defaultValue(false) @@ -263,6 +269,10 @@ public final class HoodieMetadataConfig extends HoodieConfig { return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT); } + public int getBloomFilterIndexParallelism() { + return getIntOrDefault(BLOOM_FILTER_INDEX_PARALLELISM); + } + public int getColumnStatsIndexParallelism() { return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM); } @@ -323,6 +333,11 @@ public final class HoodieMetadataConfig extends HoodieConfig { return this; } + public Builder withBloomFilterIndexParallelism(int parallelism) { + 
metadataConfig.setValue(BLOOM_FILTER_INDEX_PARALLELISM, String.valueOf(parallelism)); + return this; + } + public Builder withMetadataIndexColumnStats(boolean enable) { metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable)); return this;