1
0

[HUDI-3773] Fix parallelism used for metadata table bloom filter index (#5209)

This commit is contained in:
Y Ethan Guo
2022-04-01 20:14:07 -07:00
committed by GitHub
parent 444ff496a4
commit fb45fc9cb9
3 changed files with 20 additions and 1 deletions

View File

@@ -1563,6 +1563,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return getMetadataConfig().getIndexingCheckTimeoutSeconds(); return getMetadataConfig().getIndexingCheckTimeoutSeconds();
} }
/**
 * Returns the parallelism configured for the bloom filter index of the metadata table.
 *
 * <p>The value is read from the metadata config held by this write config; this method
 * only forwards it.
 *
 * @return the bloom filter index parallelism reported by the metadata config
 */
public int getMetadataBloomFilterIndexParallelism() {
  final int bloomFilterIndexParallelism = metadataConfig.getBloomFilterIndexParallelism();
  return bloomFilterIndexParallelism;
}
public int getColumnStatsIndexParallelism() { public int getColumnStatsIndexParallelism() {
return metadataConfig.getColumnStatsIndexParallelism(); return metadataConfig.getColumnStatsIndexParallelism();
} }

View File

@@ -704,7 +704,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
private MetadataRecordsGenerationParams getRecordsGenerationParams() { private MetadataRecordsGenerationParams getRecordsGenerationParams() {
return new MetadataRecordsGenerationParams( return new MetadataRecordsGenerationParams(
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(), dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
dataWriteConfig.getBloomIndexParallelism(), dataWriteConfig.getMetadataBloomFilterIndexParallelism(),
dataWriteConfig.isMetadataColumnStatsIndexEnabled(), dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
dataWriteConfig.getColumnStatsIndexParallelism(), dataWriteConfig.getColumnStatsIndexParallelism(),
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()), StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),

View File

@@ -151,6 +151,12 @@ public final class HoodieMetadataConfig extends HoodieConfig {
+ "log files and read parallelism in the bloom filter index partition. The recommendation is to size the " + "log files and read parallelism in the bloom filter index partition. The recommendation is to size the "
+ "file group count such that the base files are under 1GB."); + "file group count such that the base files are under 1GB.");
/**
 * Config property for the parallelism used when generating the bloom filter index
 * in the metadata table.
 *
 * <p>Key: {@code METADATA_PREFIX + ".index.bloom.filter.parallelism"}; default value: 200;
 * declared as available since version 0.11.0. Read via {@code getBloomFilterIndexParallelism()}.
 */
public static final ConfigProperty<Integer> BLOOM_FILTER_INDEX_PARALLELISM = ConfigProperty
.key(METADATA_PREFIX + ".index.bloom.filter.parallelism")
.defaultValue(200)
.sinceVersion("0.11.0")
.withDocumentation("Parallelism to use for generating bloom filter index in metadata table.");
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty
.key(METADATA_PREFIX + ".index.column.stats.enable") .key(METADATA_PREFIX + ".index.column.stats.enable")
.defaultValue(false) .defaultValue(false)
@@ -263,6 +269,10 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT); return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT);
} }
/**
 * Returns the parallelism to use for generating the bloom filter index in the metadata table.
 *
 * <p>The value is looked up for {@link #BLOOM_FILTER_INDEX_PARALLELISM} through
 * {@code getIntOrDefault}.
 *
 * @return the resolved bloom filter index parallelism
 */
public int getBloomFilterIndexParallelism() {
  final int bloomFilterIndexParallelism = getIntOrDefault(BLOOM_FILTER_INDEX_PARALLELISM);
  return bloomFilterIndexParallelism;
}
public int getColumnStatsIndexParallelism() { public int getColumnStatsIndexParallelism() {
return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM); return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM);
} }
@@ -323,6 +333,11 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this; return this;
} }
/**
 * Sets the parallelism to use for generating the bloom filter index in the metadata table.
 *
 * <p>The integer is stored in its decimal string form under
 * {@link #BLOOM_FILTER_INDEX_PARALLELISM}.
 *
 * @param parallelism the parallelism value to record
 * @return this builder, to allow call chaining
 */
public Builder withBloomFilterIndexParallelism(int parallelism) {
  final String parallelismValue = Integer.toString(parallelism);
  metadataConfig.setValue(BLOOM_FILTER_INDEX_PARALLELISM, parallelismValue);
  return this;
}
public Builder withMetadataIndexColumnStats(boolean enable) { public Builder withMetadataIndexColumnStats(boolean enable) {
metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable)); metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable));
return this; return this;