1
0

[HUDI-3773] Fix parallelism used for metadata table bloom filter index (#5209)

This commit is contained in:
Y Ethan Guo
2022-04-01 20:14:07 -07:00
committed by GitHub
parent 444ff496a4
commit fb45fc9cb9
3 changed files with 20 additions and 1 deletions

View File

@@ -1563,6 +1563,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return getMetadataConfig().getIndexingCheckTimeoutSeconds();
}
/**
 * Returns the bloom filter index parallelism as reported by the metadata config.
 *
 * @return the value of {@code metadataConfig.getBloomFilterIndexParallelism()}
 */
public int getMetadataBloomFilterIndexParallelism() {
  final int bloomFilterIndexParallelism = metadataConfig.getBloomFilterIndexParallelism();
  return bloomFilterIndexParallelism;
}
/**
 * Returns the column stats index parallelism as reported by the metadata config.
 *
 * @return the value of {@code metadataConfig.getColumnStatsIndexParallelism()}
 */
public int getColumnStatsIndexParallelism() {
  final int columnStatsIndexParallelism = metadataConfig.getColumnStatsIndexParallelism();
  return columnStatsIndexParallelism;
}

View File

@@ -704,7 +704,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
return new MetadataRecordsGenerationParams(
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
dataWriteConfig.getBloomIndexParallelism(),
dataWriteConfig.getMetadataBloomFilterIndexParallelism(),
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
dataWriteConfig.getColumnStatsIndexParallelism(),
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),

View File

@@ -151,6 +151,12 @@ public final class HoodieMetadataConfig extends HoodieConfig {
+ "log files and read parallelism in the bloom filter index partition. The recommendation is to size the "
+ "file group count such that the base files are under 1GB.");
/**
 * Config property holding the parallelism used when generating the bloom filter index
 * in the metadata table. Keyed under {@code METADATA_PREFIX + ".index.bloom.filter.parallelism"},
 * defaults to 200, and is marked as available since version 0.11.0.
 */
public static final ConfigProperty<Integer> BLOOM_FILTER_INDEX_PARALLELISM = ConfigProperty
.key(METADATA_PREFIX + ".index.bloom.filter.parallelism")
.defaultValue(200)
.sinceVersion("0.11.0")
.withDocumentation("Parallelism to use for generating bloom filter index in metadata table.");
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty
.key(METADATA_PREFIX + ".index.column.stats.enable")
.defaultValue(false)
@@ -263,6 +269,10 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT);
}
/**
 * Looks up {@code BLOOM_FILTER_INDEX_PARALLELISM} through {@code getIntOrDefault}.
 *
 * @return the integer resolved for the bloom filter index parallelism property
 */
public int getBloomFilterIndexParallelism() {
  final int bloomFilterIndexParallelism = getIntOrDefault(BLOOM_FILTER_INDEX_PARALLELISM);
  return bloomFilterIndexParallelism;
}
/**
 * Looks up {@code COLUMN_STATS_INDEX_PARALLELISM} through {@code getIntOrDefault}.
 *
 * @return the integer resolved for the column stats index parallelism property
 */
public int getColumnStatsIndexParallelism() {
  final int columnStatsIndexParallelism = getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM);
  return columnStatsIndexParallelism;
}
@@ -323,6 +333,11 @@ public final class HoodieMetadataConfig extends HoodieConfig {
return this;
}
/**
 * Stores the given parallelism, rendered as a decimal string, under
 * {@code BLOOM_FILTER_INDEX_PARALLELISM} on the metadata config being built.
 *
 * @param parallelism parallelism value to record for the bloom filter index
 * @return this builder, to allow call chaining
 */
public Builder withBloomFilterIndexParallelism(int parallelism) {
  final String parallelismAsText = Integer.toString(parallelism);
  metadataConfig.setValue(BLOOM_FILTER_INDEX_PARALLELISM, parallelismAsText);
  return this;
}
public Builder withMetadataIndexColumnStats(boolean enable) {
metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable));
return this;