[HUDI-3773] Fix parallelism used for metadata table bloom filter index (#5209)
This commit is contained in:
@@ -1563,6 +1563,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
|||||||
return getMetadataConfig().getIndexingCheckTimeoutSeconds();
|
return getMetadataConfig().getIndexingCheckTimeoutSeconds();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the parallelism configured for the metadata table bloom filter index.
 *
 * <p>Delegates to {@code metadataConfig.getBloomFilterIndexParallelism()}.
 *
 * @return the value reported by the metadata config
 */
public int getMetadataBloomFilterIndexParallelism() {
|
||||||
|
return metadataConfig.getBloomFilterIndexParallelism();
|
||||||
|
}
|
||||||
|
|
||||||
/**
 * Returns the column stats index parallelism by delegating to
 * {@code metadataConfig.getColumnStatsIndexParallelism()}.
 *
 * @return the value reported by the metadata config
 */
public int getColumnStatsIndexParallelism() {
|
public int getColumnStatsIndexParallelism() {
|
||||||
return metadataConfig.getColumnStatsIndexParallelism();
|
return metadataConfig.getColumnStatsIndexParallelism();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -704,7 +704,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
|||||||
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
|
private MetadataRecordsGenerationParams getRecordsGenerationParams() {
|
||||||
return new MetadataRecordsGenerationParams(
|
return new MetadataRecordsGenerationParams(
|
||||||
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
|
dataMetaClient, enabledPartitionTypes, dataWriteConfig.getBloomFilterType(),
|
||||||
dataWriteConfig.getBloomIndexParallelism(),
|
dataWriteConfig.getMetadataBloomFilterIndexParallelism(),
|
||||||
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
|
dataWriteConfig.isMetadataColumnStatsIndexEnabled(),
|
||||||
dataWriteConfig.getColumnStatsIndexParallelism(),
|
dataWriteConfig.getColumnStatsIndexParallelism(),
|
||||||
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),
|
StringUtils.toList(dataWriteConfig.getColumnsEnabledForColumnStatsIndex()),
|
||||||
|
|||||||
@@ -151,6 +151,12 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
+ "log files and read parallelism in the bloom filter index partition. The recommendation is to size the "
|
+ "log files and read parallelism in the bloom filter index partition. The recommendation is to size the "
|
||||||
+ "file group count such that the base files are under 1GB.");
|
+ "file group count such that the base files are under 1GB.");
|
||||||
|
|
||||||
|
/**
 * Config property for the parallelism used when generating the bloom filter
 * index in the metadata table.
 *
 * <p>The key is {@code METADATA_PREFIX + ".index.bloom.filter.parallelism"},
 * the default value is 200, and the property is marked as available since 0.11.0.
 */
public static final ConfigProperty<Integer> BLOOM_FILTER_INDEX_PARALLELISM = ConfigProperty
|
||||||
|
.key(METADATA_PREFIX + ".index.bloom.filter.parallelism")
|
||||||
|
.defaultValue(200)
|
||||||
|
.sinceVersion("0.11.0")
|
||||||
|
.withDocumentation("Parallelism to use for generating bloom filter index in metadata table.");
|
||||||
|
|
||||||
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty
|
public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty
|
||||||
.key(METADATA_PREFIX + ".index.column.stats.enable")
|
.key(METADATA_PREFIX + ".index.column.stats.enable")
|
||||||
.defaultValue(false)
|
.defaultValue(false)
|
||||||
@@ -263,6 +269,10 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT);
|
return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the integer value of {@code BLOOM_FILTER_INDEX_PARALLELISM}, read
 * through {@code getIntOrDefault}.
 *
 * <p>NOTE(review): presumably falls back to the property's declared default
 * when the key is unset; confirm against {@code getIntOrDefault}.
 *
 * @return the bloom filter index parallelism
 */
public int getBloomFilterIndexParallelism() {
|
||||||
|
return getIntOrDefault(BLOOM_FILTER_INDEX_PARALLELISM);
|
||||||
|
}
|
||||||
|
|
||||||
/**
 * Returns the integer value of {@code COLUMN_STATS_INDEX_PARALLELISM}, read
 * through {@code getIntOrDefault}.
 *
 * @return the column stats index parallelism
 */
public int getColumnStatsIndexParallelism() {
|
public int getColumnStatsIndexParallelism() {
|
||||||
return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM);
|
return getIntOrDefault(COLUMN_STATS_INDEX_PARALLELISM);
|
||||||
}
|
}
|
||||||
@@ -323,6 +333,11 @@ public final class HoodieMetadataConfig extends HoodieConfig {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sets {@code BLOOM_FILTER_INDEX_PARALLELISM} on the metadata config being built.
 *
 * <p>The value is stored in its string form via {@code String.valueOf}.
 *
 * @param parallelism the parallelism value to record
 * @return this builder, to allow call chaining
 */
public Builder withBloomFilterIndexParallelism(int parallelism) {
|
||||||
|
metadataConfig.setValue(BLOOM_FILTER_INDEX_PARALLELISM, String.valueOf(parallelism));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public Builder withMetadataIndexColumnStats(boolean enable) {
|
public Builder withMetadataIndexColumnStats(boolean enable) {
|
||||||
metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable));
|
metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable));
|
||||||
return this;
|
return this;
|
||||||
|
|||||||
Reference in New Issue
Block a user