[HUDI-3807] Add a new config to control the use of metadata index in HoodieBloomIndex (#5268)
This commit is contained in:
@@ -30,17 +30,22 @@ import org.apache.hudi.data.HoodieJavaPairRDD;
|
||||
import org.apache.hudi.data.HoodieJavaRDD;
|
||||
import org.apache.hudi.io.HoodieKeyLookupResult;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.Partitioner;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import scala.Tuple2;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import scala.Tuple2;
|
||||
|
||||
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.getCompletedMetadataPartitions;
|
||||
import static org.apache.hudi.metadata.MetadataPartitionType.BLOOM_FILTERS;
|
||||
|
||||
/**
|
||||
* Helper for {@link HoodieBloomIndex} containing Spark-specific logic.
|
||||
*/
|
||||
@@ -75,7 +80,9 @@ public class SparkHoodieBloomIndexHelper extends BaseHoodieBloomIndexHelper {
|
||||
+ config.getBloomIndexParallelism() + "}");
|
||||
|
||||
JavaRDD<List<HoodieKeyLookupResult>> keyLookupResultRDD;
|
||||
if (config.isMetadataBloomFilterIndexEnabled()) {
|
||||
if (config.getBloomIndexUseMetadata()
|
||||
&& getCompletedMetadataPartitions(hoodieTable.getMetaClient().getTableConfig())
|
||||
.contains(BLOOM_FILTERS.getPartitionPath())) {
|
||||
// Step 1: Sort by file id
|
||||
JavaRDD<Tuple2<String, HoodieKey>> sortedFileIdAndKeyPairs =
|
||||
fileComparisonsRDD.sortBy(Tuple2::_1, true, joinParallelism);
|
||||
|
||||
Reference in New Issue
Block a user