diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java index e156310c7..f82f14d5a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java @@ -152,7 +152,7 @@ public class HoodieIndexConfig extends HoodieConfig { public static final ConfigProperty SIMPLE_INDEX_PARALLELISM = ConfigProperty .key("hoodie.simple.index.parallelism") - .defaultValue("50") + .defaultValue("100") .withDocumentation("Only applies if index type is SIMPLE. " + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle"); @@ -568,7 +568,7 @@ public class HoodieIndexConfig extends HoodieConfig { private String getDefaultIndexType(EngineType engineType) { switch (engineType) { case SPARK: - return HoodieIndex.IndexType.BLOOM.name(); + return HoodieIndex.IndexType.SIMPLE.name(); case FLINK: case JAVA: return HoodieIndex.IndexType.INMEMORY.name(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java index 8ff3322a9..95823ff51 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java @@ -148,7 +148,7 @@ public class HoodieSimpleIndex protected HoodiePairData fetchRecordLocations( HoodieEngineContext context, HoodieTable hoodieTable, int parallelism, List> baseFiles) { - int fetchParallelism = Math.max(1, Math.max(baseFiles.size(), parallelism)); + int fetchParallelism = Math.max(1, Math.min(baseFiles.size(), parallelism)); return context.parallelize(baseFiles, fetchParallelism) .flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java index c86a34a60..0713b99b1 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java @@ -82,7 +82,7 @@ public class TestHoodieWriteConfig { public void testDefaultIndexAccordingToEngineType() { testEngineSpecificConfig(HoodieWriteConfig::getIndexType, constructConfigMap( - EngineType.SPARK, HoodieIndex.IndexType.BLOOM, + EngineType.SPARK, HoodieIndex.IndexType.SIMPLE, EngineType.FLINK, HoodieIndex.IndexType.INMEMORY, EngineType.JAVA, HoodieIndex.IndexType.INMEMORY)); }