From 6a32cfe0201bd84ad9199af76a585d4b5a3a54d7 Mon Sep 17 00:00:00 2001 From: Sivabalan Narayanan Date: Tue, 8 Feb 2022 04:32:18 -0500 Subject: [PATCH] [HUDI-3091] Making SIMPLE index as the default index type (#4659) * [HUDI-3091] Making SIMPLE index as the default index type * Fixing tests * Triaging timeouts * disable SIMPLE index for bootstrap tests * removing test run start and end log statements * Fixing simple index parallelism for some tests * Disabling failing test for now * reverting previous disable * Reverting all changes * fixing azure pipeline script --- .../main/java/org/apache/hudi/config/HoodieIndexConfig.java | 4 ++-- .../java/org/apache/hudi/index/simple/HoodieSimpleIndex.java | 2 +- .../java/org/apache/hudi/config/TestHoodieWriteConfig.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java index e156310c7..f82f14d5a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java @@ -152,7 +152,7 @@ public class HoodieIndexConfig extends HoodieConfig { public static final ConfigProperty SIMPLE_INDEX_PARALLELISM = ConfigProperty .key("hoodie.simple.index.parallelism") - .defaultValue("50") + .defaultValue("100") .withDocumentation("Only applies if index type is SIMPLE. 
" + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle"); @@ -568,7 +568,7 @@ public class HoodieIndexConfig extends HoodieConfig { private String getDefaultIndexType(EngineType engineType) { switch (engineType) { case SPARK: - return HoodieIndex.IndexType.BLOOM.name(); + return HoodieIndex.IndexType.SIMPLE.name(); case FLINK: case JAVA: return HoodieIndex.IndexType.INMEMORY.name(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java index 8ff3322a9..95823ff51 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java @@ -148,7 +148,7 @@ public class HoodieSimpleIndex protected HoodiePairData fetchRecordLocations( HoodieEngineContext context, HoodieTable hoodieTable, int parallelism, List> baseFiles) { - int fetchParallelism = Math.max(1, Math.max(baseFiles.size(), parallelism)); + int fetchParallelism = Math.max(1, Math.min(baseFiles.size(), parallelism)); return context.parallelize(baseFiles, fetchParallelism) .flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt) diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java index c86a34a60..0713b99b1 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java @@ -82,7 +82,7 @@ public class TestHoodieWriteConfig { public void testDefaultIndexAccordingToEngineType() { 
testEngineSpecificConfig(HoodieWriteConfig::getIndexType, constructConfigMap( - EngineType.SPARK, HoodieIndex.IndexType.BLOOM, + EngineType.SPARK, HoodieIndex.IndexType.SIMPLE, EngineType.FLINK, HoodieIndex.IndexType.INMEMORY, EngineType.JAVA, HoodieIndex.IndexType.INMEMORY)); }