1
0

[HUDI-3091] Making SIMPLE index as the default index type (#4659)

* [HUDI-3091] Making SIMPLE index as the default index type

* Fixing tests

* Triaging timeouts

* disable SIMPLE index for bootstrap tests

* removing test run start and end log statements

* Fixing simple index parallelism for some tests

* Disabling failing test for now

* reverting previous disable

* Reverting all changes

* fixing azure pipeline script
This commit is contained in:
Sivabalan Narayanan
2022-02-08 04:32:18 -05:00
committed by GitHub
parent ab73047958
commit 6a32cfe020
3 changed files with 4 additions and 4 deletions

View File

@@ -152,7 +152,7 @@ public class HoodieIndexConfig extends HoodieConfig {
public static final ConfigProperty<String> SIMPLE_INDEX_PARALLELISM = ConfigProperty public static final ConfigProperty<String> SIMPLE_INDEX_PARALLELISM = ConfigProperty
.key("hoodie.simple.index.parallelism") .key("hoodie.simple.index.parallelism")
.defaultValue("50") .defaultValue("100")
.withDocumentation("Only applies if index type is SIMPLE. " .withDocumentation("Only applies if index type is SIMPLE. "
+ "This is the amount of parallelism for index lookup, which involves a Spark Shuffle"); + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle");
@@ -568,7 +568,7 @@ public class HoodieIndexConfig extends HoodieConfig {
private String getDefaultIndexType(EngineType engineType) { private String getDefaultIndexType(EngineType engineType) {
switch (engineType) { switch (engineType) {
case SPARK: case SPARK:
return HoodieIndex.IndexType.BLOOM.name(); return HoodieIndex.IndexType.SIMPLE.name();
case FLINK: case FLINK:
case JAVA: case JAVA:
return HoodieIndex.IndexType.INMEMORY.name(); return HoodieIndex.IndexType.INMEMORY.name();

View File

@@ -148,7 +148,7 @@ public class HoodieSimpleIndex
protected HoodiePairData<HoodieKey, HoodieRecordLocation> fetchRecordLocations( protected HoodiePairData<HoodieKey, HoodieRecordLocation> fetchRecordLocations(
HoodieEngineContext context, HoodieTable hoodieTable, int parallelism, HoodieEngineContext context, HoodieTable hoodieTable, int parallelism,
List<Pair<String, HoodieBaseFile>> baseFiles) { List<Pair<String, HoodieBaseFile>> baseFiles) {
int fetchParallelism = Math.max(1, Math.max(baseFiles.size(), parallelism)); int fetchParallelism = Math.max(1, Math.min(baseFiles.size(), parallelism));
return context.parallelize(baseFiles, fetchParallelism) return context.parallelize(baseFiles, fetchParallelism)
.flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt) .flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt)

View File

@@ -82,7 +82,7 @@ public class TestHoodieWriteConfig {
public void testDefaultIndexAccordingToEngineType() { public void testDefaultIndexAccordingToEngineType() {
testEngineSpecificConfig(HoodieWriteConfig::getIndexType, testEngineSpecificConfig(HoodieWriteConfig::getIndexType,
constructConfigMap( constructConfigMap(
EngineType.SPARK, HoodieIndex.IndexType.BLOOM, EngineType.SPARK, HoodieIndex.IndexType.SIMPLE,
EngineType.FLINK, HoodieIndex.IndexType.INMEMORY, EngineType.FLINK, HoodieIndex.IndexType.INMEMORY,
EngineType.JAVA, HoodieIndex.IndexType.INMEMORY)); EngineType.JAVA, HoodieIndex.IndexType.INMEMORY));
} }