[HUDI-3091] Making SIMPLE index as the default index type (#4659)
* [HUDI-3091] Making SIMPLE index as the default index type * Fixing tests * Triaging timeouts * disable SIMPLE index for bootstrap tests * removing test run start and end log statements * Fixing simple index parallelism for some tests * Disabling failing test for now * reverting previous disable * Reverting all changes * fixing azure pipeline script
This commit is contained in:
committed by
GitHub
parent
ab73047958
commit
6a32cfe020
@@ -152,7 +152,7 @@ public class HoodieIndexConfig extends HoodieConfig {
|
|||||||
|
|
||||||
public static final ConfigProperty<String> SIMPLE_INDEX_PARALLELISM = ConfigProperty
|
public static final ConfigProperty<String> SIMPLE_INDEX_PARALLELISM = ConfigProperty
|
||||||
.key("hoodie.simple.index.parallelism")
|
.key("hoodie.simple.index.parallelism")
|
||||||
.defaultValue("50")
|
.defaultValue("100")
|
||||||
.withDocumentation("Only applies if index type is SIMPLE. "
|
.withDocumentation("Only applies if index type is SIMPLE. "
|
||||||
+ "This is the amount of parallelism for index lookup, which involves a Spark Shuffle");
|
+ "This is the amount of parallelism for index lookup, which involves a Spark Shuffle");
|
||||||
|
|
||||||
@@ -568,7 +568,7 @@ public class HoodieIndexConfig extends HoodieConfig {
|
|||||||
private String getDefaultIndexType(EngineType engineType) {
|
private String getDefaultIndexType(EngineType engineType) {
|
||||||
switch (engineType) {
|
switch (engineType) {
|
||||||
case SPARK:
|
case SPARK:
|
||||||
return HoodieIndex.IndexType.BLOOM.name();
|
return HoodieIndex.IndexType.SIMPLE.name();
|
||||||
case FLINK:
|
case FLINK:
|
||||||
case JAVA:
|
case JAVA:
|
||||||
return HoodieIndex.IndexType.INMEMORY.name();
|
return HoodieIndex.IndexType.INMEMORY.name();
|
||||||
|
|||||||
@@ -148,7 +148,7 @@ public class HoodieSimpleIndex
|
|||||||
protected HoodiePairData<HoodieKey, HoodieRecordLocation> fetchRecordLocations(
|
protected HoodiePairData<HoodieKey, HoodieRecordLocation> fetchRecordLocations(
|
||||||
HoodieEngineContext context, HoodieTable hoodieTable, int parallelism,
|
HoodieEngineContext context, HoodieTable hoodieTable, int parallelism,
|
||||||
List<Pair<String, HoodieBaseFile>> baseFiles) {
|
List<Pair<String, HoodieBaseFile>> baseFiles) {
|
||||||
int fetchParallelism = Math.max(1, Math.max(baseFiles.size(), parallelism));
|
int fetchParallelism = Math.max(1, Math.min(baseFiles.size(), parallelism));
|
||||||
|
|
||||||
return context.parallelize(baseFiles, fetchParallelism)
|
return context.parallelize(baseFiles, fetchParallelism)
|
||||||
.flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt)
|
.flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt)
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ public class TestHoodieWriteConfig {
|
|||||||
public void testDefaultIndexAccordingToEngineType() {
|
public void testDefaultIndexAccordingToEngineType() {
|
||||||
testEngineSpecificConfig(HoodieWriteConfig::getIndexType,
|
testEngineSpecificConfig(HoodieWriteConfig::getIndexType,
|
||||||
constructConfigMap(
|
constructConfigMap(
|
||||||
EngineType.SPARK, HoodieIndex.IndexType.BLOOM,
|
EngineType.SPARK, HoodieIndex.IndexType.SIMPLE,
|
||||||
EngineType.FLINK, HoodieIndex.IndexType.INMEMORY,
|
EngineType.FLINK, HoodieIndex.IndexType.INMEMORY,
|
||||||
EngineType.JAVA, HoodieIndex.IndexType.INMEMORY));
|
EngineType.JAVA, HoodieIndex.IndexType.INMEMORY));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user