1
0

[HUDI-4178] Addressing performance regressions in Spark DataSourceV2 Integration (#5737)

There are multiple issues with our current DataSource V2 integration: because we advertise Hudi tables as V2, Spark expects them to implement certain APIs that are not implemented at the moment; instead, we're using a custom Resolution rule (in HoodieSpark3Analysis) to manually fall back to the V1 APIs. This commit fixes the issue by reverting the DSv2 APIs and making Spark use V1, except for the schema evaluation logic.
This commit is contained in:
Alexey Kudinkin
2022-06-07 16:30:46 -07:00
committed by GitHub
parent 1349b596a1
commit 35afdb4316
28 changed files with 374 additions and 256 deletions

View File

@@ -175,11 +175,6 @@ public class HoodieWriteConfig extends HoodieConfig {
.withDocumentation("Schema string representing the latest schema of the table. Hudi passes this to "
+ "implementations of evolution of schema");
public static final ConfigProperty<Boolean> SCHEMA_EVOLUTION_ENABLE = ConfigProperty
.key("hoodie.schema.on.read.enable")
.defaultValue(false)
.withDocumentation("enable full schema evolution for hoodie");
public static final ConfigProperty<Boolean> ENABLE_INTERNAL_SCHEMA_CACHE = ConfigProperty
.key("hoodie.schema.cache.enable")
.defaultValue(false)
@@ -929,11 +924,11 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public boolean getSchemaEvolutionEnable() {
return getBoolean(SCHEMA_EVOLUTION_ENABLE);
return getBoolean(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE);
}
public void setSchemaEvolutionEnable(boolean enable) {
setValue(SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable));
setValue(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable));
}
/**
@@ -2175,7 +2170,7 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public Builder withSchemaEvolutionEnable(boolean enable) {
writeConfig.setValue(SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable));
writeConfig.setValue(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE, String.valueOf(enable));
return this;
}