1
0

[HUDI-3764] Allow loading external configs while querying Hudi tables with Spark (#4915)

Currently, Hudi queries run through Spark do not load
external configurations. For example, if customers enable
metadata listing in their global config file, their queries
still run without the metadata feature enabled.
This PR fixes the issue by loading global configs
during the Hudi read phase.

Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
wenningd
2022-07-21 02:42:17 -07:00
committed by GitHub
parent de37774e12
commit c7fe3fd01d
3 changed files with 35 additions and 17 deletions

View File

@@ -19,7 +19,7 @@ package org.apache.hudi
import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL}
import org.apache.hudi.HoodieConversionUtils.toScalaOption
import org.apache.hudi.common.config.{ConfigProperty, HoodieCommonConfig, HoodieConfig, TypedProperties}
import org.apache.hudi.common.config.{ConfigProperty, DFSPropertiesConfiguration, HoodieCommonConfig, HoodieConfig, TypedProperties}
import org.apache.hudi.common.fs.ConsistencyGuardConfig
import org.apache.hudi.common.model.{HoodieTableType, WriteOperationType}
import org.apache.hudi.common.table.HoodieTableConfig
@@ -768,13 +768,14 @@ object DataSourceOptionsHelper {
def parametersWithReadDefaults(parameters: Map[String, String]): Map[String, String] = {
// First check whether ConfigUtils.IS_QUERY_AS_RO_TABLE has been set by HiveSyncTool;
// otherwise use the query type from QUERY_TYPE.
val queryType = parameters.get(ConfigUtils.IS_QUERY_AS_RO_TABLE)
val paramsWithGlobalProps = DFSPropertiesConfiguration.getGlobalProps.asScala.toMap ++ parameters
val queryType = paramsWithGlobalProps.get(ConfigUtils.IS_QUERY_AS_RO_TABLE)
.map(is => if (is.toBoolean) QUERY_TYPE_READ_OPTIMIZED_OPT_VAL else QUERY_TYPE_SNAPSHOT_OPT_VAL)
.getOrElse(parameters.getOrElse(QUERY_TYPE.key, QUERY_TYPE.defaultValue()))
.getOrElse(paramsWithGlobalProps.getOrElse(QUERY_TYPE.key, QUERY_TYPE.defaultValue()))
Map(
QUERY_TYPE.key -> queryType
) ++ translateConfigurations(parameters)
) ++ translateConfigurations(paramsWithGlobalProps)
}
def inferKeyGenClazz(props: TypedProperties): String = {