[HUDI-2362] Add external config file support (#3416)
Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
@@ -112,7 +112,7 @@ public class HDFSParquetImporter implements Serializable {
|
||||
public int dataImport(JavaSparkContext jsc, int retry) {
|
||||
this.fs = FSUtils.getFs(cfg.targetPath, jsc.hadoopConfiguration());
|
||||
this.props = cfg.propsFilePath == null ? UtilHelpers.buildProperties(cfg.configs)
|
||||
: UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs).getConfig();
|
||||
: UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs).getProps(true);
|
||||
LOG.info("Starting data import with configs : " + props.toString());
|
||||
int ret = -1;
|
||||
try {
|
||||
|
||||
@@ -63,7 +63,7 @@ public class HoodieCleaner {
|
||||
*/
|
||||
FileSystem fs = FSUtils.getFs(cfg.basePath, jssc.hadoopConfiguration());
|
||||
this.props = cfg.propsFilePath == null ? UtilHelpers.buildProperties(cfg.configs)
|
||||
: UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs).getConfig();
|
||||
: UtilHelpers.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs).getProps(true);
|
||||
LOG.info("Creating Cleaner with configs : " + props.toString());
|
||||
}
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ public class HoodieClusteringJob {
|
||||
|
||||
return UtilHelpers
|
||||
.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs)
|
||||
.getConfig();
|
||||
.getProps(true);
|
||||
}
|
||||
|
||||
public static class Config implements Serializable {
|
||||
|
||||
@@ -59,7 +59,7 @@ public class HoodieCompactor {
|
||||
|
||||
return UtilHelpers
|
||||
.readConfig(fs, new Path(cfg.propsFilePath), cfg.configs)
|
||||
.getConfig();
|
||||
.getProps(true);
|
||||
}
|
||||
|
||||
public static class Config implements Serializable {
|
||||
|
||||
@@ -161,18 +161,11 @@ public class UtilHelpers {
|
||||
*
|
||||
*/
|
||||
public static DFSPropertiesConfiguration readConfig(FileSystem fs, Path cfgPath, List<String> overriddenProps) {
|
||||
DFSPropertiesConfiguration conf;
|
||||
try {
|
||||
conf = new DFSPropertiesConfiguration(cfgPath.getFileSystem(fs.getConf()), cfgPath);
|
||||
} catch (Exception e) {
|
||||
conf = new DFSPropertiesConfiguration();
|
||||
LOG.warn("Unexpected error read props file at :" + cfgPath, e);
|
||||
}
|
||||
|
||||
DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration(fs, cfgPath);
|
||||
try {
|
||||
if (!overriddenProps.isEmpty()) {
|
||||
LOG.info("Adding overridden properties to file properties.");
|
||||
conf.addProperties(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));
|
||||
conf.addPropsFromStream(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieIOException("Unexpected error adding config overrides", ioe);
|
||||
@@ -186,7 +179,7 @@ public class UtilHelpers {
|
||||
try {
|
||||
if (!overriddenProps.isEmpty()) {
|
||||
LOG.info("Adding overridden properties to file properties.");
|
||||
conf.addProperties(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));
|
||||
conf.addPropsFromStream(new BufferedReader(new StringReader(String.join("\n", overriddenProps))));
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieIOException("Unexpected error adding config overrides", ioe);
|
||||
@@ -196,7 +189,7 @@ public class UtilHelpers {
|
||||
}
|
||||
|
||||
public static TypedProperties buildProperties(List<String> props) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
TypedProperties properties = DFSPropertiesConfiguration.getGlobalProps();
|
||||
props.forEach(x -> {
|
||||
String[] kv = x.split("=");
|
||||
ValidationUtils.checkArgument(kv.length == 2);
|
||||
|
||||
@@ -29,6 +29,7 @@ import org.apache.hudi.client.WriteStatus;
|
||||
import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.client.utils.OperationConverter;
|
||||
import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
@@ -121,15 +122,18 @@ public class HoodieDeltaStreamer implements Serializable {
|
||||
public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configuration conf,
|
||||
Option<TypedProperties> props) throws IOException {
|
||||
// Resolving the properties first in a consistent way
|
||||
HoodieConfig hoodieConfig = new HoodieConfig();
|
||||
if (props.isPresent()) {
|
||||
this.properties = setDefaults(props.get());
|
||||
hoodieConfig.setAll(props.get());
|
||||
} else if (cfg.propsFilePath.equals(Config.DEFAULT_DFS_SOURCE_PROPERTIES)) {
|
||||
this.properties = setDefaults(UtilHelpers.getConfig(cfg.configs).getConfig());
|
||||
hoodieConfig.setAll(UtilHelpers.getConfig(cfg.configs).getProps());
|
||||
} else {
|
||||
this.properties = setDefaults(UtilHelpers.readConfig(
|
||||
hoodieConfig.setAll(UtilHelpers.readConfig(
|
||||
FSUtils.getFs(cfg.propsFilePath, jssc.hadoopConfiguration()),
|
||||
new Path(cfg.propsFilePath), cfg.configs).getConfig());
|
||||
new Path(cfg.propsFilePath), cfg.configs).getProps());
|
||||
}
|
||||
hoodieConfig.setDefaultValue(DataSourceWriteOptions.RECONCILE_SCHEMA());
|
||||
this.properties = (TypedProperties) hoodieConfig.getProps(true);
|
||||
|
||||
if (cfg.initialCheckpointProvider != null && cfg.checkpoint == null) {
|
||||
InitialCheckPointProvider checkPointProvider =
|
||||
@@ -148,13 +152,6 @@ public class HoodieDeltaStreamer implements Serializable {
|
||||
deltaSyncService.ifPresent(ds -> ds.shutdown(false));
|
||||
}
|
||||
|
||||
private TypedProperties setDefaults(TypedProperties props) {
|
||||
if (!props.containsKey(DataSourceWriteOptions.RECONCILE_SCHEMA().key())) {
|
||||
props.setProperty(DataSourceWriteOptions.RECONCILE_SCHEMA().key(), DataSourceWriteOptions.RECONCILE_SCHEMA().defaultValue().toString());
|
||||
}
|
||||
return props;
|
||||
}
|
||||
|
||||
/**
|
||||
* Main method to start syncing.
|
||||
*
|
||||
@@ -370,12 +367,12 @@ public class HoodieDeltaStreamer implements Serializable {
|
||||
}
|
||||
|
||||
public boolean isAsyncClusteringEnabled() {
|
||||
return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getConfig()
|
||||
return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getProps()
|
||||
.getOrDefault(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE.key(), false)));
|
||||
}
|
||||
|
||||
public boolean isInlineClusteringEnabled() {
|
||||
return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getConfig()
|
||||
return Boolean.parseBoolean(String.valueOf(UtilHelpers.getConfig(this.configs).getProps()
|
||||
.getOrDefault(HoodieClusteringConfig.INLINE_CLUSTERING.key(), false)));
|
||||
}
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ public class HoodieMultiTableDeltaStreamer {
|
||||
FileSystem fs = FSUtils.getFs(commonPropsFile, jssc.hadoopConfiguration());
|
||||
configFolder = configFolder.charAt(configFolder.length() - 1) == '/' ? configFolder.substring(0, configFolder.length() - 1) : configFolder;
|
||||
checkIfPropsFileAndConfigFolderExist(commonPropsFile, configFolder, fs);
|
||||
TypedProperties commonProperties = UtilHelpers.readConfig(fs, new Path(commonPropsFile), new ArrayList<>()).getConfig();
|
||||
TypedProperties commonProperties = UtilHelpers.readConfig(fs, new Path(commonPropsFile), new ArrayList<>()).getProps();
|
||||
//get the tables to be ingested and their corresponding config files from this properties instance
|
||||
populateTableExecutionContextList(commonProperties, configFolder, fs, config);
|
||||
}
|
||||
@@ -116,7 +116,7 @@ public class HoodieMultiTableDeltaStreamer {
|
||||
String configProp = Constants.INGESTION_PREFIX + database + Constants.DELIMITER + currentTable + Constants.INGESTION_CONFIG_SUFFIX;
|
||||
String configFilePath = properties.getString(configProp, Helpers.getDefaultConfigFilePath(configFolder, database, currentTable));
|
||||
checkIfTableConfigFileExists(configFolder, fs, configFilePath);
|
||||
TypedProperties tableProperties = UtilHelpers.readConfig(fs, new Path(configFilePath), new ArrayList<>()).getConfig();
|
||||
TypedProperties tableProperties = UtilHelpers.readConfig(fs, new Path(configFilePath), new ArrayList<>()).getProps();
|
||||
properties.forEach((k, v) -> {
|
||||
if (tableProperties.get(k) == null) {
|
||||
tableProperties.setProperty(k.toString(), v.toString());
|
||||
|
||||
@@ -365,7 +365,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
|
||||
@Test
|
||||
public void testProps() {
|
||||
TypedProperties props =
|
||||
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getConfig();
|
||||
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getProps();
|
||||
assertEquals(2, props.getInteger("hoodie.upsert.shuffle.parallelism"));
|
||||
assertEquals("_row_key", props.getString("hoodie.datasource.write.recordkey.field"));
|
||||
assertEquals("org.apache.hudi.utilities.functional.TestHoodieDeltaStreamer$TestGenerator",
|
||||
@@ -485,7 +485,7 @@ public class TestHoodieDeltaStreamer extends HoodieDeltaStreamerTestBase {
|
||||
String checkpointProviderClass = "org.apache.hudi.utilities.checkpointing.KafkaConnectHdfsProvider";
|
||||
HoodieDeltaStreamer.Config cfg = TestHelpers.makeDropAllConfig(tableBasePath, WriteOperationType.UPSERT);
|
||||
TypedProperties props =
|
||||
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getConfig();
|
||||
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/" + PROPS_FILENAME_TEST_SOURCE)).getProps();
|
||||
props.put("hoodie.deltastreamer.checkpoint.provider.path", bootstrapPath);
|
||||
cfg.initialCheckpointProvider = checkpointProviderClass;
|
||||
// create regular kafka connect hdfs dirs
|
||||
|
||||
Reference in New Issue
Block a user