1
0

[HUDI-2362] Add external config file support (#3416)

Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
wenningd
2021-11-18 01:59:26 -08:00
committed by GitHub
parent 8772cec4bd
commit 24def0b30d
25 changed files with 426 additions and 102 deletions

View File

@@ -18,14 +18,20 @@
package org.apache.hudi.common.config;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashSet;
import java.util.Set;
@@ -43,72 +49,110 @@ public class DFSPropertiesConfiguration {
private static final Logger LOG = LogManager.getLogger(DFSPropertiesConfiguration.class);
public static final String DEFAULT_PROPERTIES_FILE = "hudi-defaults.conf";
public static final String CONF_FILE_DIR_ENV_NAME = "HUDI_CONF_DIR";
public static final String DEFAULT_CONF_FILE_DIR = "file:/etc/hudi/conf";
// props read from hudi-defaults.conf
private static TypedProperties GLOBAL_PROPS = loadGlobalProps();
private final FileSystem fs;
private final Path rootFile;
private Path currentFilePath;
private final TypedProperties props;
// props read from user defined configuration file or input stream
private final HoodieConfig hoodieConfig;
// Keep track of files visited, to detect loops
private final Set<String> visitedFiles;
private final Set<String> visitedFilePaths;
public DFSPropertiesConfiguration(FileSystem fs, Path rootFile, TypedProperties defaults) {
public DFSPropertiesConfiguration(FileSystem fs, Path filePath) {
this.fs = fs;
this.rootFile = rootFile;
this.props = defaults;
this.visitedFiles = new HashSet<>();
visitFile(rootFile);
}
public DFSPropertiesConfiguration(FileSystem fs, Path rootFile) {
this(fs, rootFile, new TypedProperties());
this.currentFilePath = filePath;
this.hoodieConfig = new HoodieConfig();
this.visitedFilePaths = new HashSet<>();
addPropsFromFile(filePath);
}
public DFSPropertiesConfiguration() {
this.fs = null;
this.rootFile = null;
this.props = new TypedProperties();
this.visitedFiles = new HashSet<>();
this.currentFilePath = null;
this.hoodieConfig = new HoodieConfig();
this.visitedFilePaths = new HashSet<>();
}
private String[] splitProperty(String line) {
int ind = line.indexOf('=');
String k = line.substring(0, ind).trim();
String v = line.substring(ind + 1).trim();
return new String[] {k, v};
}
private void visitFile(Path file) {
try {
if (visitedFiles.contains(file.getName())) {
throw new IllegalStateException("Loop detected; file " + file + " already referenced");
/**
* Load global props from hudi-defaults.conf which is under CONF_FILE_DIR_ENV_NAME.
* @return Typed Properties
*/
public static TypedProperties loadGlobalProps() {
DFSPropertiesConfiguration conf = new DFSPropertiesConfiguration();
Option<Path> defaultConfPath = getConfPathFromEnv();
if (defaultConfPath.isPresent()) {
conf.addPropsFromFile(defaultConfPath.get());
} else {
try {
conf.addPropsFromFile(new Path(DEFAULT_CONF_FILE_DIR));
} catch (Exception ignored) {
LOG.debug("Didn't find config file under default conf file dir: " + DEFAULT_CONF_FILE_DIR);
}
visitedFiles.add(file.getName());
BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(file)));
addProperties(reader);
}
return conf.getProps();
}
public static void refreshGlobalProps() {
GLOBAL_PROPS = loadGlobalProps();
}
public static void clearGlobalProps() {
GLOBAL_PROPS = new TypedProperties();
}
/**
* Add properties from external configuration files.
*
* @param filePath File path for configuration file
*/
public void addPropsFromFile(Path filePath) {
if (visitedFilePaths.contains(filePath.toString())) {
throw new IllegalStateException("Loop detected; file " + filePath + " already referenced");
}
FileSystem fileSystem;
try {
fileSystem = fs != null ? fs : filePath.getFileSystem(new Configuration());
} catch (IOException e) {
throw new IllegalArgumentException("Cannot get the file system from file path", e);
}
try (BufferedReader reader = new BufferedReader(new InputStreamReader(fileSystem.open(filePath)))) {
visitedFilePaths.add(filePath.toString());
currentFilePath = filePath;
addPropsFromStream(reader);
} catch (IOException ioe) {
LOG.error("Error reading in properies from dfs", ioe);
LOG.error("Error reading in properties from dfs");
throw new IllegalArgumentException("Cannot read properties from dfs", ioe);
}
}
/**
* Add properties from input stream.
*
* Add properties from buffered reader.
*
* @param reader Buffered Reader
* @throws IOException
*/
public void addProperties(BufferedReader reader) throws IOException {
public void addPropsFromStream(BufferedReader reader) throws IOException {
try {
reader.lines().forEach(line -> {
if (line.startsWith("#") || line.equals("") || !line.contains("=")) {
if (!isValidLine(line)) {
return;
}
String[] split = splitProperty(line);
if (line.startsWith("include=") || line.startsWith("include =")) {
visitFile(new Path(rootFile.getParent(), split[1]));
Path includeFilePath = new Path(currentFilePath.getParent(), split[1]);
addPropsFromFile(includeFilePath);
} else {
props.setProperty(split[0], split[1]);
hoodieConfig.setValue(split[0], split[1]);
}
});
@@ -117,7 +161,46 @@ public class DFSPropertiesConfiguration {
}
}
public TypedProperties getConfig() {
return props;
public static TypedProperties getGlobalProps() {
final TypedProperties globalProps = new TypedProperties();
globalProps.putAll(GLOBAL_PROPS);
return globalProps;
}
public TypedProperties getProps() {
return new TypedProperties(hoodieConfig.getProps());
}
public TypedProperties getProps(boolean includeGlobalProps) {
return new TypedProperties(hoodieConfig.getProps(includeGlobalProps));
}
private static Option<Path> getConfPathFromEnv() {
String confDir = System.getenv(CONF_FILE_DIR_ENV_NAME);
if (confDir == null) {
LOG.warn("Cannot find " + CONF_FILE_DIR_ENV_NAME + ", please set it as the dir of " + DEFAULT_PROPERTIES_FILE);
return Option.empty();
}
if (StringUtils.isNullOrEmpty(URI.create(confDir).getScheme())) {
confDir = "file://" + confDir;
}
return Option.of(new Path(confDir + File.separator + DEFAULT_PROPERTIES_FILE));
}
private String[] splitProperty(String line) {
line = line.replaceAll("\\s+"," ");
String delimiter = line.contains("=") ? "=" : " ";
int ind = line.indexOf(delimiter);
String k = line.substring(0, ind).trim();
String v = line.substring(ind + 1).trim();
return new String[] {k, v};
}
private boolean isValidLine(String line) {
ValidationUtils.checkArgument(line != null, "passed line is null");
if (line.startsWith("#") || line.equals("")) {
return false;
}
return line.contains("=") || line.matches(".*\\s.*");
}
}

View File

@@ -58,6 +58,14 @@ public class HoodieConfig implements Serializable {
props.setProperty(cfg.key(), val);
}
public <T> void setValue(String key, String val) {
props.setProperty(key, val);
}
public void setAll(Properties properties) {
props.putAll(properties);
}
public <T> void setDefaultValue(ConfigProperty<T> configProperty) {
if (!contains(configProperty)) {
Option<T> inferValue = Option.empty();
@@ -167,7 +175,17 @@ public class HoodieConfig implements Serializable {
}
public Properties getProps() {
return props;
return getProps(false);
}
public Properties getProps(boolean includeGlobalProps) {
if (includeGlobalProps) {
Properties mergedProps = DFSPropertiesConfiguration.getGlobalProps();
mergedProps.putAll(props);
return mergedProps;
} else {
return props;
}
}
public void setDefaultOnCondition(boolean condition, HoodieConfig config) {

View File

@@ -25,10 +25,14 @@ import org.apache.hudi.common.testutils.minicluster.HdfsTestService;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.Rule;
import org.junit.contrib.java.lang.system.EnvironmentVariables;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
@@ -47,6 +51,10 @@ public class TestDFSPropertiesConfiguration {
private static MiniDFSCluster dfsCluster;
private static DistributedFileSystem dfs;
@Rule
public static final EnvironmentVariables ENVIRONMENT_VARIABLES
= new EnvironmentVariables();
@BeforeAll
public static void initClass() throws Exception {
hdfsTestService = new HdfsTestService();
@@ -73,12 +81,17 @@ public class TestDFSPropertiesConfiguration {
}
@AfterAll
public static void cleanupClass() throws Exception {
public static void cleanupClass() {
if (hdfsTestService != null) {
hdfsTestService.stop();
}
}
@AfterEach
public void cleanupGlobalConfig() {
DFSPropertiesConfiguration.clearGlobalProps();
}
private static void writePropertiesFile(Path path, String[] lines) throws IOException {
PrintStream out = new PrintStream(dfs.create(path, true));
for (String line : lines) {
@@ -91,7 +104,7 @@ public class TestDFSPropertiesConfiguration {
@Test
public void testParsing() {
DFSPropertiesConfiguration cfg = new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/t1.props"));
TypedProperties props = cfg.getConfig();
TypedProperties props = cfg.getProps();
assertEquals(5, props.size());
assertThrows(IllegalArgumentException.class, () -> {
props.getString("invalid.key");
@@ -119,7 +132,7 @@ public class TestDFSPropertiesConfiguration {
@Test
public void testIncludes() {
DFSPropertiesConfiguration cfg = new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/t3.props"));
TypedProperties props = cfg.getConfig();
TypedProperties props = cfg.getProps();
assertEquals(123, props.getInteger("int.prop"));
assertEquals(243.4, props.getDouble("double.prop"), 0.001);
@@ -127,7 +140,30 @@ public class TestDFSPropertiesConfiguration {
assertEquals("t3.value", props.getString("string.prop"));
assertEquals(1354354354, props.getLong("long.prop"));
assertThrows(IllegalStateException.class, () -> {
new DFSPropertiesConfiguration(dfs, new Path(dfsBasePath + "/t4.props"));
cfg.addPropsFromFile(new Path(dfsBasePath + "/t4.props"));
}, "Should error out on a self-included file.");
}
@Test
public void testNoGlobalConfFileConfigured() {
ENVIRONMENT_VARIABLES.clear(DFSPropertiesConfiguration.CONF_FILE_DIR_ENV_NAME);
// Should not throw any exception when no external configuration file configured
DFSPropertiesConfiguration.refreshGlobalProps();
assertEquals(0, DFSPropertiesConfiguration.getGlobalProps().size());
}
@Test
public void testLoadGlobalConfFile() {
// set HUDI_CONF_DIR
String testPropsFilePath = new File("src/test/resources/external-config").getAbsolutePath();
ENVIRONMENT_VARIABLES.set(DFSPropertiesConfiguration.CONF_FILE_DIR_ENV_NAME, testPropsFilePath);
DFSPropertiesConfiguration.refreshGlobalProps();
assertEquals(5, DFSPropertiesConfiguration.getGlobalProps().size());
assertEquals("jdbc:hive2://localhost:10000", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.jdbcurl"));
assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.use_jdbc"));
assertEquals("false", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.datasource.hive_sync.support_timestamp"));
assertEquals("BLOOM", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.index.type"));
assertEquals("true", DFSPropertiesConfiguration.getGlobalProps().get("hoodie.metadata.enable"));
}
}

View File

@@ -0,0 +1,26 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Default system properties included when running Hudi jobs.
# This is useful for setting default environmental settings.
# Example:
hoodie.datasource.hive_sync.jdbcurl jdbc:hive2://localhost:10000
hoodie.datasource.hive_sync.use_jdbc true
hoodie.datasource.hive_sync.support_timestamp false
hoodie.index.type BLOOM
hoodie.metadata.enable true