
[HUDI-3104] Kafka-connect support of hadoop config environments and properties (#4451)

Thinking Chen
2022-01-09 15:10:17 +08:00
committed by GitHub
parent 98ec215079
commit 0d8ca8da4e
7 changed files with 292 additions and 0 deletions

View File

@@ -49,9 +49,14 @@ import org.apache.log4j.Logger;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.FileVisitOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -65,6 +70,52 @@ public class KafkaConnectUtils {
private static final Logger LOG = LogManager.getLogger(KafkaConnectUtils.class);
private static final String HOODIE_CONF_PREFIX = "hoodie.";
public static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR";
public static final String HADOOP_HOME = "HADOOP_HOME";
private static final List<Path> DEFAULT_HADOOP_CONF_FILES;
static {
DEFAULT_HADOOP_CONF_FILES = new ArrayList<>();
try {
String hadoopConfigPath = System.getenv(HADOOP_CONF_DIR);
String hadoopHomePath = System.getenv(HADOOP_HOME);
DEFAULT_HADOOP_CONF_FILES.addAll(getHadoopConfigFiles(hadoopConfigPath, hadoopHomePath));
if (!DEFAULT_HADOOP_CONF_FILES.isEmpty()) {
LOG.info(String.format("Found Hadoop default config files %s", DEFAULT_HADOOP_CONF_FILES));
}
} catch (IOException e) {
LOG.error("An error occurred while getting the default Hadoop configuration. "
+ "Please use hadoop.conf.dir or hadoop.home to configure Hadoop environment variables", e);
}
}
/**
* Get Hadoop config files from HADOOP_CONF_DIR, falling back to HADOOP_HOME.
*/
public static List<Path> getHadoopConfigFiles(String hadoopConfigPath, String hadoopHomePath)
throws IOException {
List<Path> hadoopConfigFiles = new ArrayList<>();
if (!StringUtils.isNullOrEmpty(hadoopConfigPath)) {
hadoopConfigFiles.addAll(walkTreeForXml(Paths.get(hadoopConfigPath)));
}
if (hadoopConfigFiles.isEmpty() && !StringUtils.isNullOrEmpty(hadoopHomePath)) {
hadoopConfigFiles.addAll(walkTreeForXml(Paths.get(hadoopHomePath, "etc", "hadoop")));
}
return hadoopConfigFiles;
}
/**
* Walk the file tree under the given base path and collect *.xml config files.
*/
private static List<Path> walkTreeForXml(Path basePath) throws IOException {
if (Files.notExists(basePath)) {
return new ArrayList<>();
}
return Files.walk(basePath, FileVisitOption.FOLLOW_LINKS)
.filter(path -> path.toFile().isFile())
.filter(path -> path.toString().endsWith(".xml"))
.collect(Collectors.toList());
}
public static int getLatestNumPartitions(String bootstrapServers, String topicName) {
Properties props = new Properties();
@@ -89,6 +140,23 @@ public class KafkaConnectUtils {
*/
public static Configuration getDefaultHadoopConf(KafkaConnectConfigs connectConfigs) {
Configuration hadoopConf = new Configuration();
// add hadoop config files
if (!StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfDir())
|| !StringUtils.isNullOrEmpty(connectConfigs.getHadoopConfHome())) {
try {
List<Path> configFiles = getHadoopConfigFiles(connectConfigs.getHadoopConfDir(),
connectConfigs.getHadoopConfHome());
configFiles.forEach(f ->
hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
} catch (Exception e) {
throw new HoodieException("Failed to read hadoop configuration!", e);
}
} else {
DEFAULT_HADOOP_CONF_FILES.forEach(f ->
hadoopConf.addResource(new org.apache.hadoop.fs.Path(f.toAbsolutePath().toUri())));
}
connectConfigs.getProps().keySet().stream().filter(prop -> {
// Filter out hoodie.* configuration items before passing properties to the
// Hadoop/Hive configs, to avoid printing unnecessary warning logs
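Taken together, the pieces above give the connector a two-level lookup: config files resolved from the explicit connector options win, and the XML files discovered from the HADOOP_CONF_DIR / HADOOP_HOME environment variables (resolved once in the static initializer) serve as the fallback. A minimal sketch of the lookup order inside getHadoopConfigFiles, with purely illustrative paths:

List<Path> files = KafkaConnectUtils.getHadoopConfigFiles(
    "/etc/hadoop/conf",  // hypothetical conf dir; wins if it contains any *.xml
    "/opt/hadoop");      // hypothetical home; only /opt/hadoop/etc/hadoop is
                         // scanned, and only when the first lookup found nothing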

View File

@@ -93,6 +93,17 @@ public class KafkaConnectConfigs extends HoodieConfig {
.defaultValue(true)
.withDocumentation("Commit even when some records failed to be written");
// Reference https://docs.confluent.io/kafka-connect-hdfs/current/configuration_options.html#hdfs
public static final ConfigProperty<String> HADOOP_CONF_DIR = ConfigProperty
.key("hadoop.conf.dir")
.noDefaultValue()
.withDocumentation("The Hadoop configuration directory.");
public static final ConfigProperty<String> HADOOP_HOME = ConfigProperty
.key("hadoop.home")
.noDefaultValue()
.withDocumentation("The Hadoop home directory.");
protected KafkaConnectConfigs() {
super();
}
@@ -145,6 +156,14 @@ public class KafkaConnectConfigs extends HoodieConfig {
return getBoolean(ALLOW_COMMIT_ON_ERRORS);
}
public String getHadoopConfDir() {
return getString(HADOOP_CONF_DIR);
}
public String getHadoopConfHome() {
return getString(HADOOP_HOME);
}
public static class Builder {
protected final KafkaConnectConfigs connectConfigs = new KafkaConnectConfigs();
@@ -185,6 +204,16 @@ public class KafkaConnectConfigs extends HoodieConfig {
return this;
}
public Builder withHadoopConfDir(String hadoopConfDir) {
connectConfigs.setValue(HADOOP_CONF_DIR, hadoopConfDir);
return this;
}
public Builder withHadoopHome(String hadoopHome) {
connectConfigs.setValue(HADOOP_HOME, hadoopHome);
return this;
}
protected void setDefaults() {
// Check for mandatory properties
connectConfigs.setDefaults(KafkaConnectConfigs.class.getName());
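A short usage sketch of the new options, assuming the Builder and the getDefaultHadoopConf utility shown earlier (the directory path is illustrative):

KafkaConnectConfigs connectConfigs = KafkaConnectConfigs.newBuilder()
    .withHadoopConfDir("/etc/hadoop/conf")  // hypothetical path
    .build();
// Every *.xml under the directory (e.g. core-site.xml, hdfs-site.xml) is
// added as a resource of the returned Hadoop Configuration.
Configuration hadoopConf = KafkaConnectUtils.getDefaultHadoopConf(connectConfigs);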

View File

@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.connect;
import org.apache.hudi.connect.utils.KafkaConnectUtils;
import org.apache.hudi.connect.writers.KafkaConnectConfigs;
import org.junit.jupiter.api.Test;
import java.nio.file.Path;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestHdfsConfiguration {
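// Returns true only when every discovered path is one of the expected
// core-site.xml / hdfs-site.xml fixtures.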
private boolean checkFiles(List<Path> paths) {
paths.removeIf(p -> {
String fileName = p.toFile().getName();
return fileName.equals("core-site.xml") || fileName.equals("hdfs-site.xml");
});
return paths.isEmpty();
}
@Test
public void testHadoopConfigEnvs() throws Exception {
List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
"src/test/resources/hadoop_conf", "");
assertEquals(2, paths.size());
assertTrue(checkFiles(paths));
}
@Test
public void testHadoopHomeEnvs() throws Exception {
List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
"","src/test/resources/hadoop_home");
assertEquals(paths.size(), 2);
assertTrue(checkFiles(paths));
}
@Test
public void testKafkaConfig() throws Exception {
KafkaConnectConfigs connectConfigs = KafkaConnectConfigs.newBuilder()
.withHadoopHome("src/test/resources/hadoop_home")
.build();
List<Path> paths = KafkaConnectUtils.getHadoopConfigFiles(
connectConfigs.getHadoopConfDir(),
connectConfigs.getHadoopConfHome()
);
assertEquals(2, paths.size());
assertTrue(checkFiles(paths));
}
}
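Finally, a hedged sketch of how these options would be wired into a connector properties file. The key names come from the ConfigProperty definitions above, following the kafka-connect-hdfs convention referenced in the code comment; the connector name, class, and paths are illustrative:

name=hudi-sink
connector.class=org.apache.hudi.connect.HoodieSinkConnector
hadoop.conf.dir=/etc/hadoop/conf
# Alternatively, when only a Hadoop installation root is known:
# hadoop.home=/opt/hadoop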

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://test-hudi-path:9000</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>dfs.namenode.http-address</name>
<value>http://test-hudi-path:50070</value>
<description>
The address and the base port where the dfs namenode web ui will listen on.
</description>
</property>
</configuration>

View File

@@ -0,0 +1,33 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://test-hudi-path:9000</value>
<description>The name of the default file system. A URI whose
scheme and authority determine the FileSystem implementation. The
uri's scheme determines the config property (fs.SCHEME.impl) naming
the FileSystem implementation class. The uri's authority is used to
determine the host, port, etc. for a filesystem.</description>
</property>
</configuration>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<configuration>
<property>
<name>dfs.namenode.http-address</name>
<value>http://test-hudi-path:50070</value>
<description>
The address and the base port where the dfs namenode web ui will listen on.
</description>
</property>
</configuration>