1
0

Docker container build and run setup, with foundations for adding Docker integration tests. Docker images are built with Hadoop 2.8.4, Hive 2.3.3, and Spark 2.3.1 and published to Docker Hub.

See the quickstart document for how to set up Docker and run the demo.
This commit is contained in:
Balaji Varadarajan
2018-08-21 22:54:57 -07:00
committed by vinoth chandar
parent 9710b5a3a6
commit f3418e4718
63 changed files with 8952 additions and 9 deletions

View File

@@ -0,0 +1,178 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.integ;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.awaitility.Awaitility.await;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.command.DockerCmdExecFactory;
import com.github.dockerjava.api.command.ExecCreateCmd;
import com.github.dockerjava.api.command.ExecCreateCmdResponse;
import com.github.dockerjava.api.model.Container;
import com.github.dockerjava.core.DefaultDockerClientConfig;
import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import com.github.dockerjava.core.command.ExecStartResultCallback;
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
import com.google.common.collect.ImmutableList;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.Assert;
import org.junit.Before;
public abstract class ITTestBase {
public static final Logger LOG = LogManager.getLogger(ITTestBase.class);
protected static final String SPARK_WORKER_CONTAINER = "/spark-worker-1";
protected static final String ADHOC_1_CONTAINER = "/adhoc-1";
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
protected static final String HIVESERVER = "/hiveserver";
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
protected static final String HUDI_HADOOP_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
protected static final String HUDI_HIVE_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
protected static final String HUDI_SPARK_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
// Skip these lines when capturing output from hive
protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
protected DockerClient dockerClient;
protected Map<String, Container> runningContainers;
protected static String[] getHiveConsoleCommand(String rawCommand) {
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
String fullCommand = jarCommand + rawCommand;
List<String> cmd = new ImmutableList.Builder().add("hive")
.add("--hiveconf")
.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
.add("--hiveconf")
.add("hive.stats.autogather=false")
.add("-e")
.add("\"" + fullCommand + "\"")
.build();
return cmd.stream().toArray(String[]::new);
}
@Before
public void init() throws IOException {
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
//Assuming insecure docker engine
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
.withDockerHost(dockerHost)
.build();
// using jaxrs/jersey implementation here (netty impl is also available)
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory()
.withConnectTimeout(1000)
.withMaxTotalConnections(100)
.withMaxPerRouteConnections(10);
dockerClient = DockerClientBuilder.getInstance(config)
.withDockerCmdExecFactory(dockerCmdExecFactory)
.build();
await().atMost(60, SECONDS).until(this::servicesUp);
}
private boolean servicesUp() {
List<Container> containerList = dockerClient.listContainersCmd().exec();
for (Container c : containerList) {
if (!c.getState().equalsIgnoreCase("running")) {
System.out.println("Container : " + Arrays.toString(c.getNames())
+ "not in running state, Curr State :" + c.getState());
return false;
}
}
runningContainers = containerList.stream().map(c -> Pair.of(c.getNames()[0], c))
.collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
return true;
}
protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
boolean expectedToSucceed)
throws Exception {
LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
Container sparkWorkerContainer = runningContainers.get(containerName);
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
ExecCreateCmdResponse createCmdResponse = cmd.exec();
TestExecStartResultCallback callback = new TestExecStartResultCallback(new ByteArrayOutputStream(),
new ByteArrayOutputStream());
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
.exec(callback).awaitCompletion();
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
if (exitCode != 0) {
LOG.error("Command (" + Arrays.toString(command) + ") failed.");
LOG.error("Stdout is :" + callback.getStdout().toString());
LOG.error("Stderr is :" + callback.getStderr().toString());
}
if (expectedToSucceed) {
Assert.assertTrue("Command (" + Arrays.toString(command)
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
} else {
Assert.assertTrue("Command (" + Arrays.toString(command)
+ ") expected to fail. Exit (" + exitCode + ")", exitCode != 0);
}
cmd.close();
return callback;
}
public class TestExecStartResultCallback extends ExecStartResultCallback {
// Storing the reference in subclass to expose to clients
private final ByteArrayOutputStream stdout;
private final ByteArrayOutputStream stderr;
public TestExecStartResultCallback(ByteArrayOutputStream stdout, ByteArrayOutputStream stderr) {
super(stdout, stderr);
this.stdout = stdout;
this.stderr = stderr;
}
@Override
public void onComplete() {
super.onComplete();
LOG.info("onComplete called");
try {
stderr.flush();
stdout.flush();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public ByteArrayOutputStream getStdout() {
return stdout;
}
public ByteArrayOutputStream getStderr() {
return stderr;
}
}
}

View File

@@ -0,0 +1,139 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.integ;
import java.util.Arrays;
import org.junit.Assert;
import org.junit.Test;
/**
* Smoke tests to run as part of verification.
*/
public class ITTestHoodieSanity extends ITTestBase {
@Test
public void testRunEcho() throws Exception {
String[] cmd = new String[]{"echo", "Happy Testing"};
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
cmd, true);
String stdout = callback.getStdout().toString();
String stderr = callback.getStderr().toString();
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
}
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
*/
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
testRunHoodieJavaAppOnCOWTable(hiveTableName, true);
}
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with multiple partition-keys
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
*/
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
testRunHoodieJavaAppOnCOWTable(hiveTableName, false);
}
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
* query in hive console.
* TODO: Add spark-shell test-case
*/
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {
// Drop Table if it exists
{
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
}
// Ensure table does not exist
{
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
TestExecStartResultCallback callback =
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
String stderr = callback.getStderr().toString();
String stdout = callback.getStdout().toString();
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
}
// Run Hoodie Java App
{
String[] cmd = null;
if (singlePartitionKey) {
cmd = new String[]{
HOODIE_JAVA_APP,
"--hive-sync",
"--hive-url", HIVE_SERVER_JDBC_URL,
"--hive-table", hiveTableName
};
} else {
cmd = new String[]{
HOODIE_JAVA_APP,
"--hive-sync",
"--hive-url", HIVE_SERVER_JDBC_URL,
"--use-multi-partition-keys",
"--hive-table", hiveTableName
};
}
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
cmd, true);
String stdout = callback.getStdout().toString().trim();
String stderr = callback.getStderr().toString().trim();
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
}
// Ensure table does exist
{
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
TestExecStartResultCallback callback =
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
String stderr = callback.getStderr().toString().trim();
String stdout = callback.getStdout().toString().trim();
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
Assert.assertEquals("Table exists", hiveTableName, stdout);
}
// Ensure row count is 100 (without duplicates)
{
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
TestExecStartResultCallback callback =
executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
String stderr = callback.getStderr().toString().trim();
String stdout = callback.getStdout().toString().trim();
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
Integer.parseInt(stdout.trim()));
}
}
}

View File

@@ -0,0 +1,23 @@
#
# Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Root logger: WARN level, output goes to appender A1.
log4j.rootLogger=WARN, A1
# Loggers under com.uber are set to INFO.
log4j.category.com.uber=INFO
# Loggers under org.apache.parquet.hadoop are set to WARN.
log4j.category.org.apache.parquet.hadoop=WARN
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Line format: elapsed millis (%r), thread (%t), level (%p), logger category (%c), NDC (%x), message (%m), newline (%n).
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n