Docker container build-and-run setup, with foundations for adding Docker integration tests. Docker images are built with Hadoop 2.8.4, Hive 2.3.3, and Spark 2.3.1, and published to Docker Hub.
See the quickstart document for how to set up Docker and run the demo.
This commit is contained in:
committed by
vinoth chandar
parent
9710b5a3a6
commit
f3418e4718
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.integ;
|
||||
|
||||
import static java.util.concurrent.TimeUnit.SECONDS;
|
||||
import static org.awaitility.Awaitility.await;
|
||||
|
||||
import com.github.dockerjava.api.DockerClient;
|
||||
import com.github.dockerjava.api.command.DockerCmdExecFactory;
|
||||
import com.github.dockerjava.api.command.ExecCreateCmd;
|
||||
import com.github.dockerjava.api.command.ExecCreateCmdResponse;
|
||||
import com.github.dockerjava.api.model.Container;
|
||||
import com.github.dockerjava.core.DefaultDockerClientConfig;
|
||||
import com.github.dockerjava.core.DockerClientBuilder;
|
||||
import com.github.dockerjava.core.DockerClientConfig;
|
||||
import com.github.dockerjava.core.command.ExecStartResultCallback;
|
||||
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
|
||||
public abstract class ITTestBase {
|
||||
|
||||
public static final Logger LOG = LogManager.getLogger(ITTestBase.class);
|
||||
protected static final String SPARK_WORKER_CONTAINER = "/spark-worker-1";
|
||||
protected static final String ADHOC_1_CONTAINER = "/adhoc-1";
|
||||
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
|
||||
protected static final String HIVESERVER = "/hiveserver";
|
||||
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
|
||||
protected static final String HUDI_HADOOP_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
||||
protected static final String HUDI_HIVE_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
|
||||
protected static final String HUDI_SPARK_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
|
||||
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
|
||||
// Skip these lines when capturing output from hive
|
||||
protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;
|
||||
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
|
||||
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
|
||||
protected DockerClient dockerClient;
|
||||
protected Map<String, Container> runningContainers;
|
||||
|
||||
protected static String[] getHiveConsoleCommand(String rawCommand) {
|
||||
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
|
||||
String fullCommand = jarCommand + rawCommand;
|
||||
|
||||
List<String> cmd = new ImmutableList.Builder().add("hive")
|
||||
.add("--hiveconf")
|
||||
.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
|
||||
.add("--hiveconf")
|
||||
.add("hive.stats.autogather=false")
|
||||
.add("-e")
|
||||
.add("\"" + fullCommand + "\"")
|
||||
.build();
|
||||
return cmd.stream().toArray(String[]::new);
|
||||
}
|
||||
|
||||
@Before
|
||||
public void init() throws IOException {
|
||||
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
|
||||
//Assuming insecure docker engine
|
||||
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
||||
.withDockerHost(dockerHost)
|
||||
.build();
|
||||
// using jaxrs/jersey implementation here (netty impl is also available)
|
||||
DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory()
|
||||
.withConnectTimeout(1000)
|
||||
.withMaxTotalConnections(100)
|
||||
.withMaxPerRouteConnections(10);
|
||||
dockerClient = DockerClientBuilder.getInstance(config)
|
||||
.withDockerCmdExecFactory(dockerCmdExecFactory)
|
||||
.build();
|
||||
await().atMost(60, SECONDS).until(this::servicesUp);
|
||||
}
|
||||
|
||||
private boolean servicesUp() {
|
||||
List<Container> containerList = dockerClient.listContainersCmd().exec();
|
||||
for (Container c : containerList) {
|
||||
if (!c.getState().equalsIgnoreCase("running")) {
|
||||
System.out.println("Container : " + Arrays.toString(c.getNames())
|
||||
+ "not in running state, Curr State :" + c.getState());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
runningContainers = containerList.stream().map(c -> Pair.of(c.getNames()[0], c))
|
||||
.collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
|
||||
return true;
|
||||
}
|
||||
|
||||
protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
||||
boolean expectedToSucceed)
|
||||
throws Exception {
|
||||
LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
|
||||
Container sparkWorkerContainer = runningContainers.get(containerName);
|
||||
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
|
||||
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
|
||||
|
||||
ExecCreateCmdResponse createCmdResponse = cmd.exec();
|
||||
TestExecStartResultCallback callback = new TestExecStartResultCallback(new ByteArrayOutputStream(),
|
||||
new ByteArrayOutputStream());
|
||||
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
|
||||
.exec(callback).awaitCompletion();
|
||||
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
|
||||
LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
|
||||
if (exitCode != 0) {
|
||||
LOG.error("Command (" + Arrays.toString(command) + ") failed.");
|
||||
LOG.error("Stdout is :" + callback.getStdout().toString());
|
||||
LOG.error("Stderr is :" + callback.getStderr().toString());
|
||||
}
|
||||
if (expectedToSucceed) {
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
|
||||
} else {
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||
+ ") expected to fail. Exit (" + exitCode + ")", exitCode != 0);
|
||||
}
|
||||
cmd.close();
|
||||
return callback;
|
||||
}
|
||||
|
||||
public class TestExecStartResultCallback extends ExecStartResultCallback {
|
||||
|
||||
// Storing the reference in subclass to expose to clients
|
||||
private final ByteArrayOutputStream stdout;
|
||||
private final ByteArrayOutputStream stderr;
|
||||
|
||||
public TestExecStartResultCallback(ByteArrayOutputStream stdout, ByteArrayOutputStream stderr) {
|
||||
super(stdout, stderr);
|
||||
this.stdout = stdout;
|
||||
this.stderr = stderr;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onComplete() {
|
||||
super.onComplete();
|
||||
LOG.info("onComplete called");
|
||||
try {
|
||||
stderr.flush();
|
||||
stdout.flush();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public ByteArrayOutputStream getStdout() {
|
||||
return stdout;
|
||||
}
|
||||
|
||||
public ByteArrayOutputStream getStderr() {
|
||||
return stderr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.integ;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Smoke tests to run as part of verification.
|
||||
*/
|
||||
public class ITTestHoodieSanity extends ITTestBase {
|
||||
|
||||
@Test
|
||||
public void testRunEcho() throws Exception {
|
||||
String[] cmd = new String[]{"echo", "Happy Testing"};
|
||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
||||
cmd, true);
|
||||
String stdout = callback.getStdout().toString();
|
||||
String stderr = callback.getStderr().toString();
|
||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
||||
}
|
||||
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
|
||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with multiple partition-keys
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
|
||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie
|
||||
* data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
|
||||
* query in hive console.
|
||||
* TODO: Add spark-shell test-case
|
||||
*/
|
||||
public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {
|
||||
|
||||
// Drop Table if it exists
|
||||
{
|
||||
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
|
||||
executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
|
||||
}
|
||||
|
||||
// Ensure table does not exist
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString();
|
||||
String stdout = callback.getStdout().toString();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
|
||||
Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
|
||||
}
|
||||
|
||||
// Run Hoodie Java App
|
||||
{
|
||||
String[] cmd = null;
|
||||
if (singlePartitionKey) {
|
||||
cmd = new String[]{
|
||||
HOODIE_JAVA_APP,
|
||||
"--hive-sync",
|
||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||
"--hive-table", hiveTableName
|
||||
};
|
||||
} else {
|
||||
cmd = new String[]{
|
||||
HOODIE_JAVA_APP,
|
||||
"--hive-sync",
|
||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||
"--use-multi-partition-keys",
|
||||
"--hive-table", hiveTableName
|
||||
};
|
||||
}
|
||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
||||
cmd, true);
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
||||
}
|
||||
|
||||
// Ensure table does exist
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||
Assert.assertEquals("Table exists", hiveTableName, stdout);
|
||||
}
|
||||
|
||||
// Ensure row count is 100 (without duplicates)
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
||||
Integer.parseInt(stdout.trim()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
#
|
||||
# Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Root logger: WARN and above, sent to the A1 console appender.
log4j.rootLogger=WARN, A1
# Raise our own packages to INFO so test progress is visible.
log4j.category.com.uber=INFO
# Keep parquet-hadoop at WARN (its INFO output is noisy).
log4j.category.org.apache.parquet.hadoop=WARN
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Pattern: elapsed-millis [thread] LEVEL category NDC - message
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
|
||||
Reference in New Issue
Block a user