1
0

Docker container build-and-run setup with foundations for adding Docker integration tests. Docker images are built with Hadoop 2.8.4, Hive 2.3.3, and Spark 2.3.1, and published to Docker Hub.

See the quickstart document for how to set up Docker and run the demo.
This commit is contained in:
Balaji Varadarajan
2018-08-21 22:54:57 -07:00
committed by vinoth chandar
parent 9710b5a3a6
commit f3418e4718
63 changed files with 8952 additions and 9 deletions

212
hoodie-integ-test/pom.xml Normal file
View File

@@ -0,0 +1,212 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
  ~
  ~ Licensed under the Apache License, Version 2.0 (the "License");
  ~ you may not use this file except in compliance with the License.
  ~ You may obtain a copy of the License at
  ~
  ~      http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
-->
<!--
  Integration-test module: brings up the Hadoop/Hive/Spark docker-compose
  environment before the failsafe integration tests and tears it down after.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <artifactId>hoodie</artifactId>
    <groupId>com.uber.hoodie</groupId>
    <version>0.4.5-SNAPSHOT</version>
    <relativePath>../pom.xml</relativePath>
  </parent>
  <artifactId>hoodie-integ-test</artifactId>
  <modelVersion>4.0.0</modelVersion>

  <dependencies>
    <!-- Jersey 2.17 pinned explicitly: the docker-java jaxrs transport needs a
         consistent Jersey version across connector/server/container artifacts. -->
    <dependency>
      <groupId>org.glassfish.jersey.connectors</groupId>
      <artifactId>jersey-apache-connector</artifactId>
      <version>2.17</version>
    </dependency>
    <dependency>
      <groupId>org.glassfish.jersey.core</groupId>
      <artifactId>jersey-server</artifactId>
      <version>2.17</version>
    </dependency>
    <dependency>
      <groupId>org.glassfish.jersey.containers</groupId>
      <artifactId>jersey-container-servlet-core</artifactId>
      <version>2.17</version>
    </dependency>
    <dependency>
      <groupId>com.uber.hoodie</groupId>
      <artifactId>hoodie-spark</artifactId>
      <version>${project.version}</version>
      <exclusions>
        <!-- NOTE(review): Maven exclusions only support a bare "*" wildcard
             (per groupId/artifactId, Maven 3.2.1+); the "org.glassfish.**"
             glob below is treated literally and matches nothing. Verify the
             intended transitive Jersey artifacts are actually excluded. -->
        <exclusion>
          <groupId>org.glassfish.**</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.uber.hoodie</groupId>
      <artifactId>hoodie-common</artifactId>
      <version>${project.version}</version>
      <classifier>tests</classifier>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <!-- awaitility: used to wait for all docker containers to come up. -->
    <dependency>
      <groupId>org.awaitility</groupId>
      <artifactId>awaitility</artifactId>
      <version>3.1.2</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.uber.hoodie</groupId>
      <artifactId>hoodie-spark</artifactId>
      <version>${project.version}</version>
      <classifier>tests</classifier>
      <type>test-jar</type>
      <scope>test</scope>
      <exclusions>
        <!-- NOTE(review): same non-functional "**" glob as above. -->
        <exclusion>
          <groupId>org.glassfish.**</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>20.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-annotations</artifactId>
      <version>2.6.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.6.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.datatype</groupId>
      <artifactId>jackson-datatype-guava</artifactId>
      <version>2.9.4</version>
      <scope>test</scope>
    </dependency>
    <!-- docker-java: client used by the ITs to exec commands inside containers. -->
    <dependency>
      <groupId>com.github.docker-java</groupId>
      <artifactId>docker-java</artifactId>
      <version>3.1.0-rc-3</version>
      <scope>test</scope>
      <exclusions>
        <!-- NOTE(review): same non-functional "**" glob as above. -->
        <exclusion>
          <groupId>org.glassfish.**</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <!-- pom-type dependency only to force the docker-image module to build
         first in the reactor. Scope "import" is illegal outside
         <dependencyManagement>; "test" keeps the ordering without leaking
         the dependency to consumers. -->
    <dependency>
      <groupId>com.uber.hoodie</groupId>
      <artifactId>hoodie-hadoop-sparkworker-docker</artifactId>
      <version>${project.version}</version>
      <type>pom</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>${junit.version}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <properties>
    <!-- Env file generated at build time (see exec-maven-plugin below). -->
    <dockerCompose.envFile>${project.basedir}/compose_env</dockerCompose.envFile>
    <dockerCompose.file>${project.basedir}/../docker/compose/docker-compose_hadoop284_hive233_spark231.yml</dockerCompose.file>
    <skipITs>false</skipITs>
    <docker.compose.skip>${skipITs}</docker.compose.skip>
    <checkstyle.skip>true</checkstyle.skip>
  </properties>

  <build>
    <plugins>
      <plugin>
        <artifactId>exec-maven-plugin</artifactId>
        <groupId>org.codehaus.mojo</groupId>
        <executions>
          <execution><!-- setup HUDI_WS variable in docker compose env file -->
            <id>Setup HUDI_WS</id>
            <phase>generate-sources</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <executable>/bin/bash</executable>
              <arguments>
                <argument> -c </argument>
                <argument>echo HUDI_WS=`dirname ${project.basedir}`</argument>
              </arguments>
              <outputFile>${dockerCompose.envFile}</outputFile>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-failsafe-plugin</artifactId>
        <version>2.22.0</version>
        <executions>
          <execution>
            <goals>
              <goal>integration-test</goal>
              <!-- "verify" is required: without it, integration-test failures
                   are recorded but never fail the build. -->
              <goal>verify</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>com.dkanejs.maven.plugins</groupId>
        <artifactId>docker-compose-maven-plugin</artifactId>
        <version>2.0.1</version>
        <executions>
          <execution>
            <id>up</id>
            <phase>pre-integration-test</phase>
            <goals>
              <goal>up</goal>
            </goals>
            <configuration>
              <skip>${docker.compose.skip}</skip>
              <host>unix:///var/run/docker.sock</host>
              <composeFile>${dockerCompose.file}</composeFile>
              <detachedMode>true</detachedMode>
              <envFile>${dockerCompose.envFile}</envFile>
            </configuration>
          </execution>
          <execution>
            <id>down</id>
            <!-- post-integration-test (not integration-test) so containers are
                 torn down after the tests run but before failsafe "verify"
                 reports failures. -->
            <phase>post-integration-test</phase>
            <goals>
              <goal>down</goal>
            </goals>
            <configuration>
              <skip>${docker.compose.skip}</skip>
              <host>unix:///var/run/docker.sock</host>
              <composeFile>${dockerCompose.file}</composeFile>
              <envFile>${dockerCompose.envFile}</envFile>
              <removeVolumes>true</removeVolumes>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

View File

@@ -0,0 +1,178 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.integ;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.awaitility.Awaitility.await;
import com.github.dockerjava.api.DockerClient;
import com.github.dockerjava.api.command.DockerCmdExecFactory;
import com.github.dockerjava.api.command.ExecCreateCmd;
import com.github.dockerjava.api.command.ExecCreateCmdResponse;
import com.github.dockerjava.api.model.Container;
import com.github.dockerjava.core.DefaultDockerClientConfig;
import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import com.github.dockerjava.core.command.ExecStartResultCallback;
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
import com.google.common.collect.ImmutableList;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.Assert;
import org.junit.Before;
/**
 * Base class for docker-based integration tests. Connects to the local docker
 * engine, waits for all compose containers to be running, and provides a helper
 * to exec commands inside a named container and capture stdout/stderr.
 */
public abstract class ITTestBase {

  public static final Logger LOG = LogManager.getLogger(ITTestBase.class);

  // Container names as reported by the docker engine (leading '/' included).
  protected static final String SPARK_WORKER_CONTAINER = "/spark-worker-1";
  protected static final String ADHOC_1_CONTAINER = "/adhoc-1";
  protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
  protected static final String HIVESERVER = "/hiveserver";

  // Workspace root as mounted inside the containers (HUDI_WS).
  protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
  protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
  protected static final String HUDI_HADOOP_BUNDLE =
      HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
  protected static final String HUDI_HIVE_BUNDLE =
      HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
  protected static final String HUDI_SPARK_BUNDLE =
      HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
  protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";

  // Skip these lines when capturing output from hive
  protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;

  private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
  private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");

  protected DockerClient dockerClient;
  // Running containers keyed by their first reported name (e.g. "/adhoc-1").
  protected Map<String, Container> runningContainers;

  /**
   * Builds the argv for running a Hive query through the hive CLI inside a
   * container. The hoodie-hadoop-mr bundle jar is added before the query so
   * Hoodie input formats resolve.
   *
   * @param rawCommand HiveQL to execute (without surrounding quotes)
   * @return argv array suitable for docker exec
   */
  protected static String[] getHiveConsoleCommand(String rawCommand) {
    String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
    String fullCommand = jarCommand + rawCommand;

    List<String> cmd = new ImmutableList.Builder<String>()
        .add("hive")
        .add("--hiveconf")
        // Force HiveInputFormat so Hoodie's record-level filtering applies.
        .add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
        .add("--hiveconf")
        .add("hive.stats.autogather=false")
        .add("-e")
        .add("\"" + fullCommand + "\"")
        .build();
    return cmd.toArray(new String[0]);
  }

  /**
   * Connects to the docker engine (DOCKER_HOST env var, falling back to the
   * default unix socket) and waits up to 60s for all containers to be running.
   */
  @Before
  public void init() throws IOException {
    String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
    // Assuming insecure docker engine
    DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
        .withDockerHost(dockerHost)
        .build();
    // using jaxrs/jersey implementation here (netty impl is also available)
    DockerCmdExecFactory dockerCmdExecFactory = new JerseyDockerCmdExecFactory()
        .withConnectTimeout(1000)
        .withMaxTotalConnections(100)
        .withMaxPerRouteConnections(10);
    dockerClient = DockerClientBuilder.getInstance(config)
        .withDockerCmdExecFactory(dockerCmdExecFactory)
        .build();
    await().atMost(60, SECONDS).until(this::servicesUp);
  }

  /**
   * @return true iff every container known to the engine is in "running"
   *     state; also refreshes {@link #runningContainers} on success.
   */
  private boolean servicesUp() {
    List<Container> containerList = dockerClient.listContainersCmd().exec();
    for (Container c : containerList) {
      if (!c.getState().equalsIgnoreCase("running")) {
        LOG.info("Container : " + Arrays.toString(c.getNames())
            + " not in running state, Curr State :" + c.getState());
        return false;
      }
    }
    runningContainers = containerList.stream().map(c -> Pair.of(c.getNames()[0], c))
        .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
    return true;
  }

  /**
   * Execs a command inside the named container, waits for completion, and
   * asserts on the exit code.
   *
   * @param containerName one of the *_CONTAINER constants
   * @param command argv to run inside the container
   * @param expectedToSucceed when true, assert exit code 0; otherwise assert non-zero
   * @return callback holding captured stdout/stderr
   */
  protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
      boolean expectedToSucceed) throws Exception {
    LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
    Container container = runningContainers.get(containerName);
    if (container == null) {
      throw new IllegalStateException("Container " + containerName + " is not running. Known containers : "
          + runningContainers.keySet());
    }
    ExecCreateCmd cmd = dockerClient.execCreateCmd(container.getId())
        .withCmd(command).withAttachStdout(true).withAttachStderr(true);
    TestExecStartResultCallback callback = new TestExecStartResultCallback(new ByteArrayOutputStream(),
        new ByteArrayOutputStream());
    try {
      ExecCreateCmdResponse createCmdResponse = cmd.exec();
      dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
          .exec(callback).awaitCompletion();
      int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
      LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
      if (exitCode != 0) {
        LOG.error("Command (" + Arrays.toString(command) + ") failed.");
        LOG.error("Stdout is :" + callback.getStdout().toString());
        LOG.error("Stderr is :" + callback.getStderr().toString());
      }
      if (expectedToSucceed) {
        Assert.assertTrue("Command (" + Arrays.toString(command)
            + ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
      } else {
        Assert.assertTrue("Command (" + Arrays.toString(command)
            + ") expected to fail. Exit (" + exitCode + ")", exitCode != 0);
      }
    } finally {
      // Close even when an assertion above throws, so the exec cmd is released.
      cmd.close();
    }
    return callback;
  }

  /**
   * Result callback that retains references to the stdout/stderr streams so
   * tests can inspect captured output after completion.
   */
  public class TestExecStartResultCallback extends ExecStartResultCallback {

    // Storing the reference in subclass to expose to clients
    private final ByteArrayOutputStream stdout;
    private final ByteArrayOutputStream stderr;

    public TestExecStartResultCallback(ByteArrayOutputStream stdout, ByteArrayOutputStream stderr) {
      super(stdout, stderr);
      this.stdout = stdout;
      this.stderr = stderr;
    }

    @Override
    public void onComplete() {
      super.onComplete();
      LOG.info("onComplete called");
      try {
        stderr.flush();
        stdout.flush();
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    public ByteArrayOutputStream getStdout() {
      return stdout;
    }

    public ByteArrayOutputStream getStderr() {
      return stderr;
    }
  }
}

View File

@@ -0,0 +1,139 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.integ;
import java.util.Arrays;
import org.junit.Assert;
import org.junit.Test;
/**
 * Smoke tests to run as part of verification.
 */
public class ITTestHoodieSanity extends ITTestBase {

  /**
   * Sanity check that a trivial command can be exec'd in a container and its
   * output captured.
   */
  @Test
  public void testRunEcho() throws Exception {
    String[] cmd = new String[]{"echo", "Happy Testing"};
    TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
        cmd, true);
    String stdout = callback.getStdout().toString();
    String stderr = callback.getStderr().toString();
    LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
    LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
  }

  /**
   * A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
   * data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
   * query in hive console.
   */
  @Test
  public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
    String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
    testRunHoodieJavaAppOnCOWTable(hiveTableName, true);
  }

  /**
   * A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with multiple partition-keys
   * data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
   * query in hive console.
   */
  @Test
  public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
    String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
    testRunHoodieJavaAppOnCOWTable(hiveTableName, false);
  }

  /**
   * A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie
   * data-set and performs upserts on it. Hive integration and upsert functionality is checked by running a count
   * query in hive console.
   * TODO: Add spark-shell test-case
   *
   * @param hiveTableName hive table the app syncs to; dropped first if present
   * @param singlePartitionKey when true, the app uses its default single partition key
   */
  public void testRunHoodieJavaAppOnCOWTable(String hiveTableName, boolean singlePartitionKey) throws Exception {

    // Drop Table if it exists
    {
      String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
      executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
    }

    // Ensure table does not exist
    {
      String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
      TestExecStartResultCallback callback =
          executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
      String stderr = callback.getStderr().toString();
      String stdout = callback.getStdout().toString();
      LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
      LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
      Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
    }

    // Run Hoodie Java App
    {
      String[] cmd;
      if (singlePartitionKey) {
        cmd = new String[]{
            HOODIE_JAVA_APP,
            "--hive-sync",
            "--hive-url", HIVE_SERVER_JDBC_URL,
            "--hive-table", hiveTableName
        };
      } else {
        cmd = new String[]{
            HOODIE_JAVA_APP,
            "--hive-sync",
            "--hive-url", HIVE_SERVER_JDBC_URL,
            "--use-multi-partition-keys",
            "--hive-table", hiveTableName
        };
      }
      TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
          cmd, true);
      String stdout = callback.getStdout().toString().trim();
      String stderr = callback.getStderr().toString().trim();
      LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
      LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
    }

    // Ensure table does exist
    {
      String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
      TestExecStartResultCallback callback =
          executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
      String stderr = callback.getStderr().toString().trim();
      String stdout = callback.getStdout().toString().trim();
      LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
      LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
      Assert.assertEquals("Table exists", hiveTableName, stdout);
    }

    // Ensure row count is 100 (without duplicates)
    {
      String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
      TestExecStartResultCallback callback =
          executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
      String stderr = callback.getStderr().toString().trim();
      String stdout = callback.getStdout().toString().trim();
      LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
      LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
      // NOTE(review): assumes the hive CLI prints only the count on stdout
      // (warnings go to stderr) — confirm against the hive image's logging
      // config; SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD exists for this reason.
      Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
          Integer.parseInt(stdout));
    }
  }
}

View File

@@ -0,0 +1,23 @@
#
# Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Root logger: WARN and above, sent to console appender A1.
log4j.rootLogger=WARN, A1
# Hoodie's own classes log at INFO so integration-test output shows progress.
log4j.category.com.uber=INFO
# Parquet hadoop writers are noisy at INFO; cap them at WARN.
log4j.category.org.apache.parquet.hadoop=WARN
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Pattern: elapsed-ms [thread] LEVEL logger NDC - message
log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n