Docker container build-and-run setup, with foundations for adding Docker integration tests. Docker images are built with Hadoop 2.8.4, Hive 2.3.3, and Spark 2.3.1, and published to Docker Hub.
See the quickstart document for how to set up Docker and run the demo.
This commit is contained in:
committed by
vinoth chandar
parent
9710b5a3a6
commit
f3418e4718
46
docker/hoodie/hadoop/spark_base/Dockerfile
Normal file
46
docker/hoodie/hadoop/spark_base/Dockerfile
Normal file
@@ -0,0 +1,46 @@
|
||||
# Base image is parameterized so the same Dockerfile can target other
# Hadoop/Hive combinations; these two ARGs must precede FROM to be usable in it.
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
FROM varadarb/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}

# Init-daemon coordination: demo containers gate their startup steps through
# this service (see wait-for-step.sh / execute-step.sh / finish-step.sh).
ENV ENABLE_INIT_DAEMON=true
ENV INIT_DAEMON_BASE_URI=http://identifier/init-daemon
ENV INIT_DAEMON_STEP=spark_master_init

ARG SPARK_VERSION=2.3.1
# Version of the Hadoop client libraries bundled inside the Spark tarball
# (independent of the HADOOP_VERSION the base image itself runs).
ARG SPARK_HADOOP_VERSION=2.7

ENV SPARK_VERSION=${SPARK_VERSION}
ENV HADOOP_VERSION=${SPARK_HADOOP_VERSION}

COPY wait-for-step.sh /
COPY execute-step.sh /
COPY finish-step.sh /

# Download Spark from the stable Apache archive over HTTPS. Regular mirrors
# (e.g. apache.mirror.iphh.net) only retain the latest releases, so a mirror
# URL silently breaks as soon as this Spark version is archived. The tarball
# is removed in the same RUN so it never persists in a layer.
RUN echo "Installing Spark-version (${SPARK_VERSION})" \
    && wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
    && tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
    && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /opt/spark \
    && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz

# Give permission to execute the init-daemon helper scripts
RUN chmod +x /wait-for-step.sh /execute-step.sh /finish-step.sh

# Fix the value of PYTHONHASHSEED
# Note: this is needed when you use Python 3.3 or greater
ENV PYTHONHASHSEED=1

ENV SPARK_HOME=/opt/spark
ENV SPARK_INSTALL=${SPARK_HOME}
ENV SPARK_CONF_DIR=${SPARK_HOME}/conf
ENV PATH=$SPARK_INSTALL/bin:$PATH

ENV SPARK_DRIVER_PORT=5001
ENV SPARK_UI_PORT=5002
ENV SPARK_BLOCKMGR_PORT=5003

# Documentation only: these ports still have to be published at `docker run`.
EXPOSE $SPARK_DRIVER_PORT $SPARK_UI_PORT $SPARK_BLOCKMGR_PORT

# Without this jar spark-shell fails; -nc skips the download if the jar is
# already present in $SPARK_INSTALL. Maven Central must be fetched over HTTPS
# (plain http://repo1.maven.org has returned 501 since January 2020).
RUN wget -nc -q -O "${SPARK_INSTALL}/jars/jersey-bundle-1.19.4.jar" "https://repo1.maven.org/maven2/com/sun/jersey/jersey-bundle/1.19.4/jersey-bundle-1.19.4.jar"
|
||||
|
||||
14
docker/hoodie/hadoop/spark_base/execute-step.sh
Normal file
14
docker/hoodie/hadoop/spark_base/execute-step.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
# Notify the init daemon that this pipeline step is executing.
# Polls every 5 seconds until the daemon acknowledges with HTTP 204.
# No-op unless ENABLE_INIT_DAEMON is set to the string "true".

# ENABLE_INIT_DAEMON must be quoted: when the variable is unset, the unquoted
# form expands to `[ = "true" ]`, which is a test-syntax error, not "false".
if [ "$ENABLE_INIT_DAEMON" = "true" ]
then
    echo "Execute step ${INIT_DAEMON_STEP} in pipeline"
    while true; do
        sleep 5
        echo -n '.'
        # -w "%{http_code}" with -o /dev/null captures only the status code.
        string=$(curl -sL -w "%{http_code}" -X PUT "$INIT_DAEMON_BASE_URI/execute?step=$INIT_DAEMON_STEP" -o /dev/null)
        [ "$string" = "204" ] && break
    done
    echo "Notified execution of step ${INIT_DAEMON_STEP}"
fi
|
||||
|
||||
16
docker/hoodie/hadoop/spark_base/finish-step.sh
Normal file
16
docker/hoodie/hadoop/spark_base/finish-step.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
# Notify the init daemon that this pipeline step has finished.
# Polls every 5 seconds until the daemon acknowledges with HTTP 204.
# No-op unless ENABLE_INIT_DAEMON is set to the string "true".

# ENABLE_INIT_DAEMON must be quoted: when the variable is unset, the unquoted
# form expands to `[ = "true" ]`, which is a test-syntax error, not "false".
if [ "$ENABLE_INIT_DAEMON" = "true" ]
then
    echo "Finish step ${INIT_DAEMON_STEP} in pipeline"
    while true; do
        sleep 5
        echo -n '.'
        # -w "%{http_code}" with -o /dev/null captures only the status code.
        string=$(curl -sL -w "%{http_code}" -X PUT "$INIT_DAEMON_BASE_URI/finish?step=$INIT_DAEMON_STEP" -o /dev/null)
        [ "$string" = "204" ] && break
    done
    echo "Notified finish of step ${INIT_DAEMON_STEP}"
fi
|
||||
|
||||
|
||||
|
||||
89
docker/hoodie/hadoop/spark_base/pom.xml
Normal file
89
docker/hoodie/hadoop/spark_base/pom.xml
Normal file
@@ -0,0 +1,89 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  ~ Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
  ~
  ~ Licensed under the Apache License, Version 2.0 (the "License");
  ~ you may not use this file except in compliance with the License.
  ~ You may obtain a copy of the License at
  ~
  ~          http://www.apache.org/licenses/LICENSE-2.0
  ~
  ~ Unless required by applicable law or agreed to in writing, software
  ~ distributed under the License is distributed on an "AS IS" BASIS,
  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  ~ See the License for the specific language governing permissions and
  ~ limitations under the License.
  -->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <parent>
    <artifactId>hoodie-hadoop-docker</artifactId>
    <groupId>com.uber.hoodie</groupId>
    <version>0.4.5-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
  <packaging>pom</packaging>
  <artifactId>hoodie-hadoop-sparkbase-docker</artifactId>

  <description>Base Docker Image with Hoodie</description>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <checkstyle.skip>true</checkstyle.skip>
  </properties>

  <dependencies>
    <!-- Depends on the hive docker module so the Maven reactor builds the
         base image before this one. NOTE: <scope>import</scope> was removed —
         the import scope is only valid inside <dependencyManagement>; in a
         plain <dependencies> block Maven warns and ignores it, so default
         (compile) scope on a pom-type dependency is the intended form. -->
    <dependency>
      <groupId>com.uber.hoodie</groupId>
      <artifactId>hoodie-hadoop-hive-docker</artifactId>
      <version>${project.version}</version>
      <type>pom</type>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Build Docker image and tag it twice: "latest" and the project version.
           The push goal is intentionally commented out; publishing happens
           through a separate release flow. -->
      <plugin>
        <groupId>com.spotify</groupId>
        <artifactId>dockerfile-maven-plugin</artifactId>
        <version>${dockerfile.maven.version}</version>
        <executions>
          <execution>
            <id>tag-latest</id>
            <phase>pre-integration-test</phase>
            <goals>
              <goal>build</goal>
              <goal>tag</goal>
              <!-- <goal>push</goal> -->
            </goals>
            <configuration>
              <skip>${docker.build.skip}</skip>
              <pullNewerImage>false</pullNewerImage>
              <repository>varadarb/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkbase_${docker.spark.version}</repository>
              <forceTags>true</forceTags>
              <tag>latest</tag>
            </configuration>
          </execution>
          <execution>
            <id>tag-version</id>
            <phase>pre-integration-test</phase>
            <goals>
              <goal>build</goal>
              <goal>tag</goal>
              <!-- <goal>push</goal> -->
            </goals>
            <configuration>
              <skip>${docker.build.skip}</skip>
              <pullNewerImage>false</pullNewerImage>
              <repository>varadarb/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkbase_${docker.spark.version}</repository>
              <forceTags>true</forceTags>
              <tag>${project.version}</tag>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
|
||||
13
docker/hoodie/hadoop/spark_base/wait-for-step.sh
Normal file
13
docker/hoodie/hadoop/spark_base/wait-for-step.sh
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Block until the init daemon reports that this pipeline step may start.
# Polls every 5 seconds until the daemon's canStart endpoint returns "true".
# No-op unless ENABLE_INIT_DAEMON is set to the string "true".

# ENABLE_INIT_DAEMON must be quoted: when the variable is unset, the unquoted
# form expands to `[ = "true" ]`, which is a test-syntax error, not "false".
if [ "$ENABLE_INIT_DAEMON" = "true" ]
then
    echo "Validating if step ${INIT_DAEMON_STEP} can start in pipeline"
    while true; do
        sleep 5
        echo -n '.'
        string=$(curl -s "$INIT_DAEMON_BASE_URI/canStart?step=$INIT_DAEMON_STEP")
        [ "$string" = "true" ] && break
    done
    echo "Can start step ${INIT_DAEMON_STEP}"
fi
|
||||
Reference in New Issue
Block a user