1
0

Docker Container Build and Run setup with foundations for adding docker integration tests. Docker images built with Hadoop 2.8.4 Hive 2.3.3 and Spark 2.3.1 and published to docker-hub

Look at quickstart document for how to setup docker and run demo
This commit is contained in:
Balaji Varadarajan
2018-08-21 22:54:57 -07:00
committed by vinoth chandar
parent 9710b5a3a6
commit f3418e4718
63 changed files with 8952 additions and 9 deletions

View File

@@ -0,0 +1,45 @@
FROM frolvlad/alpine-oraclejdk8
MAINTAINER Hoodie
USER root
# Default to UTF-8 file.encoding
ENV LANG C.UTF-8
# Updating & Installing packages
RUN apk add net-tools curl bash perl procps
ARG HADOOP_VERSION=2.8.4
ARG HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION ${HADOOP_VERSION}
ENV HADOOP_URL ${HADOOP_URL}
RUN set -x \
&& echo "Fetch URL2 is : ${HADOOP_URL}" \
&& curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
&& curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \
&& mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
&& tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
&& rm /tmp/hadoop.tar.gz* \
&& ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
&& cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
&& mkdir /hadoop-data
ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV MULTIHOMED_NETWORK=1
ENV HADOOP_HOME=${HADOOP_PREFIX}
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV USER=root
ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
# Exposing a union of ports across hadoop versions
# Well known ports including ssh
EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
ADD entrypoint.sh /entrypoint.sh
ADD export_container_ip.sh /usr/bin/
RUN chmod a+x /usr/bin/export_container_ip.sh \
&& chmod a+x /entrypoint.sh
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View File

@@ -0,0 +1,91 @@
#!/bin/bash
#######################################################################################
## COPIED FROM ##
## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ##
# ##
#######################################################################################
# Set some sensible defaults
export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}
function addProperty() {
local path=$1
local name=$2
local value=$3
local entry="<property><name>$name</name><value>${value}</value></property>"
local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
}
function configure() {
local path=$1
local module=$2
local envPrefix=$3
local var
local value
echo "Configuring $module"
for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do
name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'`
var="${envPrefix}_${c}"
value=${!var}
echo " - Setting $name=$value"
addProperty /etc/hadoop/$module-site.xml $name "$value"
done
}
configure /etc/hadoop/core-site.xml core CORE_CONF
configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
configure /etc/hadoop/kms-site.xml kms KMS_CONF
if [ "$MULTIHOMED_NETWORK" = "1" ]; then
echo "Configuring for multihomed network"
# HDFS
addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true
# YARN
addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0
# MAPRED
addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
fi
if [ -n "$GANGLIA_HOST" ]; then
mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig
for module in mapred jvm rpc ugi; do
echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
echo "$module.period=10"
echo "$module.servers=$GANGLIA_HOST:8649"
done > /etc/hadoop/hadoop-metrics.properties
for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
echo "$module.sink.ganglia.period=10"
echo "$module.sink.ganglia.supportsparse=true"
echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
done > /etc/hadoop/hadoop-metrics2.properties
fi
# Save Container IP in ENV variable
/usr/bin/export_container_ip.sh
exec "$@"

View File

@@ -0,0 +1,13 @@
interfaces=( "en0" "eth0" )
ipAddr=""
for interface in "${interfaces[@]}"
do
ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head`
if [ -n "$ipAddr" ]; then
break
fi
done
echo "Container IP is set to : $ipAddr"
export MY_CONTAINER_IP=$ipAddr

View File

@@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hoodie-hadoop-docker</artifactId>
<groupId>com.uber.hoodie</groupId>
<version>0.4.5-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hoodie-hadoop-base-docker</artifactId>
<description>Base Docker Image with Hoodie</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<checkstyle.skip>true</checkstyle.skip>
</properties>
<dependencies>
<dependency>
<groupId>com.uber.hoodie</groupId>
<artifactId>hoodie-hadoop-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
<build>
<finalName>hoodie</finalName>
<plugins>
<!-- Build Docker image -->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<execution>
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>varadarb/hudi-hadoop_${docker.hadoop.version}-base</repository>
<forceTags>true</forceTags>
<tag>latest</tag>
</configuration>
</execution>
<execution>
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>varadarb/hudi-hadoop_${docker.hadoop.version}-base</repository>
<forceTags>true</forceTags>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>