Docker Container Build and Run setup with foundations for adding docker integration tests. Docker images built with Hadoop 2.8.4 Hive 2.3.3 and Spark 2.3.1 and published to docker-hub

Look at quickstart document for how to setup docker and run demo
2018-08-21 22:54:57 -07:00
parent 9710b5a3a6
commit f3418e4718
63 changed files with 8952 additions and 9 deletions
--- a/docker/hoodie/hadoop/hive_base/Dockerfile
+++ b/docker/hoodie/hadoop/hive_base/Dockerfile
@@ -0,0 +1,51 @@
+ARG HADOOP_VERSION=2.8.4 
+FROM varadarb/hudi-hadoop_${HADOOP_VERSION}-base:latest
+
+ENV HIVE_HOME /opt/hive
+ENV PATH $HIVE_HOME/bin:$PATH
+ENV HADOOP_HOME /opt/hadoop-$HADOOP_VERSION
+
+WORKDIR /opt
+
+ARG HIVE_VERSION=2.3.3
+ARG HIVE_URL=https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
+ENV HIVE_VERSION ${HIVE_VERSION}
+ENV HIVE_URL ${HIVE_URL}
+
+#Install Hive MySQL, PostgreSQL JDBC
+RUN echo "Hive URL is :${HIVE_URL}" && wget ${HIVE_URL} -O hive.tar.gz && \
+	tar -xzvf hive.tar.gz && mv *hive*-bin hive && \
+        ln -s /usr/share/java/mysql-connector-java.jar $HIVE_HOME/lib/mysql-connector-java.jar && \
+	wget https://jdbc.postgresql.org/download/postgresql-9.4.1212.jar -O $HIVE_HOME/lib/postgresql-jdbc.jar && \
+	rm hive.tar.gz && mkdir -p /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/
+
+#Spark should be compiled with Hive to be able to use it
+#hive-site.xml should be copied to $SPARK_HOME/conf folder
+
+#Custom configuration goes here
+ADD conf/hive-site.xml $HADOOP_CONF_DIR
+ADD conf/beeline-log4j2.properties $HIVE_HOME/conf
+ADD conf/hive-env.sh $HIVE_HOME/conf
+ADD conf/hive-exec-log4j2.properties $HIVE_HOME/conf
+ADD conf/hive-log4j2.properties $HIVE_HOME/conf
+ADD conf/ivysettings.xml $HIVE_HOME/conf
+ADD conf/llap-daemon-log4j2.properties $HIVE_HOME/conf
+
+# Setup Hoodie Library jars
+ADD target/ /var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/
+
+ENV HUDI_HADOOP_BUNDLE=/var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar
+ENV HUDI_HIVE_BUNDLE=/var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar
+ENV HUDI_SPARK_BUNDLE=/var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar
+ENV HUDI_UTILITIES_BUNDLE=/var/hoodie/ws/docker/hoodie/hadoop/hive_base/target/hoodie-utilities.jar
+
+COPY startup.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/startup.sh
+
+COPY entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+ENV PATH $HIVE_HOME/bin/:$PATH
+
+ENTRYPOINT ["entrypoint.sh"]
+CMD startup.sh
--- a/docker/hoodie/hadoop/hive_base/conf/beeline-log4j2.properties
+++ b/docker/hoodie/hadoop/hive_base/conf/beeline-log4j2.properties
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = INFO
+name = BeelineLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.hive.log.level = WARN
+property.hive.root.logger = console
+
+# list of all appenders
+appenders = console
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
+
+# list of all loggers
+loggers = HiveConnection
+
+# HiveConnection logs useful info for dynamic service discovery
+logger.HiveConnection.name = org.apache.hive.jdbc.HiveConnection
+logger.HiveConnection.level = INFO
+
+# root logger
+rootLogger.level = ${sys:hive.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
--- a/docker/hoodie/hadoop/hive_base/conf/hive-env.sh
+++ b/docker/hoodie/hadoop/hive_base/conf/hive-env.sh
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Set Hive and Hadoop environment variables here. These variables can be used
+# to control the execution of Hive. It should be used by admins to configure
+# the Hive installation (so that users do not have to set environment variables
+# or set command line parameters to get correct behavior).
+#
+# The hive service being invoked (CLI/HWI etc.) is available via the environment
+# variable SERVICE
+
+
+# Hive Client memory usage can be an issue if a large number of clients
+# are running at the same time. The flags below have been useful in 
+# reducing memory usage:
+#
+# if [ "$SERVICE" = "cli" ]; then
+#   if [ -z "$DEBUG" ]; then
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   else
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
+#   fi
+# fi
+
+# The heap size of the jvm stared by hive shell script can be controlled via:
+#
+# export HADOOP_HEAPSIZE=1024
+#
+# Larger heap size may be required when running queries over large number of files or partitions. 
+# By default hive shell scripts use a heap size of 256 (MB).  Larger heap size would also be 
+# appropriate for hive server (hwi etc).
+
+
+# Set HADOOP_HOME to point to a specific hadoop install directory
+# HADOOP_HOME=${bin}/../../hadoop
+
+# Hive Configuration Directory can be controlled by:
+# export HIVE_CONF_DIR=
+
+# Folder containing extra ibraries required for hive compilation/execution can be controlled by:
+# export HIVE_AUX_JARS_PATH=
--- a/docker/hoodie/hadoop/hive_base/conf/hive-exec-log4j2.properties
+++ b/docker/hoodie/hadoop/hive_base/conf/hive-exec-log4j2.properties
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = INFO
+name = HiveExecLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.hive.log.level = INFO
+property.hive.root.logger = FA
+property.hive.query.id = hadoop
+property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
+property.hive.log.file = ${sys:hive.query.id}.log
+
+# list of all appenders
+appenders = console, FA
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
+
+# simple file appender
+appender.FA.type = File
+appender.FA.name = FA
+appender.FA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file}
+appender.FA.layout.type = PatternLayout
+appender.FA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
+
+# list of all loggers
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX
+
+logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
+logger.NIOServerCnxn.level = WARN
+
+logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
+logger.ClientCnxnSocketNIO.level = WARN
+
+logger.DataNucleus.name = DataNucleus
+logger.DataNucleus.level = ERROR
+
+logger.Datastore.name = Datastore
+logger.Datastore.level = ERROR
+
+logger.JPOX.name = JPOX
+logger.JPOX.level = ERROR
+
+# root logger
+rootLogger.level = ${sys:hive.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
--- a/docker/hoodie/hadoop/hive_base/conf/hive-log4j2.properties
+++ b/docker/hoodie/hadoop/hive_base/conf/hive-log4j2.properties
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = INFO
+name = HiveLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.hive.log.level = INFO
+property.hive.root.logger = DRFA
+property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name}
+property.hive.log.file = hive.log
+
+# list of all appenders
+appenders = console, DRFA
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n
+
+# daily rolling file appender
+appender.DRFA.type = RollingFile
+appender.DRFA.name = DRFA
+appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file}
+# Use %pid in the filePattern to append <process-id>@<host-name> to the filename if you want separate log files for different CLI session
+appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd}
+appender.DRFA.layout.type = PatternLayout
+appender.DRFA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
+appender.DRFA.policies.type = Policies
+appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy
+appender.DRFA.policies.time.interval = 1
+appender.DRFA.policies.time.modulate = true
+appender.DRFA.strategy.type = DefaultRolloverStrategy
+appender.DRFA.strategy.max = 30
+
+# list of all loggers
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX
+
+logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
+logger.NIOServerCnxn.level = WARN
+
+logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
+logger.ClientCnxnSocketNIO.level = WARN
+
+logger.DataNucleus.name = DataNucleus
+logger.DataNucleus.level = ERROR
+
+logger.Datastore.name = Datastore
+logger.Datastore.level = ERROR
+
+logger.JPOX.name = JPOX
+logger.JPOX.level = ERROR
+
+# root logger
+rootLogger.level = ${sys:hive.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:hive.root.logger}
--- a/docker/hoodie/hadoop/hive_base/conf/hive-site.xml
+++ b/docker/hoodie/hadoop/hive_base/conf/hive-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+--><configuration>
+</configuration>
--- a/docker/hoodie/hadoop/hive_base/conf/ivysettings.xml
+++ b/docker/hoodie/hadoop/hive_base/conf/ivysettings.xml
@@ -0,0 +1,45 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   -->
+
+<!--This file is used by grapes to download dependencies from a maven repository.
+    This is just a template and can be edited to add more repositories.
+-->
+
+<ivysettings>
+  <!--name of the defaultResolver should always be 'downloadGrapes'. -->
+  <settings defaultResolver="downloadGrapes"/>
+  <!-- Only set maven.local.repository if not already set -->
+  <property name="maven.local.repository" value="${user.home}/.m2/repository" override="false" />
+  <property name="m2-pattern"
+            value="file:${maven.local.repository}/[organisation]/[module]/[revision]/[module]-[revision](-[classifier]).[ext]"
+            override="false"/>
+  <resolvers>
+    <!-- more resolvers can be added here -->
+    <chain name="downloadGrapes">
+      <!-- This resolver uses ibiblio to find artifacts, compatible with maven2 repository -->
+      <ibiblio name="central" m2compatible="true"/>
+      <url name="local-maven2" m2compatible="true">
+        <artifact pattern="${m2-pattern}"/>
+      </url>
+      <!-- File resolver to add jars from the local system. -->
+      <filesystem name="test" checkmodified="true">
+        <artifact pattern="/tmp/[module]-[revision](-[classifier]).jar"/>
+      </filesystem>
+
+    </chain>
+  </resolvers>
+</ivysettings>
--- a/docker/hoodie/hadoop/hive_base/conf/llap-daemon-log4j2.properties
+++ b/docker/hoodie/hadoop/hive_base/conf/llap-daemon-log4j2.properties
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+status = INFO
+name = LlapDaemonLog4j2
+packages = org.apache.hadoop.hive.ql.log
+
+# list of properties
+property.llap.daemon.log.level = INFO
+property.llap.daemon.root.logger = console
+property.llap.daemon.log.dir = .
+property.llap.daemon.log.file = llapdaemon.log
+property.llap.daemon.historylog.file = llapdaemon_history.log
+property.llap.daemon.log.maxfilesize = 256MB
+property.llap.daemon.log.maxbackupindex = 20
+
+# list of all appenders
+appenders = console, RFA, HISTORYAPPENDER
+
+# console appender
+appender.console.type = Console
+appender.console.name = console
+appender.console.target = SYSTEM_ERR
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t%x] %p %c{2} : %m%n
+
+# rolling file appender
+appender.RFA.type = RollingFile
+appender.RFA.name = RFA
+appender.RFA.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file}
+appender.RFA.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file}_%i
+appender.RFA.layout.type = PatternLayout
+appender.RFA.layout.pattern = %d{ISO8601} %-5p [%t%x]: %c{2} (%F:%M(%L)) - %m%n
+appender.RFA.policies.type = Policies
+appender.RFA.policies.size.type = SizeBasedTriggeringPolicy
+appender.RFA.policies.size.size = ${sys:llap.daemon.log.maxfilesize}
+appender.RFA.strategy.type = DefaultRolloverStrategy
+appender.RFA.strategy.max = ${sys:llap.daemon.log.maxbackupindex}
+
+# history file appender
+appender.HISTORYAPPENDER.type = RollingFile
+appender.HISTORYAPPENDER.name = HISTORYAPPENDER
+appender.HISTORYAPPENDER.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file}
+appender.HISTORYAPPENDER.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file}_%i
+appender.HISTORYAPPENDER.layout.type = PatternLayout
+appender.HISTORYAPPENDER.layout.pattern = %m%n
+appender.HISTORYAPPENDER.policies.type = Policies
+appender.HISTORYAPPENDER.policies.size.type = SizeBasedTriggeringPolicy
+appender.HISTORYAPPENDER.policies.size.size = ${sys:llap.daemon.log.maxfilesize}
+appender.HISTORYAPPENDER.strategy.type = DefaultRolloverStrategy
+appender.HISTORYAPPENDER.strategy.max = ${sys:llap.daemon.log.maxbackupindex}
+
+# list of all loggers
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, HistoryLogger
+
+logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn
+logger.NIOServerCnxn.level = WARN
+
+logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO
+logger.ClientCnxnSocketNIO.level = WARN
+
+logger.DataNucleus.name = DataNucleus
+logger.DataNucleus.level = ERROR
+
+logger.Datastore.name = Datastore
+logger.Datastore.level = ERROR
+
+logger.JPOX.name = JPOX
+logger.JPOX.level = ERROR
+
+logger.HistoryLogger.name = org.apache.hadoop.hive.llap.daemon.HistoryLogger
+logger.HistoryLogger.level = INFO
+logger.HistoryLogger.additivity = false
+logger.HistoryLogger.appenderRefs = HistoryAppender
+logger.HistoryLogger.appenderRef.HistoryAppender.ref = HISTORYAPPENDER
+
+# root logger
+rootLogger.level = ${sys:llap.daemon.log.level}
+rootLogger.appenderRefs = root
+rootLogger.appenderRef.root.ref = ${sys:llap.daemon.root.logger}
--- a/docker/hoodie/hadoop/hive_base/entrypoint.sh
+++ b/docker/hoodie/hadoop/hive_base/entrypoint.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+# Set some sensible defaults
+export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}
+
+function addProperty() {
+  local path=$1
+  local name=$2
+  local value=$3
+
+  local entry="<property><name>$name</name><value>${value}</value></property>"
+  local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
+  sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
+}
+
+function configure() {
+    local path=$1
+    local module=$2
+    local envPrefix=$3
+
+    local var
+    local value
+    
+    echo "Configuring $module"
+    for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do 
+        name=`echo ${c} | perl -pe 's/___/-/g; s/__/_/g; s/_/./g'`
+        var="${envPrefix}_${c}"
+        value=${!var}
+        echo " - Setting $name=$value"
+        addProperty $path $name "$value"
+    done
+}
+
+configure /etc/hadoop/core-site.xml core CORE_CONF
+configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
+configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
+configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
+configure /etc/hadoop/kms-site.xml kms KMS_CONF
+configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF
+configure /etc/hadoop/hive-site.xml hive HIVE_SITE_CONF
+
+if [ "$MULTIHOMED_NETWORK" = "1" ]; then
+    echo "Configuring for multihomed network"
+
+    # HDFS
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
+    addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true
+
+    # YARN
+    addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0
+
+    # MAPRED
+    addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
+fi
+
+if [ -n "$GANGLIA_HOST" ]; then
+    mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
+    mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig
+
+    for module in mapred jvm rpc ugi; do
+        echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
+        echo "$module.period=10"
+        echo "$module.servers=$GANGLIA_HOST:8649"
+    done > /etc/hadoop/hadoop-metrics.properties
+    
+    for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
+        echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
+        echo "$module.sink.ganglia.period=10"
+        echo "$module.sink.ganglia.supportsparse=true"
+        echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
+        echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
+        echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
+    done > /etc/hadoop/hadoop-metrics2.properties
+fi
+
+function wait_for_it()
+{
+    local serviceport=$1
+    local service=${serviceport%%:*}
+    local port=${serviceport#*:}
+    local retry_seconds=5
+    local max_try=100
+    let i=1
+
+    nc -z $service $port
+    result=$?
+
+    until [ $result -eq 0 ]; do
+      echo "[$i/$max_try] check for ${service}:${port}..."
+      echo "[$i/$max_try] ${service}:${port} is not available yet"
+      if (( $i == $max_try )); then
+        echo "[$i/$max_try] ${service}:${port} is still not available; giving up after ${max_try} tries. :/"
+        exit 1
+      fi
+      
+      echo "[$i/$max_try] try in ${retry_seconds}s once again ..."
+      let "i++"
+      sleep $retry_seconds
+
+      nc -z $service $port
+      result=$?
+    done
+    echo "[$i/$max_try] $service:${port} is available."
+}
+
+for i in ${SERVICE_PRECONDITION[@]}
+do
+    wait_for_it ${i}
+done
+
+exec $@
--- a/docker/hoodie/hadoop/hive_base/pom.xml
+++ b/docker/hoodie/hadoop/hive_base/pom.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+  ~
+  ~ Licensed under the Apache License, Version 2.0 (the "License");
+  ~ you may not use this file except in compliance with the License.
+  ~ You may obtain a copy of the License at
+  ~
+  ~          http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <artifactId>hoodie-hadoop-docker</artifactId>
+    <groupId>com.uber.hoodie</groupId>
+    <version>0.4.5-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+  <packaging>pom</packaging>
+  <artifactId>hoodie-hadoop-hive-docker</artifactId>
+
+  <description>Base Docker Image with Hoodie</description>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <checkstyle.skip>true</checkstyle.skip>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.uber.hoodie</groupId>
+      <artifactId>hoodie-hadoop-base-docker</artifactId>
+      <version>${project.version}</version>
+      <type>pom</type>
+      <scope>import</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <version>1.7</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <configuration>
+              <tasks>
+                <copy file="${project.basedir}/../../../../packaging/hoodie-hadoop-mr-bundle/target/hoodie-hadoop-mr-bundle-${project.version}.jar"
+                      tofile="target/hoodie-hadoop-mr-bundle.jar" />
+                <copy file="${project.basedir}/../../../../packaging/hoodie-hive-bundle/target/hoodie-hive-bundle-${project.version}.jar"
+                      tofile="target/hoodie-hive-bundle.jar" />
+                <copy file="${project.basedir}/../../../../packaging/hoodie-spark-bundle/target/hoodie-spark-bundle-${project.version}.jar"
+                      tofile="target/hoodie-spark-bundle.jar" />
+                <copy file="${project.basedir}/../../../../hoodie-utilities/target/hoodie-utilities-${project.version}.jar"
+                      tofile="target/hoodie-utilities.jar" />
+              </tasks>
+            </configuration>
+            <goals>
+              <goal>run</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <!-- Build Docker image -->
+      <plugin>
+        <groupId>com.spotify</groupId>
+        <artifactId>dockerfile-maven-plugin</artifactId>
+        <version>${dockerfile.maven.version}</version>
+        <executions>
+          <execution>
+            <id>tag-latest</id>
+            <phase>pre-integration-test</phase>
+            <goals>
+              <goal>build</goal>
+              <goal>tag</goal>
+              <!-- <goal>push</goal> -->
+            </goals>
+            <configuration>
+              <skip>${docker.build.skip}</skip>
+              <pullNewerImage>false</pullNewerImage>
+              <repository>varadarb/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}</repository>
+              <forceTags>true</forceTags>
+              <tag>latest</tag>
+            </configuration>
+          </execution>
+          <execution>
+            <id>tag-version</id>
+            <phase>pre-integration-test</phase>
+            <goals>
+              <goal>build</goal>
+              <goal>tag</goal>
+              <!-- <goal>push</goal> -->
+            </goals>
+            <configuration>
+              <skip>${docker.build.skip}</skip>
+              <pullNewerImage>false</pullNewerImage>
+              <repository>varadarb/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}</repository>
+              <forceTags>true</forceTags>
+              <tag>${project.version}</tag>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
--- a/docker/hoodie/hadoop/hive_base/startup.sh
+++ b/docker/hoodie/hadoop/hive_base/startup.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+hadoop fs -mkdir       /tmp
+hadoop fs -mkdir -p    /user/hive/warehouse
+hadoop fs -chmod g+w   /tmp
+hadoop fs -chmod g+w   /user/hive/warehouse
+
+cd $HIVE_HOME/bin
+export AUX_CLASSPATH=file://${HUDI_HADOOP_BUNDLE}
+./hiveserver2 --hiveconf hive.server2.enable.doAs=false  --hiveconf hive.aux.jars.path=file://${HUDI_HADOOP_BUNDLE}