ARG HADOOP_VERSION=2.8.4 ARG HIVE_VERSION=2.3.3 FROM varadarb/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION} ENV ENABLE_INIT_DAEMON true ENV INIT_DAEMON_BASE_URI http://identifier/init-daemon ENV INIT_DAEMON_STEP spark_master_init ARG SPARK_VERSION=2.3.1 ARG SPARK_HADOOP_VERSION=2.7 ENV SPARK_VERSION ${SPARK_VERSION} ENV HADOOP_VERSION ${SPARK_HADOOP_VERSION} COPY wait-for-step.sh / COPY execute-step.sh / COPY finish-step.sh / RUN echo "Installing Spark-version (${SPARK_VERSION})" \ && wget http://apache.mirror.iphh.net/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ && tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} /opt/spark \ && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ && cd / #Give permission to execute scripts RUN chmod +x /wait-for-step.sh && chmod +x /execute-step.sh && chmod +x /finish-step.sh # Fix the value of PYTHONHASHSEED # Note: this is needed when you use Python 3.3 or greater ENV PYTHONHASHSEED 1 ENV SPARK_HOME /opt/spark ENV SPARK_INSTALL ${SPARK_HOME} ENV SPARK_CONF_DIR ${SPARK_HOME}/conf ENV PATH $SPARK_INSTALL/bin:$PATH ENV SPARK_DRIVER_PORT 5001 ENV SPARK_UI_PORT 5002 ENV SPARK_BLOCKMGR_PORT 5003 EXPOSE $SPARK_DRIVER_PORT $SPARK_UI_PORT $SPARK_BLOCKMGR_PORT # Without this spark-shell fails - Download if it is not already there in $SPARK_INSTALL RUN wget -nc -q -O "${SPARK_INSTALL}/jars/jersey-bundle-1.19.4.jar" "http://repo1.maven.org/maven2/com/sun/jersey/jersey-bundle/1.19.4/jersey-bundle-1.19.4.jar"