From 8cb86b4d36de2ee7f8360b715f9f85c635c71a90 Mon Sep 17 00:00:00 2001 From: Vinoth Govindarajan Date: Sun, 31 May 2020 22:53:50 -0700 Subject: [PATCH] Added python3 to the spark_base docker image to support pyspark (#1632) --- docker/hoodie/hadoop/spark_base/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/hoodie/hadoop/spark_base/Dockerfile b/docker/hoodie/hadoop/spark_base/Dockerfile index 520afacea..7eeab093a 100644 --- a/docker/hoodie/hadoop/spark_base/Dockerfile +++ b/docker/hoodie/hadoop/spark_base/Dockerfile @@ -40,6 +40,12 @@ RUN echo "Installing Spark-version (${SPARK_VERSION})" \ && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ && cd / +# Install python3 to enable and use pyspark shell +RUN apt-get update \ + && apt-get -yq install python3 \ + && ln -sf $(which python3) /usr/bin/python \ + && rm -rf /var/lib/apt/lists/* + #Give permission to execute scripts RUN chmod +x /wait-for-step.sh && chmod +x /execute-step.sh && chmod +x /finish-step.sh