diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark231.yml b/docker/compose/docker-compose_hadoop284_hive233_spark231.yml
index bbb9f10e9..2f539aefc 100644
--- a/docker/compose/docker-compose_hadoop284_hive233_spark231.yml
+++ b/docker/compose/docker-compose_hadoop284_hive233_spark231.yml
@@ -170,6 +170,42 @@ services:
- KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
- ALLOW_PLAINTEXT_LISTENER=yes
+ presto-coordinator-1:  # Presto coordinator; the role is selected by `command: coordinator` below
+ container_name: presto-coordinator-1
+ hostname: presto-coordinator-1  # the image's default PRESTO_DISCOVERY_URI (prestobase/Dockerfile) points at this host on port 8090
+ image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.217:latest
+ ports:
+ - '8090:8090'  # same port as http-server.http.port in etc/coordinator.properties.mustache
+ environment:
+ - PRESTO_JVM_MAX_HEAP=512M  # rendered into -Xmx by etc/jvm.config.mustache
+ - PRESTO_QUERY_MAX_MEMORY=1GB  # rendered into query.max-memory by etc/coordinator.properties.mustache
+ - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB  # NOTE(review): no template in this patch references this variable; it may have no effect - confirm
+ - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB  # NOTE(review): not referenced by any template in this patch - confirm
+ - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB  # NOTE(review): not referenced by any template in this patch - confirm
+ - TERM=xterm
+ links:
+ - "hivemetastore"  # etc/catalog/hive.properties points at thrift://hivemetastore:9083
+ command: coordinator  # entrypoint.sh launches Presto with etc/coordinator.properties
+
+ presto-worker-1:  # Presto worker; the role is selected by `command: worker` below (no host port is published)
+ container_name: presto-worker-1
+ hostname: presto-worker-1
+ image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.217:latest  # same image as presto-coordinator-1
+ depends_on: ["presto-coordinator-1"]  # start the coordinator container before this one
+ environment:
+ - PRESTO_JVM_MAX_HEAP=512M  # rendered into -Xmx by etc/jvm.config.mustache
+ - PRESTO_QUERY_MAX_MEMORY=1GB  # rendered into query.max-memory by etc/worker.properties.mustache
+ - PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB  # NOTE(review): no template in this patch references this variable; it may have no effect - confirm
+ - PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB  # NOTE(review): not referenced by any template in this patch - confirm
+ - PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB  # NOTE(review): not referenced by any template in this patch - confirm
+ - TERM=xterm
+ links:
+ - "hivemetastore"  # etc/catalog/hive.properties points at thrift://hivemetastore:9083
+ - "hiveserver"
+ - "hive-metastore-postgresql"
+ - "namenode"
+ command: worker  # entrypoint.sh launches Presto with etc/worker.properties
+
adhoc-1:
image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.3.1:latest
hostname: adhoc-1
@@ -187,6 +223,7 @@ services:
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
+ - "presto-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
@@ -205,6 +242,7 @@ services:
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
+ - "presto-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml
index 7191fd407..f4e6de43a 100644
--- a/docker/hoodie/hadoop/pom.xml
+++ b/docker/hoodie/hadoop/pom.xml
@@ -36,6 +36,7 @@
sparkmaster
sparkworker
sparkadhoc
+ prestobase
@@ -52,6 +53,7 @@
2.3.1
2.3.3
2.8.4
+ 0.217
1.4.3
true
diff --git a/docker/hoodie/hadoop/prestobase/Dockerfile b/docker/hoodie/hadoop/prestobase/Dockerfile
new file mode 100644
index 000000000..e8cbfa2b4
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/Dockerfile
@@ -0,0 +1,82 @@
+## Presto docker setup is based on https://github.com/smizy/docker-presto
+
+# HADOOP_VERSION selects the base image tag below. HIVE_VERSION is declared
+# but not referenced anywhere else in this Dockerfile.
+ARG HADOOP_VERSION=2.8.4
+ARG HIVE_VERSION=2.3.3
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest as hadoop-base
+
+ARG PRESTO_VERSION=0.217
+
+# Image-level defaults. entrypoint.sh renders etc/*.mustache from the
+# environment, so these can be overridden per container at run time.
+ENV PRESTO_VERSION ${PRESTO_VERSION}
+ENV PRESTO_HOME /opt/presto-server-${PRESTO_VERSION}
+ENV PRESTO_CONF_DIR ${PRESTO_HOME}/etc
+ENV PRESTO_LOG_DIR /var/log/presto
+ENV PRESTO_JVM_MAX_HEAP 2G
+ENV PRESTO_QUERY_MAX_MEMORY 1GB
+ENV PRESTO_QUERY_MAX_MEMORY_PER_NODE 512MB
+ENV PRESTO_DISCOVERY_URI http://presto-coordinator-1:8090
+ENV PATH $PATH:${PRESTO_HOME}/bin
+
+# Install OS packages, unpack the Presto server, fetch the Presto CLI and
+# create the `presto` user that entrypoint.sh switches to with gosu.
+# DEBIAN_FRONTEND is exported so that it covers `apt-get install` as well as
+# `apt-get update`; as a one-command prefix it only applied to the update.
+RUN set -x \
+    && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -yq update \
+    && apt-get -yq install \
+        bash \
+        less \
+        python \
+        tar \
+        wget \
+    ## - hadoop native dependency lib
+        bzip2 \
+        fts \
+        fuse \
+        libtirpc1 \
+        libsnappy1v5 \
+        zip \
+        cron \
+        gosu \
+    && rm -rf /var/lib/apt/lists/* \
+    ## presto-server
+    && wget -q -O - https://repo1.maven.org/maven2/com/facebook/presto/presto-server/${PRESTO_VERSION}/presto-server-${PRESTO_VERSION}.tar.gz \
+        | tar -xzf - -C /opt/ \
+    && mkdir -p /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/ \
+    ## presto-client
+    && wget -q -O /usr/local/bin/presto https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \
+    && chmod +x /usr/local/bin/presto \
+    ## user/dir/permission
+    && adduser --shell /sbin/nologin --uid 1000 docker \
+    && adduser --shell /sbin/nologin presto \
+    && mkdir -p \
+        ${PRESTO_CONF_DIR} \
+        ${PRESTO_LOG_DIR} \
+    && chmod -R 777 \
+        ${PRESTO_HOME} \
+        ${PRESTO_LOG_DIR} \
+    ## cleanup
+    && rm -rf /tmp/nativelib
+
+# bin/mustache.sh sources ../lib/mustache.sh relative to its own location,
+# so bin/ and lib/ must land under the same prefix (/usr/local).
+COPY etc/ ${PRESTO_CONF_DIR}/
+COPY bin/* /usr/local/bin/
+COPY lib/* /usr/local/lib/
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+# Copy the build output into the image and install the Hudi bundle jar into
+# the hive-hadoop2 plugin directory.
+ADD target/ /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/
+ENV HUDI_PRESTO_BUNDLE /var/hoodie/ws/docker/hoodie/hadoop/prestobase/target/hudi-presto-bundle.jar
+RUN cp ${HUDI_PRESTO_BUNDLE} ${PRESTO_HOME}/plugin/hive-hadoop2/
+
+VOLUME ["${PRESTO_LOG_DIR}"]
+
+WORKDIR ${PRESTO_HOME}
+
+ENTRYPOINT ["entrypoint.sh"]
diff --git a/docker/hoodie/hadoop/prestobase/bin/entrypoint.sh b/docker/hoodie/hadoop/prestobase/bin/entrypoint.sh
new file mode 100755
index 000000000..e3d917698
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/bin/entrypoint.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Container entrypoint for the Presto image: renders the *.mustache
+# configuration templates from the environment, then starts Presto in the
+# role given as the first argument ("coordinator" or "worker"). Any other
+# argument list is exec'd unchanged.
+
+set -eo pipefail
+
+# Poll a TCP endpoint until it accepts connections.
+#   $1 hostname (required)   $2 port (required)
+#   $3 number of attempts (default 100)   $4 seconds between attempts (default 2)
+# Exits the script with status 1 if the endpoint never comes up.
+# NOTE(review): not called anywhere in this script; it also relies on `nc`,
+# which this image's Dockerfile does not install explicitly - confirm before use.
+wait_until() {
+  local hostname=${1?}
+  local port=${2?}
+  local retry=${3:-100}
+  local sleep_secs=${4:-2}
+
+  local address_up=0
+
+  while [ "${retry}" -gt 0 ] ; do
+    echo "Waiting until ${hostname}:${port} is up ... with retry count: ${retry}"
+    if nc -z "${hostname}" "${port}"; then
+      address_up=1
+      break
+    fi
+    retry=$((retry-1))
+    sleep "${sleep_secs}"
+  done
+
+  if [ "${address_up}" -eq 0 ]; then
+    echo "GIVE UP waiting until ${hostname}:${port} is up! "
+    exit 1
+  fi
+}
+
+# Generate the node id once and keep it in the log directory so it is reused
+# on later starts that see the same directory.
+if [ ! -e "${PRESTO_LOG_DIR}/node.id" ]; then
+  cat /proc/sys/kernel/random/uuid > "${PRESTO_LOG_DIR}/node.id"
+fi
+
+# Assign and export separately so a failing `cat` is not masked by `export`.
+PRESTO_NODE_ID=$(cat "${PRESTO_LOG_DIR}/node.id")
+export PRESTO_NODE_ID
+
+# Render every <name>.mustache in the config directory to <name>. A glob is
+# used instead of parsing `ls` output so paths are never word-split.
+for template in "${PRESTO_CONF_DIR}"/*.mustache
+do
+  # With no matching file the pattern stays literal; skip it.
+  [ -e "${template}" ] || continue
+  conf_file=${template%.mustache}
+  mustache.sh < "${template}" > "${conf_file}"
+done
+
+case "$1" in
+  "coordinator" | "worker" )
+    server_role="$1"
+    shift
+    exec gosu presto launcher --config="${PRESTO_CONF_DIR}/${server_role}.properties" "$@" run
+    ;;
+  *)
+    ;;
+esac
+
+exec "$@"
diff --git a/docker/hoodie/hadoop/prestobase/bin/mustache.sh b/docker/hoodie/hadoop/prestobase/bin/mustache.sh
new file mode 100755
index 000000000..b626b2ca3
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/bin/mustache.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# `mustache.sh`: render a Mustache template read from standard input, in POSIX shell.
+
+set -e
+
+# Load the `mustache` function and its helpers. They are expected in the
+# `lib` directory that sits next to the `bin` directory holding this script.
+. "$(dirname "$(dirname "$0")")/lib/mustache.sh"
+
+# Call `mustache` to make this behave somewhat like `mustache`(1).
+# Because it accepts neither the `--compile` nor the `--tokens` command-line
+# option and takes no input file arguments, this program is called
+# `mustache.sh`(1), not `mustache`(1).
+mustache
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/catalog/hive.properties b/docker/hoodie/hadoop/prestobase/etc/catalog/hive.properties
new file mode 100644
index 000000000..c4c097ca9
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/catalog/hive.properties
@@ -0,0 +1,18 @@
+connector.name=hive-hadoop2
+hive.metastore-cache-ttl=1s
+hive.metastore-refresh-interval=1m
+hive.metastore-timeout=20s
+hive.metastore.uri=thrift://hivemetastore:9083
+hive.storage-format=PARQUET
+hive.parquet.use-column-names=true
+hive.max-split-size=128MB
+hive.assume-canonical-partition-keys=true
+hive.recursive-directories=true
+hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml
+hive.hdfs.authentication.type=NONE
+hive.hdfs.impersonation.enabled=false
+hive.bucket-execution=false
+hive.table-statistics-enabled=true
+hive.max-partitions-per-writers=3000
+hive.split-loader-concurrency=1
+hive.orc.bloom-filters.enabled=true
diff --git a/docker/hoodie/hadoop/prestobase/etc/catalog/jmx.properties b/docker/hoodie/hadoop/prestobase/etc/catalog/jmx.properties
new file mode 100644
index 000000000..6deb9117e
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/catalog/jmx.properties
@@ -0,0 +1,4 @@
+connector.name=jmx
+jmx.dump-tables=java.lang:type=Runtime,com.facebook.presto.execution.scheduler:name=NodeScheduler
+jmx.dump-period=10s
+jmx.max-entries=86400
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/catalog/localfile.properties b/docker/hoodie/hadoop/prestobase/etc/catalog/localfile.properties
new file mode 100644
index 000000000..fd94f6df5
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/catalog/localfile.properties
@@ -0,0 +1,3 @@
+connector.name=localfile
+presto-logs.http-request-log.location=/var/log/presto
+presto-logs.http-request-log.pattern=http-request.*
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/coordinator.properties.mustache b/docker/hoodie/hadoop/prestobase/etc/coordinator.properties.mustache
new file mode 100644
index 000000000..fa8d5fa58
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/coordinator.properties.mustache
@@ -0,0 +1,6 @@
+coordinator=true
+node-scheduler.include-coordinator=false
+http-server.http.port=8090
+query.max-memory={{PRESTO_QUERY_MAX_MEMORY}}
+discovery-server.enabled=true
+discovery.uri={{PRESTO_DISCOVERY_URI}}
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/jvm.config.mustache b/docker/hoodie/hadoop/prestobase/etc/jvm.config.mustache
new file mode 100644
index 000000000..256196322
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/jvm.config.mustache
@@ -0,0 +1,9 @@
+-server
+-Xmx{{PRESTO_JVM_MAX_HEAP}}
+-XX:+UseG1GC
+-XX:G1HeapRegionSize=32M
+-XX:+UseGCOverheadLimit
+-XX:+ExplicitGCInvokesConcurrent
+-XX:+HeapDumpOnOutOfMemoryError
+-XX:OnOutOfMemoryError=kill -9 %p
+-DHADOOP_USER_NAME=hive
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/log.properties b/docker/hoodie/hadoop/prestobase/etc/log.properties
new file mode 100644
index 000000000..3abc29ce3
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/log.properties
@@ -0,0 +1 @@
+com.facebook.presto=INFO
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/node.properties.mustache b/docker/hoodie/hadoop/prestobase/etc/node.properties.mustache
new file mode 100644
index 000000000..fa07f76e0
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/node.properties.mustache
@@ -0,0 +1,3 @@
+node.environment=production
+node.id={{PRESTO_NODE_ID}}
+node.data-dir={{PRESTO_LOG_DIR}}
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/etc/worker.properties.mustache b/docker/hoodie/hadoop/prestobase/etc/worker.properties.mustache
new file mode 100644
index 000000000..7af1af690
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/etc/worker.properties.mustache
@@ -0,0 +1,4 @@
+coordinator=false
+http-server.http.port=8090
+query.max-memory={{PRESTO_QUERY_MAX_MEMORY}}
+discovery.uri={{PRESTO_DISCOVERY_URI}}
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/lib/mustache.sh b/docker/hoodie/hadoop/prestobase/lib/mustache.sh
new file mode 100644
index 000000000..9eea306ff
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/lib/mustache.sh
@@ -0,0 +1,252 @@
+# `mustache.sh`, Mustache in POSIX shell. This file is meant to be sourced.
+
+set -e  # applies to the shell that sources this file
+
+# File descriptor 3 carries debug output: it is discarded by default and
+# sent to standard error when MUSTACHE_DEBUG is set to a non-empty value.
+[ -z "$MUSTACHE_DEBUG" ] && exec 3>/dev/null || exec 3>&2
+
+# File descriptor 4 is commandeered for use as a sink for literal and
+# variable output of (inverted) sections that are not destined for standard
+# output because their condition is not met.
+exec 4>/dev/null
+
+# File descriptor 5 receives a raw copy of each input character; discarded here, redirected only while a backtick section is captured.
+exec 5>/dev/null
+
+# Render a `mustache`(5) template from standard input to standard output,
+# filling tags from the environment. The caller's IFS is restored on return.
+mustache() {
+
+    # Characters are emitted on fd 1; unrendered sections switch this to 4.
+    _M_FD=1
+
+    # Remember whether IFS was set, and its value, so it can be put back.
+    _M_IFS_WAS_SET="${IFS+yes}" _M_IFS_SAVED="${IFS-}"
+
+    # IFS must only contain '\n' so `read` keeps space and tab characters;
+    # a single-quoted literal newline is the reliable way to get that byte.
+    IFS='
+'
+
+    # POSIX `read` cannot limit itself to one character, so `sed` puts each
+    # character on its own line. Real newlines then show up as zero-length
+    # lines and are recognised when the character is emitted.
+    _mustache_sed | _mustache
+    _M_RC="$?"
+
+    # Put IFS back exactly as it was, including the unset case, and hand the
+    # pipeline's status to the caller.
+    if [ -n "$_M_IFS_WAS_SET" ]; then IFS="$_M_IFS_SAVED"; else unset IFS; fi
+    return "$_M_RC"
+}
+
+# Process the one-character-per-line stream from `sed` via a state machine.
+# This function will be called recursively in subshell environments to
+# isolate nested section tags from the outside environment.
+_mustache() {
+
+ # Always start by assuming a character is a literal.
+ _M_STATE="literal"
+
+ # The `read` builtin consumes one line at a time but by now each line
+ # contains only a single character.
+ while read _M_C  # no -r: _mustache_sed doubles every backslash, and read's escape handling turns the pair back into one
+ do
+ echo " _M_C: $_M_C (${#_M_C}), _M_STATE: $_M_STATE" >&3
+ echo "$_M_C" >&5  # raw copy of the input; fd 5 is /dev/null unless _mustache_tag redirected it to capture a section
+ case "$_M_STATE" in
+
+ # Consume a single character literal. In the event this
+ # character and the previous character have been opening
+ # braces, progress to the "tag" state and initialize the
+ # tag name to the empty string (this invariant is relied
+ # on by the "tag" state). If this is the first opening
+ # brace, wait and see. Otherwise, emit this character.
+ "literal")
+ if [ -z "$_M_PREV_C" ]
+ then
+ case "$_M_C" in
+ "{") ;;
+ "") echo;;
+ *) printf "%s" "$_M_C";;
+ esac
+ else
+ case "$_M_PREV_C$_M_C" in
+ "{{") _M_STATE="tag" _M_TAG="";;
+ ?"{") ;;
+ *)
+ [ "$_M_PREV_C" = "{" ] && printf "%s" "{"
+ [ -z "$_M_C" ] && echo || printf "%s" "$_M_C";;
+ esac
+ fi >&$_M_FD;;  # fd 1 normally; fd 4 (/dev/null) while inside a section that is not rendered
+
+ # Consume the tag type and tag.
+ "tag")
+ case "$_M_PREV_C$_M_C" in
+
+ # A third opening brace in a row could be treated as
+ # a literal and the beginning of tag, as it is here,
+ # or as the beginning of a tag which begins with an
+ # opening brace.
+ "{{") printf "{" >&$_M_FD;;
+
+ # Note the type of this tag, defaulting to "variable".
+ "{#"|"{^"|"{/"|"{!"|"{>") _M_TAG_TYPE="$_M_C" _M_TAG="";;
+
+ # A variable tag must note the first character of the
+ # variable name. Since it's possible that an opening
+ # brace comes in the middle of the tag, check that
+ # this is indeed the beginning of the tag.
+ "{"?)
+ if [ -z "$_M_TAG" ]
+ then
+ _M_TAG_TYPE="variable" _M_TAG="$_M_C"
+ fi;;
+
+ # Two closing braces in a row closes the tag. The
+ # state resets to "literal" and the tag is processed,
+ # possibly in a subshell.
+ "}}")
+ _M_STATE="literal"
+ _mustache_tag;;
+
+ # A single closing brace is ignored at first.
+ ?"}") ;;
+
+ # If the variable continues, the closing brace becomes
+ # part of the variable name.
+ "}"?) _M_TAG="$_M_TAG}";;
+
+ # Any other character becomes part of the variable name.
+ *) _M_TAG="$_M_TAG$_M_C";;
+
+ esac;;
+
+ esac
+
+ # This character becomes the previous character.
+ _M_PREV_C="$_M_C"
+
+ done
+
+}
+
+# Copy stdin to stdout via `cat -A`; fall back to `cat -e` when the probe of `cat -A` exits with status 1.
+_mustache_cat() {
+    set +e
+    cat -A 0<"/dev/null" 1>"/dev/null" 2>&1
+    _M_STATUS="$?"
+    set -e
+    case "$_M_STATUS" in
+        1) cat -e;;
+        *) cat -A;;
+    esac
+}
+
+# Run a backtick-delimited tag as a shell command; callers use its standard output as the tag's value.
+_mustache_cmd() {
+ _M_CMD="$*"  # all arguments joined into one command string
+ _M_CMD="${_M_CMD#"\`"}"  # drop the leading backtick
+ _M_CMD="${_M_CMD%"\`"}"  # drop the trailing backtick
+ sh -c "$_M_CMD"  # executed by a separate `sh` process
+}
+
+# Report a fatal error on standard error and terminate with status 1.
+# All arguments are joined into the message, prefixed with "mustache.sh: ".
+_mustache_die() {
+    >&2 echo "mustache.sh: $*"
+    exit 1
+}
+
+# Split standard input into one character per line, papering over differences between GNU sed and BSD sed.
+_mustache_sed() {
+ _M_NEWLINE="
+"
+ set +e  # the probe below may exit non-zero; do not let errexit abort
+ sed -r <"/dev/null" >"/dev/null" 2>&1  # probe: run `sed -r` with no script and inspect its exit status
+ _M_STATUS="$?"
+ set -e
+ if [ "$_M_STATUS" -eq 1 ]  # both branches also double every backslash, which `read` (without -r) later undoes
+ then sed -E "s/./&\\$_M_NEWLINE/g; s/\\\\/\\\\\\\\/g"  # replacement uses an escaped literal newline
+ else sed -r "s/./&\\n/g; s/\\\\/\\\\\\\\/g"  # replacement uses the \n escape
+ fi
+}
+
+# Process a complete tag. Variables are emitted, sections are recursed
+# into, comments are ignored, and (for now) partials raise an error.
+_mustache_tag() {
+ case "$_M_TAG_TYPE" in
+
+ # Variable tags expand to the value of an environment variable
+ # or the empty string if the environment variable is unset.
+ #
+ # If the tag is surrounded by backticks, execute it as a shell
+ # command, instead, using standard output as its value.
+ #
+ # Since the variable tag has been completely consumed, return
+ # to the assumption that everything's a literal until proven
+ # otherwise for this character.
+ "variable")
+ case "$_M_TAG" in
+ "\`"*"\`") _mustache_cmd "$_M_TAG";;
+ *) eval printf "%s" "\"\$$_M_TAG\"";;  # NOTE(review): the tag text is eval'd, so templates must come from a trusted source
+ esac >&$_M_FD;;
+
+ # Section tags expand to the expanded value of the section's
+ # literals and tags if and only if the section tag is in the
+ # environment and non-empty. Inverted section tags expand
+ # if the section tag is empty or unset in the environment.
+ #
+ # If the tag is surrounded by backticks, execute it as a shell
+ # command, instead, and process the section once for each line
+ # of standard output (made available as `_M_LINE`).
+ #
+ # Sections not being expanded are redirected to `/dev/null`.
+ "#"|"^")
+ echo " # _M_TAG: $_M_TAG" >&3
+ _M_TAG_V="$(eval printf "%s" "\"\$$_M_TAG\"")"
+ case "$_M_TAG_TYPE" in
+ "#") [ -z "$_M_TAG_V" ] && _M_FD=4;;  # empty or unset: discard the section body
+ "^") [ -n "$_M_TAG_V" ] && _M_FD=4;;  # non-empty: discard the inverted section body
+ esac
+ case "$_M_TAG" in
+ "\`"*"\`")
+ _M_CAPTURE="$(_M_SECTION_TAG="$_M_TAG" _mustache 5>&1 >&4)"  # keep the raw characters copied to fd 5, drop the rendered output
+ echo " _M_CAPTURE: $_M_CAPTURE" | _mustache_cat >&3
+ _mustache_cmd "$_M_TAG" | while read _M_LINE
+ do
+ echo " _M_LINE: $_M_LINE" >&3
+ (
+ _M_SECTION_TAG="$_M_TAG"
+ echo "$_M_CAPTURE" | _mustache
+ )
+ done;;
+ *)
+ (
+ _M_SECTION_TAG="$_M_TAG"
+ _mustache
+ );;
+ esac
+ _M_FD=1;;  # section finished: resume writing to standard output
+
+ # Closing tags for (inverted) sections must match the expected
+ # tag name. Any redirections made when the (inverted) section
+ # opened are reset when the section closes.
+ "/")
+ echo " / _M_TAG: $_M_TAG, _M_SECTION_TAG: $_M_SECTION_TAG" >&3
+ if [ "$_M_TAG" != "$_M_SECTION_TAG" ]
+ then
+ _mustache_die "mismatched closing tag $_M_TAG," \
+ "expected $_M_SECTION_TAG"
+ fi
+ exit;;  # ends the current _mustache run - normally the subshell started when the section was opened
+
+ # Comments do nothing.
+ "!") ;;
+
+ # TODO Partials.
+ ">") _mustache_die "{{>$_M_TAG}} syntax not implemented";;
+
+ esac
+}
\ No newline at end of file
diff --git a/docker/hoodie/hadoop/prestobase/pom.xml b/docker/hoodie/hadoop/prestobase/pom.xml
new file mode 100644
index 000000000..f4d99c6bf
--- /dev/null
+++ b/docker/hoodie/hadoop/prestobase/pom.xml
@@ -0,0 +1,106 @@
+
+
+
+
+
+ hudi-hadoop-docker
+ org.apache.hudi
+ 0.5.0-SNAPSHOT
+
+ 4.0.0
+ pom
+ hudi-hadoop-presto-docker
+ Base Docker Image with Hoodie
+
+
+ UTF-8
+ true
+
+
+
+
+ org.apache.hudi
+ hudi-hadoop-base-docker
+ ${project.version}
+ pom
+ import
+
+
+
+
+
+
+ maven-antrun-plugin
+ 1.7
+
+
+ package
+
+
+
+
+
+
+ run
+
+
+
+
+
+
+ com.spotify
+ dockerfile-maven-plugin
+ ${dockerfile.maven.version}
+
+
+ tag-latest
+ pre-integration-test
+
+ build
+ tag
+
+
+ ${docker.build.skip}
+ false
+ apachehudi/hudi-hadoop_${docker.hadoop.version}-prestobase_${docker.presto.version}
+ true
+ latest
+
+
+
+ tag-version
+ pre-integration-test
+
+ build
+ tag
+
+
+
+ ${docker.build.skip}
+ false
+ apachehudi/hudi-hadoop_${docker.hadoop.version}-prestobase_${docker.presto.version}
+ true
+ ${project.version}
+
+
+
+
+
+
+
diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile
index 25a350427..e0f2113b9 100644
--- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile
+++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile
@@ -21,10 +21,16 @@ ARG HIVE_VERSION=2.3.3
ARG SPARK_VERSION=2.3.1
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}
+ARG PRESTO_VERSION=0.217
COPY adhoc.sh /opt/spark
ENV SPARK_WORKER_WEBUI_PORT 8081
ENV SPARK_WORKER_LOG /spark/logs
ENV SPARK_MASTER "spark://spark-master:7077"
+ENV PRESTO_VERSION ${PRESTO_VERSION}
+RUN set -x \
+ ## presto-client
+ && wget -q -O /usr/local/bin/presto https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \
+ && chmod +x /usr/local/bin/presto
CMD ["/bin/bash", "/opt/spark/adhoc.sh"]
diff --git a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh
index c5a5a938a..20dd85153 100644
--- a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh
+++ b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh
@@ -24,8 +24,10 @@
export SPARK_HOME=/opt/spark
+export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1"
date
echo "SPARK HOME is : $SPARK_HOME"
+echo "PRESTO CLI CMD is : $PRESTO_CLI_CMD"
tail -f /dev/null