diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml index 05790963b..933f9eb27 100644 --- a/docker/compose/docker-compose_hadoop284_hive233_spark244.yml +++ b/docker/compose/docker-compose_hadoop284_hive233_spark244.yml @@ -221,6 +221,34 @@ services: - ${HUDI_WS}:/var/hoodie/ws command: worker + trino-coordinator-1: + container_name: trino-coordinator-1 + hostname: trino-coordinator-1 + image: apachehudi/hudi-hadoop_2.8.4-trinocoordinator_368:latest + ports: + - '8091:8091' + links: + - "hivemetastore" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + command: http://trino-coordinator-1:8091 trino-coordinator-1 + + trino-worker-1: + container_name: trino-worker-1 + hostname: trino-worker-1 + image: apachehudi/hudi-hadoop_2.8.4-trinoworker_368:latest + depends_on: [ "trino-coordinator-1" ] + ports: + - '8092:8092' + links: + - "hivemetastore" + - "hiveserver" + - "hive-metastore-postgresql" + - "namenode" + volumes: + - ${HUDI_WS}:/var/hoodie/ws + command: http://trino-coordinator-1:8091 trino-worker-1 + graphite: container_name: graphite hostname: graphite @@ -248,6 +276,7 @@ services: - "hive-metastore-postgresql" - "namenode" - "presto-coordinator-1" + - "trino-coordinator-1" volumes: - ${HUDI_WS}:/var/hoodie/ws @@ -267,6 +296,7 @@ services: - "hive-metastore-postgresql" - "namenode" - "presto-coordinator-1" + - "trino-coordinator-1" volumes: - ${HUDI_WS}:/var/hoodie/ws diff --git a/docker/hoodie/hadoop/base_java11/Dockerfile b/docker/hoodie/hadoop/base_java11/Dockerfile new file mode 100644 index 000000000..8052eae6a --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/Dockerfile @@ -0,0 +1,60 @@ + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM openjdk:11-jdk-slim-bullseye
+LABEL maintainer="Hoodie"
+USER root
+
+# Default to UTF-8 file.encoding
+ENV LANG C.UTF-8
+
+ARG HADOOP_VERSION=2.8.4
+ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
+ENV HADOOP_VERSION ${HADOOP_VERSION}
+ENV HADOOP_URL ${HADOOP_URL}
+# DEBIAN_FRONTEND is exported so it applies to both apt-get calls in this layer, not only the first
+RUN set -x \
+    && export DEBIAN_FRONTEND=noninteractive && apt-get -yq update && apt-get -yq install curl wget netcat procps \
+    && echo "Fetch URL2 is : ${HADOOP_URL}" \
+    && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
+    && curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \
+    && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
+    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
+    && rm /tmp/hadoop.tar.gz* \
+    && ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
+    && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
+    && mkdir /hadoop-data
+
+ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
+ENV HADOOP_CONF_DIR=/etc/hadoop
+ENV MULTIHOMED_NETWORK=1
+ENV HADOOP_HOME=${HADOOP_PREFIX}
+ENV HADOOP_INSTALL=${HADOOP_HOME}
+ENV USER=root
+ENV PATH /usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH
+
+# Exposing a union of ports across hadoop versions
+# Well known ports including ssh
+EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042
+
+COPY entrypoint.sh /entrypoint.sh
+COPY export_container_ip.sh /usr/bin/
+RUN chmod a+x /usr/bin/export_container_ip.sh \
+    && chmod a+x /entrypoint.sh
+
+ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
+
diff --git a/docker/hoodie/hadoop/base_java11/entrypoint.sh b/docker/hoodie/hadoop/base_java11/entrypoint.sh
new file mode 100644
index 000000000..7c26f29f6
--- /dev/null
+++ b/docker/hoodie/hadoop/base_java11/entrypoint.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#######################################################################################
+## COPIED FROM ##
+## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ##
+# ##
+#######################################################################################
+
+# Set some sensible defaults
+export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}
+# addProperty <file> <name> <value>: insert one <property> element before </configuration> in <file>
+function addProperty() {
+  local path=$1
+  local name=$2
+  local value=$3
+  # '/' and '&' are special in the sed replacement below, so both are escaped before splicing
+  local entry="<property><name>$name</name><value>${value}</value></property>"
+  local escapedEntry=$(printf '%s\n' "$entry" | sed 's/[\/&]/\\&/g')
+  sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
+}
+# configure <file> <module> <envPrefix>: write every <envPrefix>_* env var into /etc/hadoop/<module>-site.xml
+function configure() {
+  local path=$1
+  local module=$2
+  local envPrefix=$3
+
+  local var
+  local value
+
+  echo "Configuring $module"
+  for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do
+    name=`echo ${c} | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;'`  # ___ -> '-', __ -> '_', _ -> '.'
+    var="${envPrefix}_${c}"
+    value=${!var}
+    echo " - Setting $name=$value"
+    addProperty /etc/hadoop/$module-site.xml $name "$value"
+  done
+}
+
+configure /etc/hadoop/core-site.xml core CORE_CONF
+configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
+configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
+configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
+configure /etc/hadoop/kms-site.xml kms KMS_CONF
+
+if [ "$MULTIHOMED_NETWORK" = "1" ]; then
+    echo "Configuring for multihomed network"
+
+    # HDFS
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
+    addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
+    addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true
+
+    # YARN
+    addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
+    addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0
+
+    # MAPRED
+    addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
+fi
+
+if [ -n "$GANGLIA_HOST" ]; then
+    mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
+    mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig
+
+    for module in mapred jvm rpc ugi; do
+        echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
+        echo "$module.period=10"
+        echo "$module.servers=$GANGLIA_HOST:8649"
+    done > /etc/hadoop/hadoop-metrics.properties
+
+    for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
+        echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
+        echo "$module.sink.ganglia.period=10"
+        echo "$module.sink.ganglia.supportsparse=true"
+        echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
+        echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
+        echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
+    done > /etc/hadoop/hadoop-metrics2.properties
+fi
+
+# Save Container IP in ENV variable (sourced: a child process could not export into this shell)
+source /usr/bin/export_container_ip.sh
+
+exec "$@"
diff --git a/docker/hoodie/hadoop/base_java11/export_container_ip.sh b/docker/hoodie/hadoop/base_java11/export_container_ip.sh
new file mode 100755
index 000000000..b427f92cc
--- /dev/null
+++ b/docker/hoodie/hadoop/base_java11/export_container_ip.sh
@@ -0,0 +1,30 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +interfaces=( "en0" "eth0" ) + +ipAddr="" +for interface in "${interfaces[@]}" +do + ipAddr=`ifconfig $interface | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head` + if [ -n "$ipAddr" ]; then + break + fi +done + +echo "Container IP is set to : $ipAddr" +export MY_CONTAINER_IP=$ipAddr diff --git a/docker/hoodie/hadoop/base_java11/pom.xml b/docker/hoodie/hadoop/base_java11/pom.xml new file mode 100644 index 000000000..b5d5a3ec1 --- /dev/null +++ b/docker/hoodie/hadoop/base_java11/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-base-java11-docker + + Base Docker Image with Hoodie + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + + org.apache.hudi + hudi-hadoop-docker + ${project.version} + pom + import + + + + + + + hudi + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11 + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11 + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/pom.xml b/docker/hoodie/hadoop/pom.xml index db47f3924..2d0d75283 
100644 --- a/docker/hoodie/hadoop/pom.xml +++ b/docker/hoodie/hadoop/pom.xml @@ -28,6 +28,7 @@ pom base + base_java11 namenode datanode historyserver @@ -37,6 +38,9 @@ sparkworker sparkadhoc prestobase + trinobase + trinocoordinator + trinoworker @@ -54,6 +58,7 @@ 2.3.3 2.8.4 0.217 + 368 1.4.13 true ${project.parent.basedir} diff --git a/docker/hoodie/hadoop/sparkadhoc/Dockerfile b/docker/hoodie/hadoop/sparkadhoc/Dockerfile index a114cf0fb..19af7351b 100644 --- a/docker/hoodie/hadoop/sparkadhoc/Dockerfile +++ b/docker/hoodie/hadoop/sparkadhoc/Dockerfile @@ -21,15 +21,36 @@ ARG SPARK_VERSION=2.4.4 FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION} ARG PRESTO_VERSION=0.217 +ARG TRINO_VERSION=368 COPY adhoc.sh /opt/spark ENV SPARK_WORKER_WEBUI_PORT 8081 ENV SPARK_WORKER_LOG /spark/logs ENV SPARK_MASTER "spark://spark-master:7077" ENV PRESTO_VERSION ${PRESTO_VERSION} +ENV TRINO_VERSION ${TRINO_VERSION} +ENV BASE_URL=https://repo1.maven.org/maven2 + +RUN apt-get update +RUN apt-get install -y \ + curl \ + tar \ + sudo \ + rsync \ + python \ + wget \ + python3-pip \ + python-dev \ + build-essential \ + uuid-runtime \ + less RUN set -x \ ## presto-client - && wget -q -O /usr/local/bin/presto https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \ + && wget -q -O /usr/local/bin/presto ${BASE_URL}/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \ && chmod +x /usr/local/bin/presto +RUN set -x \ + ## trino-cli + && wget -q -O /usr/local/bin/trino ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar \ + && chmod +x /usr/local/bin/trino CMD ["/bin/bash", "/opt/spark/adhoc.sh"] diff --git a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh index b20e8cb5f..fd2ef6517 100644 --- a/docker/hoodie/hadoop/sparkadhoc/adhoc.sh +++ 
b/docker/hoodie/hadoop/sparkadhoc/adhoc.sh @@ -22,10 +22,12 @@ export SPARK_HOME=/opt/spark -export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1" +export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1:8090" +export TRINO_CLI_CMD="/usr/local/bin/trino --server trino-coordinator-1:8091" date echo "SPARK HOME is : $SPARK_HOME" echo "PRESTO CLI CMD is : $PRESTO_CLI_CMD" +echo "TRINO CLI CMD is : $TRINO_CLI_CMD" tail -f /dev/null diff --git a/docker/hoodie/hadoop/sparkadhoc/pom.xml b/docker/hoodie/hadoop/sparkadhoc/pom.xml index 20a9cab16..e554313ac 100644 --- a/docker/hoodie/hadoop/sparkadhoc/pom.xml +++ b/docker/hoodie/hadoop/sparkadhoc/pom.xml @@ -64,7 +64,9 @@ ${docker.build.skip} false - apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + + apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + true latest @@ -80,7 +82,9 @@ ${docker.build.skip} false - apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + + apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version} + true ${project.version} diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile new file mode 100644 index 000000000..9d7c23010 --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/Dockerfile @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
+
+ARG HADOOP_VERSION=2.8.4
+ARG HIVE_VERSION=2.3.3
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base
+
+ENV TRINO_VERSION=368
+ENV TRINO_HOME=/usr/local/trino
+ENV BASE_URL=https://repo1.maven.org/maven2
+# update and install share one layer so a cached, stale package index is never reused
+RUN apt-get update \
+    && apt-get install -y \
+    curl \
+    tar \
+    sudo \
+    rsync \
+    python \
+    wget \
+    python3-pip \
+    python-dev \
+    build-essential \
+    uuid-runtime \
+    less
+# NOTE(review): /usr/local/openjdk-11 is where the official openjdk:11 images keep the JDK - confirm against the base image
+ENV JAVA_HOME /usr/local/openjdk-11
+ENV PATH $PATH:$JAVA_HOME/bin
+
+WORKDIR /usr/local/bin
+RUN wget -q ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar
+RUN chmod +x trino-cli-${TRINO_VERSION}-executable.jar
+RUN mv trino-cli-${TRINO_VERSION}-executable.jar trino-cli
+
+WORKDIR /usr/local
+RUN wget -q ${BASE_URL}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz
+RUN tar xvzf trino-server-${TRINO_VERSION}.tar.gz -C /usr/local/
+RUN ln -s /usr/local/trino-server-${TRINO_VERSION} $TRINO_HOME
+
+ENV TRINO_BASE_WS /var/hoodie/ws/docker/hoodie/hadoop/trinobase
+RUN mkdir -p ${TRINO_BASE_WS}/target/
+ADD target/ ${TRINO_BASE_WS}/target/
+ENV HUDI_TRINO_BUNDLE ${TRINO_BASE_WS}/target/hudi-trino-bundle.jar
+RUN cp ${HUDI_TRINO_BUNDLE} ${TRINO_HOME}/plugin/hive/
+
+ADD scripts ${TRINO_HOME}/scripts
+RUN chmod +x ${TRINO_HOME}/scripts/trino.sh
+
+RUN mkdir -p $TRINO_HOME/data
+VOLUME ["$TRINO_HOME/data"]
diff --git a/docker/hoodie/hadoop/trinobase/pom.xml b/docker/hoodie/hadoop/trinobase/pom.xml
new file
mode 100644 index 000000000..5a290556e --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/pom.xml @@ -0,0 +1,116 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinobase-docker + Trino Base Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-base-java11-docker + ${project.version} + pom + import + + + + + + + + org.apache.maven.plugins + maven-antrun-plugin + 1.7 + + + package + + + + + + + run + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version} + + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/trinobase/scripts/trino.sh b/docker/hoodie/hadoop/trinobase/scripts/trino.sh new file mode 100644 index 000000000..9aacd842c --- /dev/null +++ b/docker/hoodie/hadoop/trinobase/scripts/trino.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# exec: the Trino launcher replaces this wrapper shell instead of running as its child
+exec /usr/local/trino/bin/launcher run
diff --git a/docker/hoodie/hadoop/trinocoordinator/Dockerfile b/docker/hoodie/hadoop/trinocoordinator/Dockerfile
new file mode 100644
index 000000000..67a31448d
--- /dev/null
+++ b/docker/hoodie/hadoop/trinocoordinator/Dockerfile
@@ -0,0 +1,29 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
+
+ARG HADOOP_VERSION=2.8.4
+ARG TRINO_VERSION=368
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base
+
+ADD etc /usr/local/trino/etc
+EXPOSE 8091
+
+WORKDIR /usr/local/trino
+ENTRYPOINT [ "./scripts/trino.sh" ]
diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties
new file mode 100644
index 000000000..ed7fce1b3
--- /dev/null
+++ b/docker/hoodie/hadoop/trinocoordinator/etc/catalog/hive.properties
@@ -0,0 +1,22 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +connector.name=hive +hive.metastore.uri=thrift://hivemetastore:9083 +hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml +hive.hdfs.authentication.type=NONE diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/config.properties b/docker/hoodie/hadoop/trinocoordinator/etc/config.properties new file mode 100644 index 000000000..9876a0fe0 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/config.properties @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +coordinator=true +node-scheduler.include-coordinator=false +http-server.http.port=8091 +query.max-memory=50GB +query.max-memory-per-node=1GB +query.max-total-memory-per-node=2GB +discovery-server.enabled=true +discovery.uri=http://trino-coordinator-1:8091 diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config b/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config new file mode 100644 index 000000000..fb17203ca --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/jvm.config @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +-server +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:OnOutOfMemoryError=kill -9 %p +-Djdk.attach.allowAttachSelf=true diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/log.properties b/docker/hoodie/hadoop/trinocoordinator/etc/log.properties new file mode 100644 index 000000000..23b063080 --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/etc/log.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Logger names are Java package prefixes; Trino's root package is io.trino
+io.trino=INFO
diff --git a/docker/hoodie/hadoop/trinocoordinator/etc/node.properties b/docker/hoodie/hadoop/trinocoordinator/etc/node.properties
new file mode 100644
index 000000000..d97d54748
--- /dev/null
+++ b/docker/hoodie/hadoop/trinocoordinator/etc/node.properties
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# +node.environment=development +node.id=3044b958-f077-4fce-87ed-ca8308f800b6 +node.data-dir=/usr/local/trino/data diff --git a/docker/hoodie/hadoop/trinocoordinator/pom.xml b/docker/hoodie/hadoop/trinocoordinator/pom.xml new file mode 100644 index 000000000..564a2083e --- /dev/null +++ b/docker/hoodie/hadoop/trinocoordinator/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinocoordinator-docker + Trino Coordinator Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-trinobase-docker + ${project.version} + pom + + + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version} + + true + ${project.version} + + + + + + + diff --git a/docker/hoodie/hadoop/trinoworker/Dockerfile b/docker/hoodie/hadoop/trinoworker/Dockerfile new file mode 100644 index 000000000..ae5b2766d --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/Dockerfile @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster + +ARG HADOOP_VERSION=2.8.4 +ARG TRINO_VERSION=368 +FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest as trino-base + +ADD etc /usr/local/trino/etc +EXPOSE 8092 + +WORKDIR /usr/local/trino +ENTRYPOINT [ "./scripts/trino.sh" ] diff --git a/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties new file mode 100644 index 000000000..ed7fce1b3 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/catalog/hive.properties @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+connector.name=hive
+hive.metastore.uri=thrift://hivemetastore:9083
+hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml
+hive.hdfs.authentication.type=NONE
diff --git a/docker/hoodie/hadoop/trinoworker/etc/config.properties b/docker/hoodie/hadoop/trinoworker/etc/config.properties
new file mode 100644
index 000000000..0e15d3d7c
--- /dev/null
+++ b/docker/hoodie/hadoop/trinoworker/etc/config.properties
@@ -0,0 +1,24 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# Worker HTTP port: matches EXPOSE 8092 in the worker Dockerfile and the '8092:8092' compose mapping
+coordinator=false
+http-server.http.port=8092
+query.max-memory=50GB
+query.max-memory-per-node=1GB
+query.max-total-memory-per-node=2GB
+discovery.uri=http://trino-coordinator-1:8091
diff --git a/docker/hoodie/hadoop/trinoworker/etc/jvm.config b/docker/hoodie/hadoop/trinoworker/etc/jvm.config
new file mode 100644
index 000000000..fb17203ca
--- /dev/null
+++ b/docker/hoodie/hadoop/trinoworker/etc/jvm.config
@@ -0,0 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +-server +-Xmx16G +-XX:+UseG1GC +-XX:G1HeapRegionSize=32M +-XX:+UseGCOverheadLimit +-XX:+ExplicitGCInvokesConcurrent +-XX:+HeapDumpOnOutOfMemoryError +-XX:OnOutOfMemoryError=kill -9 %p +-Djdk.attach.allowAttachSelf=true diff --git a/docker/hoodie/hadoop/trinoworker/etc/log.properties b/docker/hoodie/hadoop/trinoworker/etc/log.properties new file mode 100644 index 000000000..23b063080 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/log.properties @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +io.trino=INFO diff --git a/docker/hoodie/hadoop/trinoworker/etc/node.properties b/docker/hoodie/hadoop/trinoworker/etc/node.properties new file mode 100644 index 000000000..6cfebf995 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/etc/node.properties @@ -0,0 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# +node.environment=development +node.id=6606f0b3-6ae7-4152-a4b1-ddadb6345fe6 +node.data-dir=/var/trino/data diff --git a/docker/hoodie/hadoop/trinoworker/pom.xml b/docker/hoodie/hadoop/trinoworker/pom.xml new file mode 100644 index 000000000..54f56b9e1 --- /dev/null +++ b/docker/hoodie/hadoop/trinoworker/pom.xml @@ -0,0 +1,96 @@ + + + + + hudi-hadoop-docker + org.apache.hudi + 0.11.0-SNAPSHOT + + 4.0.0 + pom + hudi-hadoop-trinoworker-docker + Trino Worker Docker Image with Hudi + + + UTF-8 + true + ${project.parent.parent.basedir} + + + + + + org.apache.hudi + hudi-hadoop-trinobase-docker + ${project.version} + pom + + + + + + + + + com.spotify + dockerfile-maven-plugin + ${dockerfile.maven.version} + + + tag-latest + pre-integration-test + + build + tag + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + + true + latest + + + + tag-version + pre-integration-test + + build + tag + + + + ${docker.build.skip} + false + + apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version} + + true + ${project.version} + + + + + + +