1
0

[HUDI-2785] Add Trino setup in Docker Demo (#4300)

* [HUDI-2785] Add Trino setup in Docker Demo

* Update docker account and remove unnecessary configs

* Adjust sparkadhoc Dockerfile
This commit is contained in:
Y Ethan Guo
2022-01-14 08:38:55 -08:00
committed by GitHub
parent 7d163ee3de
commit 53f75f84b8
26 changed files with 1040 additions and 4 deletions

View File

@@ -221,6 +221,34 @@ services:
- ${HUDI_WS}:/var/hoodie/ws
command: worker
# Trino coordinator for the demo cluster. Publishes 8091 on the host, links to the
# Hive metastore, and mounts the Hudi workspace at /var/hoodie/ws.
trino-coordinator-1:
container_name: trino-coordinator-1
hostname: trino-coordinator-1
image: apachehudi/hudi-hadoop_2.8.4-trinocoordinator_368:latest
ports:
- '8091:8091'
links:
- "hivemetastore"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
# NOTE(review): the trino.sh entrypoint added in this change starts the launcher
# without reading its arguments, so this command looks informational only - confirm.
command: http://trino-coordinator-1:8091 trino-coordinator-1
# Trino worker for the demo cluster. Started after the coordinator (depends_on) and
# linked to the Hive/HDFS services it reads table data and metadata from.
trino-worker-1:
container_name: trino-worker-1
hostname: trino-worker-1
image: apachehudi/hudi-hadoop_2.8.4-trinoworker_368:latest
depends_on: [ "trino-coordinator-1" ]
# NOTE(review): confirm the published port matches http-server.http.port in the
# worker's config.properties.
ports:
- '8092:8092'
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
# First argument is the coordinator URL, second is this node's name.
# NOTE(review): the trino.sh entrypoint does not read its arguments - confirm these are needed.
command: http://trino-coordinator-1:8091 trino-worker-1
graphite:
container_name: graphite
hostname: graphite
@@ -248,6 +276,7 @@ services:
- "hive-metastore-postgresql"
- "namenode"
- "presto-coordinator-1"
- "trino-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
@@ -267,6 +296,7 @@ services:
- "hive-metastore-postgresql"
- "namenode"
- "presto-coordinator-1"
- "trino-coordinator-1"
volumes:
- ${HUDI_WS}:/var/hoodie/ws

View File

@@ -0,0 +1,60 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Hadoop base image on Java 11: downloads and unpacks a Hadoop distribution,
# wires up the Hadoop environment variables and installs the shared entrypoint.
FROM openjdk:11-jdk-slim-bullseye

# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="Hoodie"

USER root

# Default to UTF-8 file.encoding
ENV LANG=C.UTF-8

ARG HADOOP_VERSION=2.8.4
ARG HADOOP_URL=https://archive.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz
ENV HADOOP_VERSION=${HADOOP_VERSION}
ENV HADOOP_URL=${HADOOP_URL}

# Install tools and unpack Hadoop in one layer so the temporary files removed here
# never end up in the image.
# DEBIAN_FRONTEND is exported so it covers `install` as well as `update`; it is
# scoped to this RUN and does not leak into the runtime environment.
# NOTE: the .asc signature is downloaded but not verified against any key.
RUN set -x \
    && export DEBIAN_FRONTEND=noninteractive \
    && apt-get -yq update \
    && apt-get -yq install curl wget netcat procps \
    && rm -rf /var/lib/apt/lists/* \
    && echo "Fetch URL2 is : ${HADOOP_URL}" \
    && curl -fSL "${HADOOP_URL}" -o /tmp/hadoop.tar.gz \
    && curl -fSL "${HADOOP_URL}.asc" -o /tmp/hadoop.tar.gz.asc \
    && mkdir -p /opt/hadoop-$HADOOP_VERSION/logs \
    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
    && rm /tmp/hadoop.tar.gz* \
    && ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
    && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
    && mkdir /hadoop-data

# Each variable gets its own ENV instruction because later ones reference earlier ones.
ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV MULTIHOMED_NETWORK=1
ENV HADOOP_HOME=${HADOOP_PREFIX}
ENV HADOOP_INSTALL=${HADOOP_HOME}
ENV USER=root
ENV PATH=/usr/bin:/bin:$HADOOP_PREFIX/bin/:$PATH

# Exposing a union of ports across hadoop versions
# Well known ports including ssh
EXPOSE 0-1024 4040 7000-10100 5000-5100 50000-50200 58188 58088 58042

# Plain local files: COPY is sufficient, none of ADD's extra behaviour is needed.
COPY entrypoint.sh /entrypoint.sh
COPY export_container_ip.sh /usr/bin/

RUN chmod a+x /usr/bin/export_container_ip.sh \
    && chmod a+x /entrypoint.sh

ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]

View File

@@ -0,0 +1,107 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#######################################################################################
## COPIED FROM ##
## https://github.com/big-data-europe/docker-hadoop/blob/master/base/entrypoint.sh ##
# ##
#######################################################################################
# Default the HDFS URI to this container's fully-qualified hostname on port 8020,
# unless the caller already provided CORE_CONF_fs_defaultFS.
: "${CORE_CONF_fs_defaultFS:=hdfs://$(hostname -f):8020}"
export CORE_CONF_fs_defaultFS
# Insert a <property> element on the line before the closing </configuration> tag.
#   $1 - path of the Hadoop *-site.xml file, edited in place
#   $2 - property name
#   $3 - property value (written verbatim; it is not XML-escaped here)
function addProperty() {
  local path=$1
  local name=$2
  local value=$3

  local entry="<property><name>$name</name><value>${value}</value></property>"

  # Escape every character that is special on the replacement side of sed's
  # s command: the delimiter (/), the whole-match reference (&) and backslash.
  # printf with a quoted argument keeps the value byte-for-byte; an unquoted
  # echo would collapse whitespace and expand globs.
  local escapedEntry
  escapedEntry=$(printf '%s' "$entry" | sed 's|[\\/&]|\\&|g')

  sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" "$path"
}
# Copy every environment variable named <envPrefix>_<key> into a Hadoop site file.
#   $1 - site file to update
#   $2 - module name, used in the log line
#   $3 - environment variable prefix, e.g. CORE_CONF
# The <key> part is decoded into a property name: "___" becomes "-",
# "__" becomes "_", and a single "_" becomes ".".
function configure() {
  local path=$1
  local module=$2
  local envPrefix=$3

  # Declared local so the loop does not leak variables into the calling script.
  local c
  local name
  local var
  local value

  echo "Configuring $module"
  for c in $(printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix="$envPrefix"); do
    name=$(echo "${c}" | perl -pe 's/___/-/g; s/__/@/g; s/_/./g; s/@/_/g;')
    var="${envPrefix}_${c}"
    value=${!var}
    echo " - Setting $name=$value"
    # Write to the file the caller asked for instead of rebuilding the path from $module.
    addProperty "$path" "$name" "$value"
  done
}
# Apply <PREFIX>_* environment overrides to each Hadoop site file.
configure /etc/hadoop/core-site.xml core CORE_CONF
configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
configure /etc/hadoop/kms-site.xml kms KMS_CONF

# In a multihomed setup, bind the daemons to all interfaces and make HDFS
# clients and datanodes address datanodes by hostname.
if [ "$MULTIHOMED_NETWORK" = "1" ]; then
  echo "Configuring for multihomed network"

  # HDFS
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
  addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
  addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true

  # YARN (yarn.nodemanager.bind-host is written once; it used to be added twice)
  addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
  addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
  addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0

  # MAPRED
  # NOTE(review): this writes a yarn.* key into mapred-site.xml - confirm that is intended.
  addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
fi
# Optional Ganglia reporting: when GANGLIA_HOST is set, keep the stock metrics
# files as *.orig and generate replacements that report to $GANGLIA_HOST:8649.
if [[ -n "$GANGLIA_HOST" ]]; then
  mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
  mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig

  # hadoop-metrics.properties: three settings per context
  for module in mapred jvm rpc ugi; do
    printf '%s\n' \
      "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31" \
      "$module.period=10" \
      "$module.servers=$GANGLIA_HOST:8649"
  done > /etc/hadoop/hadoop-metrics.properties

  # hadoop-metrics2.properties: one Ganglia sink per daemon
  for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
    printf '%s\n' \
      "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31" \
      "$module.sink.ganglia.period=10" \
      "$module.sink.ganglia.supportsparse=true" \
      "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both" \
      "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40" \
      "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
  done > /etc/hadoop/hadoop-metrics2.properties
fi
# Save Container IP in ENV variable.
# The helper is sourced rather than executed: when it runs as a child process its
# `export MY_CONTAINER_IP` is lost as soon as that process exits, so the command
# exec'd below would never see the variable.
. /usr/bin/export_container_ip.sh

# Replace this shell with the requested command so it receives signals directly.
exec "$@"

View File

@@ -0,0 +1,30 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Resolve this container's IPv4 address and export it as MY_CONTAINER_IP.
# Interfaces are tried in order and the first one that yields an address wins;
# MY_CONTAINER_IP is exported empty when none does.
# The export only reaches the caller's environment if this file is sourced.
interfaces=( "en0" "eth0" )
ipAddr=""
for interface in "${interfaces[@]}"
do
  # Accept both the "inet addr:1.2.3.4" and "inet 1.2.3.4" ifconfig layouts and drop
  # loopback. head -n 1 keeps a single address even when the interface reports
  # several; a bare `head` would return up to ten lines.
  ipAddr=$(ifconfig "$interface" | grep -Eo 'inet (addr:)?([0-9]+\.){3}[0-9]+' | grep -Eo '([0-9]+\.){3}[0-9]+' | grep -v '127.0.0.1' | head -n 1)
  if [ -n "$ipAddr" ]; then
    break
  fi
done
echo "Container IP is set to : $ipAddr"
export MY_CONTAINER_IP=$ipAddr

View File

@@ -0,0 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hudi-hadoop-docker</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.11.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hudi-hadoop-base-java11-docker</artifactId>
<description>Base Docker Image with Hoodie</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.parent.basedir}</main.basedir>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<finalName>hudi</finalName>
<plugins>
<!--
Build Docker image: during pre-integration-test the dockerfile-maven-plugin builds
the base-java11 image and tags it twice, once as "latest" (tag-latest) and once as
${project.version} (tag-version). Both executions honour ${docker.build.skip};
the push goal is left commented out.
-->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<execution>
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
</execution>
<execution>
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-base-java11</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -28,6 +28,7 @@
<packaging>pom</packaging>
<modules>
<module>base</module>
<module>base_java11</module>
<module>namenode</module>
<module>datanode</module>
<module>historyserver</module>
@@ -37,6 +38,9 @@
<module>sparkworker</module>
<module>sparkadhoc</module>
<module>prestobase</module>
<module>trinobase</module>
<module>trinocoordinator</module>
<module>trinoworker</module>
</modules>
<dependencies>
@@ -54,6 +58,7 @@
<docker.hive.version>2.3.3</docker.hive.version>
<docker.hadoop.version>2.8.4</docker.hadoop.version>
<docker.presto.version>0.217</docker.presto.version>
<docker.trino.version>368</docker.trino.version>
<dockerfile.maven.version>1.4.13</dockerfile.maven.version>
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.basedir}</main.basedir>

View File

@@ -21,15 +21,36 @@ ARG SPARK_VERSION=2.4.4
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}-sparkbase_${SPARK_VERSION}
ARG PRESTO_VERSION=0.217
ARG TRINO_VERSION=368
COPY adhoc.sh /opt/spark
ENV SPARK_WORKER_WEBUI_PORT 8081
ENV SPARK_WORKER_LOG /spark/logs
ENV SPARK_MASTER "spark://spark-master:7077"
ENV PRESTO_VERSION ${PRESTO_VERSION}
ENV TRINO_VERSION ${TRINO_VERSION}
ENV BASE_URL=https://repo1.maven.org/maven2
RUN apt-get update
RUN apt-get install -y \
curl \
tar \
sudo \
rsync \
python \
wget \
python3-pip \
python-dev \
build-essential \
uuid-runtime \
less
RUN set -x \
## presto-client
&& wget -q -O /usr/local/bin/presto https://repo1.maven.org/maven2/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \
&& wget -q -O /usr/local/bin/presto ${BASE_URL}/com/facebook/presto/presto-cli/${PRESTO_VERSION}/presto-cli-${PRESTO_VERSION}-executable.jar \
&& chmod +x /usr/local/bin/presto
RUN set -x \
## trino-cli
&& wget -q -O /usr/local/bin/trino ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar \
&& chmod +x /usr/local/bin/trino
CMD ["/bin/bash", "/opt/spark/adhoc.sh"]

View File

@@ -22,10 +22,12 @@
export SPARK_HOME=/opt/spark
export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1"
export PRESTO_CLI_CMD="/usr/local/bin/presto --server presto-coordinator-1:8090"
export TRINO_CLI_CMD="/usr/local/bin/trino --server trino-coordinator-1:8091"
date
echo "SPARK HOME is : $SPARK_HOME"
echo "PRESTO CLI CMD is : $PRESTO_CLI_CMD"
echo "TRINO CLI CMD is : $TRINO_CLI_CMD"
tail -f /dev/null

View File

@@ -64,7 +64,9 @@
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version}</repository>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version}
</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
@@ -80,7 +82,9 @@
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version}</repository>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-hive_${docker.hive.version}-sparkadhoc_${docker.spark.version}
</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>

View File

@@ -0,0 +1,66 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
# Trino base image: adds the Trino CLI, the Trino server and the Hudi Trino bundle
# on top of the Java 11 Hadoop base image.
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest AS hadoop-base

ENV TRINO_VERSION=368
ENV TRINO_HOME=/usr/local/trino
ENV BASE_URL=https://repo1.maven.org/maven2

# update and install share one layer so a cached, stale package index is never
# reused by a later install; the index is dropped again in the same layer.
RUN apt-get update \
    && apt-get install -y \
        curl \
        tar \
        sudo \
        rsync \
        python \
        wget \
        python3-pip \
        python-dev \
        build-essential \
        uuid-runtime \
        less \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): confirm /usr/java/default exists in the base image.
# Kept on separate ENV lines because PATH references the new JAVA_HOME.
ENV JAVA_HOME=/usr/java/default
ENV PATH=$PATH:$JAVA_HOME/bin

# Trino CLI, installed as /usr/local/bin/trino-cli
WORKDIR /usr/local/bin
RUN wget -q ${BASE_URL}/io/trino/trino-cli/${TRINO_VERSION}/trino-cli-${TRINO_VERSION}-executable.jar \
    && chmod +x trino-cli-${TRINO_VERSION}-executable.jar \
    && mv trino-cli-${TRINO_VERSION}-executable.jar trino-cli

# Trino server, unpacked under /usr/local and reachable through $TRINO_HOME.
# The tarball is removed in the same layer so it does not stay in the image.
WORKDIR /usr/local
RUN wget -q ${BASE_URL}/io/trino/trino-server/${TRINO_VERSION}/trino-server-${TRINO_VERSION}.tar.gz \
    && tar xvzf trino-server-${TRINO_VERSION}.tar.gz -C /usr/local/ \
    && rm trino-server-${TRINO_VERSION}.tar.gz \
    && ln -s /usr/local/trino-server-${TRINO_VERSION} $TRINO_HOME

# Hudi Trino bundle: copied from the module's target/ directory into the Hive plugin.
# COPY creates the destination directory, so no separate mkdir is needed.
ENV TRINO_BASE_WS=/var/hoodie/ws/docker/hoodie/hadoop/trinobase
COPY target/ ${TRINO_BASE_WS}/target/
ENV HUDI_TRINO_BUNDLE=${TRINO_BASE_WS}/target/hudi-trino-bundle.jar
RUN cp ${HUDI_TRINO_BUNDLE} ${TRINO_HOME}/plugin/hive/

COPY scripts ${TRINO_HOME}/scripts
RUN chmod +x ${TRINO_HOME}/scripts/trino.sh

# The data directory is created before VOLUME is declared.
RUN mkdir -p $TRINO_HOME/data
VOLUME ["$TRINO_HOME/data"]

View File

@@ -0,0 +1,116 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hudi-hadoop-docker</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.11.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hudi-hadoop-trinobase-docker</artifactId>
<description>Trino Base Docker Image with Hudi</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.parent.basedir}</main.basedir>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-base-java11-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<!--
During the package phase, copy the versioned hudi-trino-bundle jar from the
packaging module into this module's target/ directory under a version-free name.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<phase>package</phase>
<configuration>
<target>
<copy file="${project.basedir}/../../../../packaging/hudi-trino-bundle/target/hudi-trino-bundle-${project.version}.jar"
tofile="target/hudi-trino-bundle.jar"/>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
<!--
Build Docker image: during pre-integration-test the dockerfile-maven-plugin builds
the trinobase image and tags it twice, once as "latest" (tag-latest) and once as
${project.version} (tag-version). Both executions honour ${docker.build.skip};
no push goal is enabled.
-->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<execution>
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version}
</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
</execution>
<execution>
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinobase_${docker.trino.version}
</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,21 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Start Trino in the foreground. exec replaces this wrapper shell with the launcher,
# so the server (not the shell) receives the stop signal sent to the container.
# Arguments passed to this script are intentionally not forwarded to the launcher.
exec /usr/local/trino/bin/launcher run

View File

@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
# Trino coordinator image: the trinobase image plus this module's etc/ configuration.
ARG HADOOP_VERSION=2.8.4
ARG TRINO_VERSION=368
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest AS trino-base

# Plain local directory: COPY is sufficient, none of ADD's extra behaviour is needed.
COPY etc /usr/local/trino/etc

EXPOSE 8091

# The relative ENTRYPOINT path below is resolved against this WORKDIR.
WORKDIR /usr/local/trino
ENTRYPOINT [ "./scripts/trino.sh" ]

View File

@@ -0,0 +1,22 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Catalog backed by the Hive connector: metadata comes from the metastore at
# hivemetastore:9083, the HDFS client settings come from the Hadoop site files
# under /etc/hadoop, and HDFS authentication is disabled.
connector.name=hive
hive.metastore.uri=thrift://hivemetastore:9083
hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml
hive.hdfs.authentication.type=NONE

View File

@@ -0,0 +1,26 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Coordinator node settings. The HTTP port (8091) is the same one used in
# discovery.uri below.
coordinator=true
node-scheduler.include-coordinator=false
http-server.http.port=8091
# Query memory limits: cluster-wide first, then per node.
query.max-memory=50GB
query.max-memory-per-node=1GB
query.max-total-memory-per-node=2GB
# NOTE(review): presumably the coordinator hosts the discovery service itself,
# since discovery.uri points back at this node - confirm.
discovery-server.enabled=true
discovery.uri=http://trino-coordinator-1:8091

View File

@@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# JVM command-line options, one per line.
-server
# Maximum heap size: 16 GB.
-Xmx16G
# G1 garbage collector with 32 MB regions.
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
# On OutOfMemoryError: write a heap dump, then kill the process (%p is the JVM pid).
-XX:+HeapDumpOnOutOfMemoryError
-XX:OnOutOfMemoryError=kill -9 %p
-Djdk.attach.allowAttachSelf=true

View File

@@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Minimum log level for Trino's own loggers. Trino classes live under the io.trino
# package; "io.trinosql" (a leftover of renaming io.prestosql) matches none of them.
io.trino=INFO

View File

@@ -0,0 +1,21 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Node identity and on-disk data location.
node.environment=development
# NOTE(review): node.id is hard-coded here - confirm every node in the cluster
# ends up with a distinct id.
node.id=3044b958-f077-4fce-87ed-ca8308f800b6
node.data-dir=/usr/local/trino/data

View File

@@ -0,0 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>hudi-hadoop-docker</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.11.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hudi-hadoop-trinocoordinator-docker</artifactId>
<description>Trino Coordinator Docker Image with Hudi</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<checkstyle.skip>true</checkstyle.skip>
<main.basedir>${project.parent.parent.basedir}</main.basedir>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-trinobase-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<!--
Build Docker image: during pre-integration-test the dockerfile-maven-plugin builds
the trinocoordinator image and tags it twice, once as "latest" (tag-latest) and
once as ${project.version} (tag-version). Both executions honour
${docker.build.skip}; no push goal is enabled.
-->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<execution>
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version}
</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
</execution>
<execution>
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinocoordinator_${docker.trino.version}
</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,29 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Trino docker setup is adapted from https://github.com/Lewuathe/docker-trino-cluster
# Trino worker image: the trinobase image plus this module's etc/ configuration.
ARG HADOOP_VERSION=2.8.4
ARG TRINO_VERSION=368
FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-trinobase_${TRINO_VERSION}:latest AS trino-base

# Plain local directory: COPY is sufficient, none of ADD's extra behaviour is needed.
COPY etc /usr/local/trino/etc

EXPOSE 8092

# The relative ENTRYPOINT path below is resolved against this WORKDIR.
WORKDIR /usr/local/trino
ENTRYPOINT [ "./scripts/trino.sh" ]

View File

@@ -0,0 +1,22 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Catalog backed by the Hive connector.
connector.name=hive
# Thrift endpoint of the Hive metastore (host `hivemetastore`, port 9083).
hive.metastore.uri=thrift://hivemetastore:9083
# Comma-separated Hadoop client configuration files passed to the connector.
hive.config.resources=/etc/hadoop/core-site.xml,/etc/hadoop/hdfs-site.xml
# HDFS is accessed without an authentication mechanism.
hive.hdfs.authentication.type=NONE

View File

@@ -0,0 +1,24 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Worker-side Trino server configuration: this node is not a coordinator.
coordinator=false
# Port of this node's HTTP server.
# NOTE(review): the worker Dockerfile declares EXPOSE 8092 while 8091 is
# configured here - confirm which port is intended.
http-server.http.port=8091
# Per-query memory limits: first across the cluster, then the two per-node limits.
query.max-memory=50GB
query.max-memory-per-node=1GB
query.max-total-memory-per-node=2GB
# Discovery service location: host `trino-coordinator-1`, port 8091.
discovery.uri=http://trino-coordinator-1:8091

View File

@@ -0,0 +1,27 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# JVM command-line options, one option per line. Keep comments on their own
# lines; each option line must stay verbatim.
-server
# Maximum heap size.
# NOTE(review): 16G may exceed the memory available to a local Docker demo - confirm.
-Xmx16G
# G1 garbage collector with 32 MB heap regions.
-XX:+UseG1GC
-XX:G1HeapRegionSize=32M
-XX:+UseGCOverheadLimit
-XX:+ExplicitGCInvokesConcurrent
# On OutOfMemoryError: write a heap dump, then kill the JVM process (%p is its pid).
-XX:+HeapDumpOnOutOfMemoryError
-XX:OnOutOfMemoryError=kill -9 %p
-Djdk.attach.allowAttachSelf=true

View File

@@ -0,0 +1,19 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Minimum log level for Trino's own loggers. Trino's classes live under the
# `io.trino` package; the previous `io.trinosql` name matched no logger.
io.trino=INFO

View File

@@ -0,0 +1,21 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Name of the environment this node belongs to.
node.environment=development
# Identifier of this node.
# NOTE(review): the value is hard-coded; if several containers are started from
# this file they would share one id - confirm whether the entrypoint script
# overrides it at startup.
node.id=6606f0b3-6ae7-4152-a4b1-ddadb6345fe6
# Directory for this node's data.
node.data-dir=/var/trino/data

View File

@@ -0,0 +1,96 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
Maven module for the Trino worker Docker image. Packaging is `pom`: the only
build work configured here is the dockerfile-maven-plugin below.
-->
<parent>
<artifactId>hudi-hadoop-docker</artifactId>
<groupId>org.apache.hudi</groupId>
<version>0.11.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<packaging>pom</packaging>
<artifactId>hudi-hadoop-trinoworker-docker</artifactId>
<description>Trino Worker Docker Image with Hudi</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<checkstyle.skip>true</checkstyle.skip>
<!-- Base directory two levels above this module (the parent's parent). -->
<main.basedir>${project.parent.parent.basedir}</main.basedir>
</properties>
<!--
Pins the version of the Trino base image module to this project's version.
NOTE(review): an entry under dependencyManagement does not by itself add a
dependency; confirm that build ordering relative to the base image module is
guaranteed elsewhere.
-->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-trinobase-docker</artifactId>
<version>${project.version}</version>
<type>pom</type>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<!-- Build Docker image -->
<!--
Both executions run the build and tag goals in the pre-integration-test phase
against the same repository and differ only in the tag applied. They are
skipped when docker.build.skip is true. The properties dockerfile.maven.version,
docker.build.skip, docker.hadoop.version and docker.trino.version are not
defined in this file; presumably they are inherited from a parent POM.
-->
<plugin>
<groupId>com.spotify</groupId>
<artifactId>dockerfile-maven-plugin</artifactId>
<version>${dockerfile.maven.version}</version>
<executions>
<!-- Tags the image as `latest`. -->
<execution>
<id>tag-latest</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version}
</repository>
<force>true</force>
<tag>latest</tag>
</configuration>
</execution>
<!-- Tags the image with the project version; the push goal is left commented out. -->
<execution>
<id>tag-version</id>
<phase>pre-integration-test</phase>
<goals>
<goal>build</goal>
<goal>tag</goal>
<!-- <goal>push</goal> -->
</goals>
<configuration>
<skip>${docker.build.skip}</skip>
<pullNewerImage>false</pullNewerImage>
<repository>
apachehudi/hudi-hadoop_${docker.hadoop.version}-trinoworker_${docker.trino.version}
</repository>
<force>true</force>
<tag>${project.version}</tag>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>