1
0

Docker container build-and-run setup with foundations for adding Docker integration tests. Docker images are built with Hadoop 2.8.4, Hive 2.3.3, and Spark 2.3.1, and are published to Docker Hub.

See the quickstart document for how to set up Docker and run the demo.
This commit is contained in:
Balaji Varadarajan
2018-08-21 22:54:57 -07:00
committed by vinoth chandar
parent 9710b5a3a6
commit f3418e4718
63 changed files with 8952 additions and 9 deletions

View File

@@ -0,0 +1,217 @@
# Docker Compose stack for Hudi integration testing:
# HDFS (namenode/datanode), YARN timeline (historyserver), Hive (metastore on
# PostgreSQL + HiveServer2), a Spark standalone cluster, Zookeeper/Kafka, and
# two "adhoc" Spark client containers that mount the Hudi workspace (${HUDI_WS}).
version: "3.3"

services:

  # HDFS NameNode; metadata persisted on the host under /tmp/hadoop_name.
  namenode:
    image: varadarb/hudi-hadoop_2.8.4-namenode:latest
    hostname: namenode
    container_name: namenode
    volumes:
      - /tmp/hadoop_name:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=hudi_hadoop284_hive232_spark231
    ports:
      - "50070:50070"  # NameNode web UI
      - "8020:8020"    # HDFS RPC (fs.defaultFS, see hadoop.env)
    env_file:
      - ./hadoop.env
    healthcheck:
      test: ["CMD", "curl", "-f", "http://namenode:50070"]
      interval: 30s
      timeout: 10s
      retries: 3

  # HDFS DataNode; block data persisted on the host under /tmp/hadoop_data.
  datanode1:
    image: varadarb/hudi-hadoop_2.8.4-datanode:latest
    container_name: datanode1
    hostname: datanode1
    environment:
      - CLUSTER_NAME=hudi_hadoop284_hive232_spark231
    env_file:
      - ./hadoop.env
    ports:
      - "50075:50075"  # DataNode web UI
      - "50010:50010"  # DataNode data transfer
    links:
      - "namenode"
      - "historyserver"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://datanode1:50075"]
      interval: 30s
      timeout: 10s
      retries: 3
    depends_on:
      - namenode
    volumes:
      - /tmp/hadoop_data:/hadoop/dfs/data

  # YARN timeline / application history server.
  historyserver:
    image: varadarb/hudi-hadoop_2.8.4-history:latest
    hostname: historyserver
    container_name: historyserver
    environment:
      - CLUSTER_NAME=hudi_hadoop284_hive232_spark231
    depends_on:
      - "namenode"
    links:
      - "namenode"
    ports:
      - "58188:8188"  # timeline service UI, remapped to avoid host conflicts
    healthcheck:
      test: ["CMD", "curl", "-f", "http://historyserver:8188"]
      interval: 30s
      timeout: 10s
      retries: 3
    env_file:
      - ./hadoop.env
    volumes:
      - historyserver:/hadoop/yarn/timeline

  # PostgreSQL backing store for the Hive metastore.
  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:2.3.0
    volumes:
      - hive-metastore-postgresql:/var/lib/postgresql
    hostname: hive-metastore-postgresql
    container_name: hive-metastore-postgresql

  # Hive metastore (thrift service on 9083); waits for HDFS and PostgreSQL.
  hivemetastore:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3:latest
    hostname: hivemetastore
    container_name: hivemetastore
    links:
      - "hive-metastore-postgresql"
      - "namenode"
    env_file:
      - ./hadoop.env
    command: /opt/hive/bin/hive --service metastore
    environment:
      SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432"
    ports:
      - "9083:9083"
    healthcheck:
      test: ["CMD", "nc", "-z", "hivemetastore", "9083"]
      interval: 30s
      timeout: 10s
      retries: 3
    depends_on:
      - "hive-metastore-postgresql"
      - "namenode"

  # HiveServer2 (JDBC on 10000); mounts the Hudi workspace for job jars.
  hiveserver:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3:latest
    hostname: hiveserver
    container_name: hiveserver
    env_file:
      - ./hadoop.env
    environment:
      SERVICE_PRECONDITION: "hivemetastore:9083"
    ports:
      - "10000:10000"
    depends_on:
      - "hivemetastore"
    links:
      - "hivemetastore"
      - "hive-metastore-postgresql"
      - "namenode"
    volumes:
      - ${HUDI_WS}:/var/hoodie/ws

  # Spark standalone master.
  sparkmaster:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.3.1:latest
    hostname: sparkmaster
    container_name: sparkmaster
    env_file:
      - ./hadoop.env
    ports:
      - "8080:8080"  # master web UI
      - "7077:7077"  # master RPC
    environment:
      - INIT_DAEMON_STEP=setup_spark
    links:
      - "hivemetastore"
      - "hiveserver"
      - "hive-metastore-postgresql"
      - "namenode"

  # Spark standalone worker, registered against sparkmaster.
  spark-worker-1:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.3.1:latest
    hostname: spark-worker-1
    container_name: spark-worker-1
    env_file:
      - ./hadoop.env
    depends_on:
      - sparkmaster
    ports:
      - "8081:8081"  # worker web UI
    environment:
      - "SPARK_MASTER=spark://sparkmaster:7077"
    links:
      - "hivemetastore"
      - "hiveserver"
      - "hive-metastore-postgresql"
      - "namenode"

  # Zookeeper, required by Kafka.
  zookeeper:
    image: "bitnami/zookeeper:3.4.12-r68"
    hostname: zookeeper
    container_name: zookeeper
    ports:
      - "2181:2181"
    environment:
      - ALLOW_ANONYMOUS_LOGIN=yes

  # Single Kafka broker for streaming-ingest tests.
  kafka:
    image: "bitnami/kafka:2.0.0"
    hostname: kafkabroker
    container_name: kafkabroker
    ports:
      - "9092:9092"
    environment:
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      - ALLOW_PLAINTEXT_LISTENER=yes

  # Interactive Spark client container #1 (driver UI on 4040),
  # with the Hudi workspace mounted at /var/hoodie/ws.
  adhoc-1:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.3.1:latest
    hostname: adhoc-1
    container_name: adhoc-1
    env_file:
      - ./hadoop.env
    depends_on:
      - sparkmaster
    ports:
      - "4040:4040"
    environment:
      - "SPARK_MASTER=spark://sparkmaster:7077"
    links:
      - "hivemetastore"
      - "hiveserver"
      - "hive-metastore-postgresql"
      - "namenode"
    volumes:
      - ${HUDI_WS}:/var/hoodie/ws

  # Interactive Spark client container #2 (no published ports).
  adhoc-2:
    image: varadarb/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.3.1:latest
    hostname: adhoc-2
    container_name: adhoc-2
    env_file:
      - ./hadoop.env
    depends_on:
      - sparkmaster
    environment:
      - "SPARK_MASTER=spark://sparkmaster:7077"
    links:
      - "hivemetastore"
      - "hiveserver"
      - "hive-metastore-postgresql"
      - "namenode"
    volumes:
      - ${HUDI_WS}:/var/hoodie/ws

# Named volumes for data that should survive container recreation.
# NOTE(review): the `namenode` volume is declared but the namenode service uses
# a /tmp bind mount instead — confirm whether it is still needed.
volumes:
  namenode:
  historyserver:
  hive-metastore-postgresql:

networks:
  default:

33
docker/compose/hadoop.env (new file)
View File

@@ -0,0 +1,33 @@
# Shared Hadoop/Hive/YARN configuration, injected into the compose services via env_file.
# NOTE(review): the variable naming appears to follow the bde2020 entrypoint
# convention (PREFIX_CONF_* keys are written into the matching *-site.xml,
# with '_' encoding '.' and '___' encoding '-') — confirm against the base images.

# Hive metastore: JDBC connection to the hive-metastore-postgresql container.
HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://hive-metastore-postgresql/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
HIVE_SITE_CONF_hive_metastore_uris=thrift://hivemetastore:9083

# HDFS: relax registration/permission checks for a single-host test cluster.
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
#HDFS_CONF_dfs_client_use_datanode_hostname=true
#HDFS_CONF_dfs_namenode_use_datanode_hostname=true

# Core: default filesystem points at the namenode container (port matches compose).
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

# YARN: log aggregation, RM recovery, and the timeline service on historyserver.
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
# NOTE(review): these reference a "resourcemanager" host that is not defined in
# the compose file shown here — confirm a resourcemanager service exists elsewhere.
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031
YARN_CONF_yarn_nodemanager_vmem___check___enabled=false