[HUDI-68] Pom cleanup & demo automation (#846)
- [HUDI-172] Cleanup Maven POM/Classpath - Fix ordering of dependencies in poms, to enable better resolution - Idea is to place more specific ones at the top - And place dependencies which use them below them - [HUDI-68] : Automate demo steps on docker setup - Move hive queries from hive cli to beeline - Standardize on taking query input from text command files - Deltastreamer ingest, also does hive sync in a single step - Spark Incremental Query materialized as a derived Hive table using datasource - Fix flakiness in HDFS spin up and output comparison - Code cleanup around streamlining and loc reduction - Also fixed pom to not shade some hive classs in spark, to enable hive sync
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -15,6 +15,7 @@ target/
|
||||
*.war
|
||||
*.ear
|
||||
*.db
|
||||
*.patch
|
||||
|
||||
######################
|
||||
# OSX
|
||||
@@ -74,5 +75,4 @@ dependency-reduced-pom.xml
|
||||
#######################################
|
||||
# Docker
|
||||
#######################################
|
||||
hoodie-integ-test/compose_env
|
||||
|
||||
hudi-integ-test/compose_env
|
||||
|
||||
12
.travis.yml
12
.travis.yml
@@ -1,16 +1,20 @@
|
||||
language: java
|
||||
jdk:
|
||||
- oraclejdk8
|
||||
- oraclejdk8
|
||||
sudo: required
|
||||
env:
|
||||
- HUDI_QUIETER_LOGGING=1
|
||||
- HUDI_QUIETER_LOGGING=1 TEST_SUITE=unit
|
||||
- TEST_SUITE=integration
|
||||
install: true
|
||||
services:
|
||||
- docker
|
||||
- docker
|
||||
cache:
|
||||
directories:
|
||||
- "$HOME/.m2"
|
||||
- "$HOME/.m2"
|
||||
notifications:
|
||||
slack:
|
||||
rooms:
|
||||
- secure: WNIZPBY//xf/xTJL1YUPzvPUDwjawaMM4IJ6IqxjRGcZCmuhNVu2XTJ3aL1g6X7ZcJKxJuwoU/TbSO8Dl6rgWSo/2OfyzBd4ks+hgeCsdycccTcvO8giQO1DOUGUSRdvUzOvKjWVK7iARYzQhoZawAYwI09UJLlwhYRCJ1IKc1ZksrEt964GeEmPyJbwMoZOJVUU84jJIAZPIpOFGTKM652FMermg9yaY2W5oSjDXaV98z0/mJV4Ry++J2v0fvoDs5HxkXYhZJP+dpWR82KDr6Q6LGL5/IlJ+b+IH3pF8LyKR4nCH6l1EZ8KpoFZapyYWYQpXMfQoF2K/JEQkpz1EqBCeEDSJ2+j1PPLhOWXd7ok4DsS26S8BP2ImvyXwua51THN1/r1fCGSIdxiQ5C8aeYmPCSr+oLChCVivEG2eeU34Z1nQJ5aDymNGeFE9qUUpjS0ETfFcjI/WQaA+FiYiPkDfeAoT1+6ySdY7l9gJhMygupILjq57IHbqx4nEr/8AB3Rqb8iIDTWDXgUBI9xKmty36zjIGcVOsCT/SGPccxvEJBXQk8uQqs/rDhaA/ErJPMLX/2b7ElSSObKFdjpMaxVvZIE6wvMLJpIYfChDoXwgfhN6zlAFZrEib7PFI4dGkS8u4wkkHkBS7C+uz2e92EhsAB+BIhUR1M3NQ33+Is=
|
||||
on_pull_requests: false
|
||||
script:
|
||||
tools/run_travis_tests.sh $TEST_SUITE
|
||||
5
docker/demo/compaction.commands
Normal file
5
docker/demo/compaction.commands
Normal file
@@ -0,0 +1,5 @@
|
||||
connect --path /user/hive/warehouse/stock_ticks_mor
|
||||
compactions show all
|
||||
compaction schedule
|
||||
compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
|
||||
|
||||
27
docker/demo/config/dfs-source.properties
Normal file
27
docker/demo/config/dfs-source.properties
Normal file
@@ -0,0 +1,27 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
include=base.properties
|
||||
# Key fields, for kafka example
|
||||
hoodie.datasource.write.recordkey.field=key
|
||||
hoodie.datasource.write.partitionpath.field=date
|
||||
# Schema provider props (change to absolute path based on your installation)
|
||||
hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
|
||||
hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc
|
||||
# DFS Source
|
||||
hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/
|
||||
21
docker/demo/get_min_commit_time.sh
Executable file
21
docker/demo/get_min_commit_time.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
MIN_COMMIT_TIME=`hdfs dfs -ls -t /user/hive/warehouse/stock_ticks_cow/.hoodie/*.commit | head -1 | awk -F'/' ' { print $7 } ' | awk -F'.' ' { print $1 } '`
|
||||
echo $MIN_COMMIT_TIME;
|
||||
11
docker/demo/hive-batch1.commands
Normal file
11
docker/demo/hive-batch1.commands
Normal file
@@ -0,0 +1,11 @@
|
||||
add jar ${hudi.hadoop.bundle};
|
||||
|
||||
select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG';
|
||||
select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
|
||||
select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
|
||||
|
||||
select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG';
|
||||
select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
|
||||
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
|
||||
|
||||
!quit
|
||||
9
docker/demo/hive-batch2-after-compaction.commands
Normal file
9
docker/demo/hive-batch2-after-compaction.commands
Normal file
@@ -0,0 +1,9 @@
|
||||
add jar ${hudi.hadoop.bundle};
|
||||
|
||||
select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
|
||||
select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
|
||||
|
||||
select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
|
||||
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
|
||||
|
||||
!quit
|
||||
10
docker/demo/hive-incremental.commands
Normal file
10
docker/demo/hive-incremental.commands
Normal file
@@ -0,0 +1,10 @@
|
||||
add jar ${hudi.hadoop.bundle};
|
||||
|
||||
set hoodie.stock_ticks_cow.consume.mode=INCREMENTAL;
|
||||
set hoodie.stock_ticks_cow.consume.max.commits=3;
|
||||
set hoodie.stock_ticks_cow.consume.start.timestamp=${min.commit.time};
|
||||
|
||||
select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
|
||||
|
||||
!quit
|
||||
|
||||
10
docker/demo/hive-table-check.commands
Normal file
10
docker/demo/hive-table-check.commands
Normal file
@@ -0,0 +1,10 @@
|
||||
add jar ${hudi.hadoop.bundle};
|
||||
show tables;
|
||||
|
||||
show partitions stock_ticks_cow;
|
||||
show partitions stock_ticks_mor;
|
||||
show partitions stock_ticks_mor_rt;
|
||||
|
||||
!quit
|
||||
|
||||
|
||||
30
docker/demo/sparksql-batch1.commands
Normal file
30
docker/demo/sparksql-batch1.commands
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
spark.sql("show tables").show(100, false)
|
||||
// Copy-On-Write table
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
|
||||
|
||||
// Merge-On-Read table
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
|
||||
|
||||
System.exit(0)
|
||||
29
docker/demo/sparksql-batch2.commands
Normal file
29
docker/demo/sparksql-batch2.commands
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Copy-On-Write table
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
|
||||
|
||||
// Merge-On-Read table
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
|
||||
|
||||
System.exit(0)
|
||||
59
docker/demo/sparksql-incremental.commands
Normal file
59
docker/demo/sparksql-incremental.commands
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.hudi.DataSourceReadOptions;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.HoodieDataSourceHelpers;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
||||
val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
|
||||
val beginInstantTime = HoodieDataSourceHelpers.listCommitsSince(fs, "/user/hive/warehouse/stock_ticks_cow", "00000").get(0)
|
||||
val hoodieIncViewDF = spark.read.format("org.apache.hudi").
|
||||
option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL).
|
||||
option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, beginInstantTime).
|
||||
load("/user/hive/warehouse/stock_ticks_cow");
|
||||
hoodieIncViewDF.registerTempTable("stock_ticks_cow_incr")
|
||||
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_incr where symbol = 'GOOG'").show(100, false);
|
||||
|
||||
spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_incr").
|
||||
write.format("org.apache.hudi").
|
||||
option("hoodie.insert.shuffle.parallelism", "2").
|
||||
option("hoodie.upsert.shuffle.parallelism","2").
|
||||
option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL).
|
||||
option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
|
||||
option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
|
||||
option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
|
||||
option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
|
||||
option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor").
|
||||
option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor").
|
||||
option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
|
||||
option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
|
||||
option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
|
||||
option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
|
||||
option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
|
||||
option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
|
||||
mode(SaveMode.Overwrite).
|
||||
save("/user/hive/warehouse/stock_ticks_derived_mor");
|
||||
|
||||
spark.sql("show tables").show(20, false)
|
||||
spark.sql("select count(*) from stock_ticks_derived_mor").show(20, false)
|
||||
spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)
|
||||
|
||||
System.exit(0);
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
HUDI_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
||||
HOODIE_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||
echo "setting hadoop conf dir"
|
||||
HADOOP_CONF_DIR="/etc/hadoop/conf"
|
||||
@@ -13,5 +13,4 @@ fi
|
||||
if [ -z "$CLIENT_JAR" ]; then
|
||||
echo "client jar location not set"
|
||||
fi
|
||||
echo "java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap"
|
||||
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap
|
||||
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
|
||||
|
||||
@@ -29,8 +29,6 @@
|
||||
<properties>
|
||||
<spring.shell.version>1.2.0.RELEASE</spring.shell.version>
|
||||
<jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
<junit.version>4.10</junit.version>
|
||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||
</properties>
|
||||
|
||||
@@ -61,7 +59,7 @@
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<version>${scala-maven-plugin.version}</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
@@ -133,23 +131,42 @@
|
||||
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- Scala -->
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.shell</groupId>
|
||||
<artifactId>spring-shell</artifactId>
|
||||
<version>${spring.shell.version}</version>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>de.vandermeer</groupId>
|
||||
<artifactId>asciitable</artifactId>
|
||||
<version>0.2.5</version>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
@@ -159,6 +176,24 @@
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.shell</groupId>
|
||||
<artifactId>spring-shell</artifactId>
|
||||
<version>${spring.shell.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>de.vandermeer</groupId>
|
||||
<artifactId>asciitable</artifactId>
|
||||
<version>0.2.5</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.jakewharton.fliptables</groupId>
|
||||
<artifactId>fliptables</artifactId>
|
||||
@@ -166,60 +201,25 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit-dep</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<version>${junit-dep.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>2.9.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
@@ -217,12 +217,23 @@ public class CompactionCommand implements CommandMarker {
|
||||
final String sparkMemory,
|
||||
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
|
||||
final String retry,
|
||||
@CliOption(key = "compactionInstant", mandatory = true, help = "Base path for the target hoodie dataset")
|
||||
final String compactionInstantTime) throws Exception {
|
||||
@CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
|
||||
String compactionInstantTime) throws Exception {
|
||||
boolean initialized = HoodieCLI.initConf();
|
||||
HoodieCLI.initFS(initialized);
|
||||
|
||||
if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||
if (null == compactionInstantTime) {
|
||||
// pick outstanding one with lowest timestamp
|
||||
Option<String> firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
|
||||
.filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
|
||||
.equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
|
||||
if (!firstPendingInstant.isPresent()) {
|
||||
return "NO PENDING COMPACTION TO RUN";
|
||||
}
|
||||
compactionInstantTime = firstPendingInstant.get();
|
||||
}
|
||||
|
||||
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
|
||||
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||
|
||||
@@ -68,6 +68,7 @@
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
@@ -78,6 +79,71 @@
|
||||
<artifactId>hudi-timeline-service</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Dropwizard Metrics -->
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-graphite</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<version>1.48</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.htrace</groupId>
|
||||
<artifactId>htrace-core</artifactId>
|
||||
<version>3.0.4</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
@@ -117,6 +183,14 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Hbase -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Tests -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
@@ -126,61 +200,35 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-graphite</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<version>1.48</version>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Parent dependencies -->
|
||||
<!-- HBase - Tests -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-testing-util</artifactId>
|
||||
<version>${hbase.version}</version>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-core-asl</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive - Tests -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
@@ -191,46 +239,7 @@
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Hbase dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.htrace</groupId>
|
||||
<artifactId>htrace-core</artifactId>
|
||||
<version>3.0.4</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-testing-util</artifactId>
|
||||
<version>1.2.3</version>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-core-asl</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.5</version>
|
||||
<version>${maven-jar-plugin.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
@@ -79,77 +79,20 @@
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.5.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.5.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
@@ -161,21 +104,91 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>com.github.stefanbirkner</groupId>
|
||||
<artifactId>system-rules</artifactId>
|
||||
<version>1.16.0</version>
|
||||
<scope>test</scope>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Twitter -->
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo-shaded</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.stefanbirkner</groupId>
|
||||
<artifactId>system-rules</artifactId>
|
||||
<version>1.16.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -30,32 +30,52 @@
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hudi -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
@@ -64,52 +84,41 @@
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
|
||||
@@ -31,6 +31,81 @@
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
@@ -48,66 +123,17 @@
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop Testing -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
@@ -139,64 +165,53 @@
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware.kryo</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<version>2.21</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>jetty-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
@@ -213,7 +228,7 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<version>2.5</version>
|
||||
<version>${maven-jar-plugin.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
|
||||
@@ -9,32 +9,57 @@
|
||||
<artifactId>hudi-integ-test</artifactId>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.connectors</groupId>
|
||||
<artifactId>jersey-apache-connector</artifactId>
|
||||
<version>2.17</version>
|
||||
</dependency>
|
||||
<!-- Glassfish -->
|
||||
<!-- needs to be at top -->
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.core</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
<version>2.17</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.connectors</groupId>
|
||||
<artifactId>jersey-apache-connector</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.containers</groupId>
|
||||
<artifactId>jersey-container-servlet-core</artifactId>
|
||||
<version>2.17</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Docker-Java - Test-->
|
||||
<!-- needs to be at top to force javax.ws version -->
|
||||
<dependency>
|
||||
<groupId>com.github.docker-java</groupId>
|
||||
<artifactId>docker-java</artifactId>
|
||||
<version>3.1.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.glassfish.**</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Import -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-sparkworker-docker</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
@@ -43,12 +68,6 @@
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.awaitility</groupId>
|
||||
<artifactId>awaitility</artifactId>
|
||||
<version>3.1.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
@@ -56,56 +75,39 @@
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.glassfish.**</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml - Test-->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-guava</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.awaitility</groupId>
|
||||
<artifactId>awaitility</artifactId>
|
||||
<version>3.1.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>20.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<version>2.6.4</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>2.6.4</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-guava</artifactId>
|
||||
<version>2.9.4</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.github.docker-java</groupId>
|
||||
<artifactId>docker-java</artifactId>
|
||||
<version>3.1.0-rc-3</version>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.glassfish.**</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-sparkworker-docker</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
@@ -150,12 +152,25 @@
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-failsafe-plugin</artifactId>
|
||||
<version>2.22.0</version>
|
||||
<configuration>
|
||||
<includes>
|
||||
<include>**/ITT*.java</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>integration-test</phase>
|
||||
<goals>
|
||||
<goal>integration-test</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>verify</id>
|
||||
<phase>verify</phase>
|
||||
<goals>
|
||||
<goal>verify</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
@@ -179,7 +194,7 @@
|
||||
</execution>
|
||||
<execution>
|
||||
<id>down</id>
|
||||
<phase>integration-test</phase>
|
||||
<phase>post-integration-test</phase>
|
||||
<goals>
|
||||
<goal>down</goal>
|
||||
</goals>
|
||||
|
||||
@@ -31,14 +31,14 @@ import com.github.dockerjava.core.DockerClientBuilder;
|
||||
import com.github.dockerjava.core.DockerClientConfig;
|
||||
import com.github.dockerjava.core.command.ExecStartResultCallback;
|
||||
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.Assert;
|
||||
@@ -52,38 +52,64 @@ public abstract class ITTestBase {
|
||||
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
|
||||
protected static final String HIVESERVER = "/hiveserver";
|
||||
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
|
||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_app.sh";
|
||||
protected static final String HUDI_HADOOP_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
||||
protected static final String HUDI_HIVE_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
|
||||
protected static final String HUDI_SPARK_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
|
||||
protected static final String HUDI_UTILITIES_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-utilities.jar";
|
||||
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
|
||||
protected static final String HADOOP_CONF_DIR = "/etc/hadoop";
|
||||
|
||||
// Skip these lines when capturing output from hive
|
||||
protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;
|
||||
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
|
||||
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
|
||||
protected DockerClient dockerClient;
|
||||
protected Map<String, Container> runningContainers;
|
||||
|
||||
protected static String[] getHiveConsoleCommand(String rawCommand) {
|
||||
static String[] getHiveConsoleCommand(String rawCommand) {
|
||||
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
|
||||
String fullCommand = jarCommand + rawCommand;
|
||||
|
||||
List<String> cmd = new ImmutableList.Builder().add("hive")
|
||||
.add("--hiveconf")
|
||||
.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
|
||||
.add("--hiveconf")
|
||||
.add("hive.stats.autogather=false")
|
||||
.add("-e")
|
||||
.add("\"" + fullCommand + "\"")
|
||||
.build();
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add("hive");
|
||||
cmd.add("--hiveconf");
|
||||
cmd.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
|
||||
cmd.add("--hiveconf");
|
||||
cmd.add("hive.stats.autogather=false");
|
||||
cmd.add("-e");
|
||||
cmd.add("\"" + fullCommand + "\"");
|
||||
return cmd.stream().toArray(String[]::new);
|
||||
}
|
||||
|
||||
private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append("beeline -u " + HIVE_SERVER_JDBC_URL)
|
||||
.append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
|
||||
.append(" --hiveconf hive.stats.autogather=false ")
|
||||
.append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
|
||||
|
||||
if (additionalVar != null) {
|
||||
builder.append(" --hivevar " + additionalVar + " ");
|
||||
}
|
||||
return builder.append(" -f ").append(commandFile).toString();
|
||||
}
|
||||
|
||||
static String getSparkShellCommand(String commandFile) {
|
||||
return new StringBuilder()
|
||||
.append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
|
||||
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
|
||||
.append(" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
||||
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ")
|
||||
.append(" -i ").append(commandFile)
|
||||
.toString();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void init() throws IOException {
|
||||
public void init() {
|
||||
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
|
||||
//Assuming insecure docker engine
|
||||
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
||||
@@ -104,7 +130,7 @@ public abstract class ITTestBase {
|
||||
List<Container> containerList = dockerClient.listContainersCmd().exec();
|
||||
for (Container c : containerList) {
|
||||
if (!c.getState().equalsIgnoreCase("running")) {
|
||||
System.out.println("Container : " + Arrays.toString(c.getNames())
|
||||
LOG.info("Container : " + Arrays.toString(c.getNames())
|
||||
+ "not in running state, Curr State :" + c.getState());
|
||||
return false;
|
||||
}
|
||||
@@ -114,10 +140,12 @@ public abstract class ITTestBase {
|
||||
return true;
|
||||
}
|
||||
|
||||
protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
||||
boolean expectedToSucceed)
|
||||
throws Exception {
|
||||
LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
|
||||
private String singleSpace(String str) {
|
||||
return str.replaceAll("[\\s]+"," ");
|
||||
}
|
||||
|
||||
private TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
||||
boolean expectedToSucceed) throws Exception {
|
||||
Container sparkWorkerContainer = runningContainers.get(containerName);
|
||||
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
|
||||
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
|
||||
@@ -128,12 +156,10 @@ public abstract class ITTestBase {
|
||||
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
|
||||
.exec(callback).awaitCompletion();
|
||||
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
|
||||
LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
|
||||
if (exitCode != 0) {
|
||||
LOG.error("Command (" + Arrays.toString(command) + ") failed.");
|
||||
LOG.error("Stdout is :" + callback.getStdout().toString());
|
||||
LOG.error("Stderr is :" + callback.getStderr().toString());
|
||||
}
|
||||
LOG.info("Exit code for command : " + exitCode);
|
||||
LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
|
||||
LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
|
||||
|
||||
if (expectedToSucceed) {
|
||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
|
||||
@@ -145,6 +171,71 @@ public abstract class ITTestBase {
|
||||
return callback;
|
||||
}
|
||||
|
||||
void executeCommandStringsInDocker(String containerName, List<String> commands) throws Exception {
|
||||
for (String cmd : commands) {
|
||||
executeCommandStringInDocker(containerName, cmd, true);
|
||||
}
|
||||
}
|
||||
|
||||
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd,
|
||||
boolean expectedToSucceed) throws Exception {
|
||||
LOG.info("\n\n#################################################################################################");
|
||||
LOG.info("Container : " + containerName + ", Running command :" + cmd);
|
||||
LOG.info("\n#################################################################################################");
|
||||
|
||||
String[] cmdSplits = singleSpace(cmd).split(" ");
|
||||
return executeCommandInDocker(containerName, cmdSplits, expectedToSucceed);
|
||||
}
|
||||
|
||||
Pair<String, String> executeHiveCommand(String hiveCommand) throws Exception {
|
||||
|
||||
LOG.info("\n\n#################################################################################################");
|
||||
LOG.info("Running hive command :" + hiveCommand);
|
||||
LOG.info("\n#################################################################################################");
|
||||
|
||||
String[] hiveCmd = getHiveConsoleCommand(hiveCommand);
|
||||
TestExecStartResultCallback callback = executeCommandInDocker(HIVESERVER, hiveCmd, true);
|
||||
return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
|
||||
}
|
||||
|
||||
Pair<String, String> executeHiveCommandFile(String commandFile) throws Exception {
|
||||
return executeHiveCommandFile(commandFile, null);
|
||||
}
|
||||
|
||||
Pair<String, String> executeHiveCommandFile(String commandFile, String additionalVar) throws Exception {
|
||||
String hiveCmd = getHiveConsoleCommandFile(commandFile, additionalVar);
|
||||
TestExecStartResultCallback callback = executeCommandStringInDocker(HIVESERVER, hiveCmd, true);
|
||||
return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
|
||||
}
|
||||
|
||||
Pair<String, String> executeSparkSQLCommand(String commandFile, boolean expectedToSucceed) throws Exception {
|
||||
String sparkShellCmd = getSparkShellCommand(commandFile);
|
||||
TestExecStartResultCallback callback = executeCommandStringInDocker(ADHOC_1_CONTAINER,
|
||||
sparkShellCmd, expectedToSucceed);
|
||||
return Pair.of(callback.getStdout().toString(), callback.getStderr().toString());
|
||||
}
|
||||
|
||||
void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput) {
|
||||
assertStdOutContains(stdOutErr, expectedOutput, 1);
|
||||
}
|
||||
|
||||
void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput, int times) {
|
||||
// this is so that changes in padding don't affect comparison
|
||||
String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", "");
|
||||
expectedOutput = singleSpace(expectedOutput).replaceAll(" ", "");
|
||||
|
||||
int lastIndex = 0;
|
||||
int count = 0;
|
||||
while(lastIndex != -1){
|
||||
lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
|
||||
if(lastIndex != -1){
|
||||
count ++;
|
||||
lastIndex += expectedOutput.length();
|
||||
}
|
||||
}
|
||||
Assert.assertEquals("Did not find output the expected number of times", times, count);
|
||||
}
|
||||
|
||||
public class TestExecStartResultCallback extends ExecStartResultCallback {
|
||||
|
||||
// Storing the reference in subclass to expose to clients
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.integ;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import java.util.List;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
|
||||
*
|
||||
* To run this as a standalone test in the IDE or command line. First bring up the demo setup using
|
||||
* `docker/setup_demo.sh` and then run the test class as you would do normally.
|
||||
*/
|
||||
public class ITTestHoodieDemo extends ITTestBase {
|
||||
|
||||
private static String HDFS_DATA_DIR = "/usr/hive/data/input";
|
||||
private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json";
|
||||
private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json";
|
||||
|
||||
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT +
|
||||
"/docker/demo/data/batch_1.json";
|
||||
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT +
|
||||
"/docker/demo/data/batch_2.json";
|
||||
|
||||
private static String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
|
||||
private static String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
|
||||
private static String COW_TABLE_NAME = "stock_ticks_cow";
|
||||
private static String MOR_TABLE_NAME = "stock_ticks_mor";
|
||||
|
||||
private static String DEMO_CONTAINER_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/setup_demo_container.sh";
|
||||
private static String MIN_COMMIT_TIME_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/get_min_commit_time.sh";
|
||||
private static String HUDI_CLI_TOOL = HOODIE_WS_ROOT + "/hudi-cli/hudi-cli.sh";
|
||||
private static String COMPACTION_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/compaction.commands";
|
||||
private static String SPARKSQL_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch1.commands";
|
||||
private static String SPARKSQL_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch2.commands";
|
||||
private static String SPARKSQL_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-incremental.commands";
|
||||
private static String HIVE_TBLCHECK_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-table-check.commands";
|
||||
private static String HIVE_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch1.commands";
|
||||
private static String HIVE_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch2-after-compaction.commands";
|
||||
private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands";
|
||||
|
||||
|
||||
private static String HIVE_SYNC_CMD_FMT = " --enable-hive-sync "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
|
||||
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
|
||||
|
||||
|
||||
@Test
|
||||
public void testDemo() throws Exception {
|
||||
setupDemo();
|
||||
|
||||
// batch 1
|
||||
ingestFirstBatchAndHiveSync();
|
||||
testHiveAfterFirstBatch();
|
||||
testSparkSQLAfterFirstBatch();
|
||||
|
||||
// batch 2
|
||||
ingestSecondBatchAndHiveSync();
|
||||
testHiveAfterSecondBatch();
|
||||
testSparkSQLAfterSecondBatch();
|
||||
testIncrementalHiveQuery();
|
||||
testIncrementalSparkSQLQuery();
|
||||
|
||||
// compaction
|
||||
scheduleAndRunCompaction();
|
||||
testHiveAfterSecondBatchAfterCompaction();
|
||||
testIncrementalHiveQueryAfterCompaction();
|
||||
}
|
||||
|
||||
private void setupDemo() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("hdfs dfsadmin -safemode wait") // handle NN going into safe mode at times
|
||||
.add("hdfs dfs -mkdir -p " + HDFS_DATA_DIR)
|
||||
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1)
|
||||
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT)
|
||||
.build();
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
}
|
||||
|
||||
private void ingestFirstBatchAndHiveSync() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.build();
|
||||
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
}
|
||||
|
||||
private void testHiveAfterFirstBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_TBLCHECK_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_cow |");
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor |");
|
||||
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
|
||||
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| partition |\n"
|
||||
+ "+----------------+\n"
|
||||
+ "| dt=2018-08-31 |\n"
|
||||
+ "+----------------+\n", 3);
|
||||
|
||||
stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 |\n", 3);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n", 3);
|
||||
}
|
||||
|
||||
private void testSparkSQLAfterFirstBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n"
|
||||
+ "|default |stock_ticks_mor_rt |false |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||
+ "+------+-------------------+", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3);
|
||||
}
|
||||
|
||||
private void ingestSecondBatchAndHiveSync() throws Exception {
|
||||
List<String> cmds = new ImmutableList.Builder<String>()
|
||||
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||
+ " --props /var/demo/config/dfs-source.properties "
|
||||
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||
+ " --disable-compaction "
|
||||
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||
.build();
|
||||
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||
}
|
||||
|
||||
private void testHiveAfterSecondBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 |\n");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 |\n", 2);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n", 2);
|
||||
}
|
||||
|
||||
private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | _c1 |\n"
|
||||
+ "+---------+----------------------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 |", 2);
|
||||
assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |", 2);
|
||||
}
|
||||
|
||||
private void testSparkSQLAfterSecondBatch() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:59:00|\n"
|
||||
+ "+------+-------------------+", 2);
|
||||
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2);
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"+------+-------------------+\n"
|
||||
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||
+ "+------+-------------------+");
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|");
|
||||
}
|
||||
|
||||
private void testIncrementalHiveQuery() throws Exception {
|
||||
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||
.getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||
"min.commit.time=" + minCommitTime +"`");
|
||||
assertStdOutContains(stdOutErrPair, "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||
}
|
||||
|
||||
private void testIncrementalHiveQueryAfterCompaction() throws Exception {
|
||||
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||
.getStdout().toString();
|
||||
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||
"min.commit.time=" + minCommitTime +"`");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"| symbol | ts | volume | open | close |\n"
|
||||
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||
}
|
||||
|
||||
private void testIncrementalSparkSQLQuery() throws Exception {
|
||||
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
|
||||
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|default |stock_ticks_cow |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor |false |\n"
|
||||
+ "|default |stock_ticks_derived_mor_rt|false |\n"
|
||||
+ "|default |stock_ticks_mor |false |\n"
|
||||
+ "|default |stock_ticks_mor_rt |false |\n"
|
||||
+ "| |stock_ticks_cow_incr |true |");
|
||||
assertStdOutContains(stdOutErrPair,
|
||||
"|count(1)|\n"
|
||||
+ "+--------+\n"
|
||||
+ "|99 |", 2);
|
||||
}
|
||||
|
||||
private void scheduleAndRunCompaction() throws Exception {
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + COMPACTION_COMMANDS, true);
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
package org.apache.hudi.integ;
|
||||
|
||||
import java.util.Arrays;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -33,17 +33,6 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
NON_PARTITIONED,
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRunEcho() throws Exception {
|
||||
String[] cmd = new String[]{"echo", "Happy Testing"};
|
||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
||||
cmd, true);
|
||||
String stdout = callback.getStdout().toString();
|
||||
String stderr = callback.getStderr().toString();
|
||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
||||
}
|
||||
|
||||
@Test
|
||||
/**
|
||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
|
||||
@@ -53,6 +42,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
|
||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.SINGLE_KEY_PARTITIONED);
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -64,6 +54,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
|
||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.MULTI_KEYS_PARTITIONED);
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -75,6 +66,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
|
||||
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
|
||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.NON_PARTITIONED);
|
||||
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -89,109 +81,54 @@ public class ITTestHoodieSanity extends ITTestBase {
|
||||
String hdfsUrl = "hdfs://namenode" + hdfsPath;
|
||||
|
||||
// Drop Table if it exists
|
||||
{
|
||||
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
|
||||
executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
|
||||
String hiveDropCmd = "drop table if exists " + hiveTableName;
|
||||
try {
|
||||
executeHiveCommand(hiveDropCmd);
|
||||
} catch (AssertionError ex) {
|
||||
// In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
|
||||
// Workaround to sleep for 5 secs and retry
|
||||
Thread.sleep(5000);
|
||||
executeHiveCommand(hiveDropCmd);
|
||||
}
|
||||
|
||||
// Ensure table does not exist
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString();
|
||||
String stdout = callback.getStdout().toString();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
|
||||
Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
|
||||
}
|
||||
Pair<String, String> stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||
Assert.assertTrue("Dropped table " + hiveTableName + " exists!", stdOutErr.getLeft().isEmpty());
|
||||
|
||||
// Run Hoodie Java App
|
||||
{
|
||||
String[] cmd = null;
|
||||
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
||||
cmd = new String[]{
|
||||
HOODIE_JAVA_APP,
|
||||
"--hive-sync",
|
||||
"--table-path", hdfsUrl,
|
||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||
"--hive-table", hiveTableName
|
||||
};
|
||||
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||
cmd = new String[]{
|
||||
HOODIE_JAVA_APP,
|
||||
"--hive-sync",
|
||||
"--table-path", hdfsUrl,
|
||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||
"--use-multi-partition-keys",
|
||||
"--hive-table", hiveTableName
|
||||
};
|
||||
} else {
|
||||
cmd = new String[]{
|
||||
HOODIE_JAVA_APP,
|
||||
"--hive-sync",
|
||||
"--table-path", hdfsUrl,
|
||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
||||
"--non-partitioned",
|
||||
"--hive-table", hiveTableName
|
||||
};
|
||||
}
|
||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
||||
cmd, true);
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
||||
String cmd;
|
||||
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName;
|
||||
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||
+ " --use-multi-partition-keys";
|
||||
} else {
|
||||
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||
+ " --non-partitioned";
|
||||
}
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
|
||||
// Ensure table does exist
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||
Assert.assertEquals("Table exists", hiveTableName, stdout);
|
||||
}
|
||||
stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||
Assert.assertEquals("Table exists", hiveTableName, stdOutErr.getLeft());
|
||||
|
||||
// Ensure row count is 100 (without duplicates)
|
||||
{
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
||||
Integer.parseInt(stdout.trim()));
|
||||
}
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
||||
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||
|
||||
// Make the HDFS dataset non-hoodie and run the same query
|
||||
// Checks for interoperability with non-hoodie tables
|
||||
{
|
||||
// Delete Hoodie directory to make it non-hoodie dataset
|
||||
String[] cmd = new String[]{
|
||||
"hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
|
||||
};
|
||||
TestExecStartResultCallback callback =
|
||||
executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||
String stderr = callback.getStderr().toString().trim();
|
||||
String stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
|
||||
|
||||
// Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
|
||||
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
||||
callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
||||
stderr = callback.getStderr().toString().trim();
|
||||
stdout = callback.getStdout().toString().trim();
|
||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
||||
Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
|
||||
Integer.parseInt(stdout.trim()));
|
||||
}
|
||||
// Delete Hoodie directory to make it non-hoodie dataset
|
||||
executeCommandStringInDocker(ADHOC_1_CONTAINER, "hdfs dfs -rm -r " + hdfsPath + "/.hoodie", true);
|
||||
|
||||
// Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
|
||||
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||
Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
|
||||
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,13 +23,10 @@
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<properties>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
<junit.version>4.10</junit.version>
|
||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||
</properties>
|
||||
|
||||
@@ -52,12 +49,11 @@
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>3.3.1</version>
|
||||
<version>${scala-maven-plugin.version}</version>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>2.0.2</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
@@ -156,95 +152,14 @@
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<!-- Scala -->
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.scalatest</groupId>
|
||||
<artifactId>scalatest_2.11</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
@@ -265,12 +180,93 @@
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit-dep</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark (Packages) -->
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
@@ -287,5 +283,19 @@
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scalatest</groupId>
|
||||
<artifactId>scalatest_2.11</artifactId>
|
||||
<version>${scalatest.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit-dep</artifactId>
|
||||
<version>${junit-dep.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -121,40 +121,57 @@
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>io.javalin</groupId>
|
||||
<artifactId>javalin</artifactId>
|
||||
<version>2.8.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<version>2.9.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-core</artifactId>
|
||||
<version>2.9.7</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>2.9.7</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.javalin</groupId>
|
||||
<artifactId>javalin</artifactId>
|
||||
<version>2.8.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<version>1.48</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
@@ -194,25 +211,6 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<version>1.48</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Parent dependencies -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
@@ -223,20 +221,34 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -76,6 +76,124 @@
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
<!-- Jetty -->
|
||||
<dependency>
|
||||
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>jetty-all</artifactId>
|
||||
<version>7.6.0.v20120127</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
<artifactId>jetty-server</artifactId>
|
||||
<version>7.6.0.v20120127</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Dropwizard Metrics -->
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.javalin</groupId>
|
||||
<artifactId>javalin</artifactId>
|
||||
@@ -89,43 +207,83 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<groupId>com.yammer.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Used for SQL templating -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_2.11</artifactId>
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Kafka -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-avro-serializer</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-config</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-utils</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-schema-registry-client</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
@@ -133,13 +291,6 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty</groupId>
|
||||
<artifactId>jetty-server</artifactId>
|
||||
<version>7.6.0.v20120127</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
@@ -165,6 +316,7 @@
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
@@ -180,38 +332,13 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
@@ -220,163 +347,35 @@
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive - Test -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.yammer.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Used for SQL templating -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
<version>1.7.7</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_2.11</artifactId>
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-avro-serializer</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-config</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-utils</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-schema-registry-client</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>jetty-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
@@ -166,7 +166,7 @@ public class HoodieDeltaStreamer implements Serializable {
|
||||
public String propsFilePath =
|
||||
"file://" + System.getProperty("user.dir") + "/src/test/resources/delta-streamer-config/dfs-source.properties";
|
||||
|
||||
@Parameter(names = {"--hudi-conf"}, description = "Any configuration that can be set in the properties file "
|
||||
@Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file "
|
||||
+ "(using the CLI parameter \"--propsFilePath\") can also be passed command line using this parameter")
|
||||
public List<String> configs = new ArrayList<>();
|
||||
|
||||
|
||||
@@ -27,45 +27,90 @@
|
||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<!-- other hudi deps will come from hudi-hive-bundle -->
|
||||
<!-- other hoodie deps will come from hoodie-hive-bundle -->
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
@@ -92,7 +137,7 @@
|
||||
<artifactId>hive-shims</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-serde</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
@@ -109,49 +154,9 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter.common</groupId>
|
||||
<artifactId>objectsize</artifactId>
|
||||
<version>0.0.12</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
<artifactId>kryo</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -159,6 +164,7 @@
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -28,71 +28,28 @@
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libfb303</artifactId>
|
||||
<version>0.9.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<!-- All other hoodie deps will come from hoodie-hadoop-mr-bundle -->
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
@@ -105,43 +62,93 @@
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Thrift -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<!-- All other hudi deps will come from hudi-hadoop-mr-bundle -->
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libfb303</artifactId>
|
||||
<version>0.9.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -28,46 +28,16 @@
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
@@ -80,30 +50,70 @@
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Thrift -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<groupId>org.apache.thrift</groupId>
|
||||
<artifactId>libthrift</artifactId>
|
||||
<version>${thrift.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents-->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -127,6 +127,8 @@
|
||||
<pattern>parquet.schema</pattern>
|
||||
<shadedPattern>org.apache.hudi.parquet.schema</shadedPattern>
|
||||
</relocation>
|
||||
<!-- TODO: Revisit GH ISSUE #533 & PR#633-->
|
||||
<!--
|
||||
<relocation>
|
||||
<pattern>org.apache.hive.jdbc.</pattern>
|
||||
<shadedPattern>org.apache.hudi.org.apache.hive.jdbc.</shadedPattern>
|
||||
@@ -155,6 +157,11 @@
|
||||
<pattern>org.apache.hadoop.hive.service.</pattern>
|
||||
<shadedPattern>org.apache.hudi.org.apache.hadoop_hive.service.</shadedPattern>
|
||||
</relocation>
|
||||
<relocation>
|
||||
<pattern>org.apache.hadoop.hive.serde2.</pattern>
|
||||
<shadedPattern>org.apache.hudi.org.apache.hadoop_hive.serde2.</shadedPattern>
|
||||
</relocation>
|
||||
-->
|
||||
<relocation>
|
||||
<pattern>com.esotericsoftware.kryo.</pattern>
|
||||
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
|
||||
@@ -177,9 +184,8 @@
|
||||
<exclude>org.apache.derby:derby</exclude>
|
||||
<exclude>org.apache.hadoop:*</exclude>
|
||||
<exclude>org.apache.hbase:*</exclude>
|
||||
<!-- Just include hive-common, hive-service, hive-metastore and hive-jdbc -->
|
||||
<!-- Just include hive-common, hive-serde, hive-service, hive-metastore and hive-jdbc -->
|
||||
<exclude>org.apache.hive:hive-exec</exclude>
|
||||
<exclude>org.apache.hive:hive-serde</exclude>
|
||||
<exclude>org.apache.hive:hive-shims</exclude>
|
||||
<exclude>org.apache.spark:*</exclude>
|
||||
</excludes>
|
||||
@@ -206,102 +212,18 @@
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<!-- Scala -->
|
||||
<dependency>
|
||||
<groupId>org.scala-lang</groupId>
|
||||
<artifactId>scala-library</artifactId>
|
||||
<version>${scala.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.scalatest</groupId>
|
||||
<artifactId>scalatest_2.11</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
@@ -318,16 +240,141 @@
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark (Packages) -->
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive - Compile -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<classifier>standalone</classifier>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- TODO: Reinvestigate PR 633 -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-serde</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-metastore</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-common</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<!-- TODO: Reinvestigate PR 633 -->
|
||||
|
||||
<dependency>
|
||||
<groupId>org.scalatest</groupId>
|
||||
<artifactId>scalatest_2.11</artifactId>
|
||||
<version>${scalatest.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
|
||||
@@ -28,8 +28,6 @@
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<properties>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
<junit.version>4.10</junit.version>
|
||||
<checkstyle.skip>true</checkstyle.skip>
|
||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||
<notice.file>HUDI_NOTICE.txt</notice.file>
|
||||
@@ -205,6 +203,98 @@
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
<!-- Hoodie -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Dropwizard Metrics -->
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.javalin</groupId>
|
||||
<artifactId>javalin</artifactId>
|
||||
@@ -212,21 +302,114 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<groupId>com.yammer.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Used for SQL templating -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_2.11</artifactId>
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Kafka -->
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-avro-serializer</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-config</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-utils</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-schema-registry-client</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<classifier>standalone</classifier>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Hoodie - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-common</artifactId>
|
||||
@@ -235,7 +418,6 @@
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
@@ -245,17 +427,7 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
<!-- Hadoop - Test -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
@@ -275,7 +447,13 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive - Test -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-exec</artifactId>
|
||||
@@ -283,191 +461,11 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-jdbc</artifactId>
|
||||
<version>${hive.version}</version>
|
||||
<classifier>standalone</classifier>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-hive</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-utilities</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-client</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>tests</classifier>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.yammer.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>2.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Used for SQL templating -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>stringtemplate</artifactId>
|
||||
<version>4.0.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.beust</groupId>
|
||||
<artifactId>jcommander</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
<version>1.7.7</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_2.11</artifactId>
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-avro-serializer</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-config</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-utils</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-schema-registry-client</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
||||
|
||||
638
pom.xml
638
pom.xml
@@ -24,11 +24,9 @@
|
||||
<artifactId>hudi</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>0.5.0-SNAPSHOT</version>
|
||||
<description>Hoodie is a Apache Spark library that provides the ability to efficiently do
|
||||
incremental processing on datasets in HDFS
|
||||
</description>
|
||||
<url>https://github.com/uber/hudi</url>
|
||||
<name>Hoodie</name>
|
||||
<description>Apache Hudi brings stream style processing on big data</description>
|
||||
<url>https://github.com/apache/incubator-hudi</url>
|
||||
<name>Hudi</name>
|
||||
|
||||
<modules>
|
||||
<module>hudi-common</module>
|
||||
@@ -58,15 +56,15 @@
|
||||
</licenses>
|
||||
|
||||
<organization>
|
||||
<name>Uber Technologies Inc.</name>
|
||||
<url>http://www.uber.com/</url>
|
||||
<name>The Apache Software Foundation</name>
|
||||
<url>https://www.apache.org</url>
|
||||
</organization>
|
||||
|
||||
<developers>
|
||||
<developer>
|
||||
<id>vinothchandar</id>
|
||||
<name>Vinoth Chandar</name>
|
||||
<organization>Uber</organization>
|
||||
<organization>Confluent Inc</organization>
|
||||
</developer>
|
||||
<developer>
|
||||
<id>prasannarajaperumal</id>
|
||||
@@ -129,10 +127,13 @@
|
||||
<maven-surefire-plugin.version>2.19.1</maven-surefire-plugin.version>
|
||||
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
|
||||
<fasterxml.version>2.6.7</fasterxml.version>
|
||||
<glassfish.version>2.17</glassfish.version>
|
||||
<parquet.version>1.8.1</parquet.version>
|
||||
<junit.version>4.11</junit.version>
|
||||
<mockito.version>1.9.5</mockito.version>
|
||||
<junit-dep.version>4.10</junit-dep.version>
|
||||
<mockito.version>1.10.19</mockito.version>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
<slf4j.version>1.7.5</slf4j.version>
|
||||
<joda.version>2.9.9</joda.version>
|
||||
<hadoop.version>2.7.3</hadoop.version>
|
||||
<hive.groupid>org.apache.hive</hive.groupid>
|
||||
@@ -142,12 +143,17 @@
|
||||
<avro.version>1.7.7</avro.version>
|
||||
<scala.version>2.11.8</scala.version>
|
||||
<scala.libversion>2.11</scala.libversion>
|
||||
<scala-maven-plugin.version>3.3.1</scala-maven-plugin.version>
|
||||
<scalatest.version>3.0.1</scalatest.version>
|
||||
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
|
||||
<thrift.version>0.12.0</thrift.version>
|
||||
<hbase.version>1.2.3</hbase.version>
|
||||
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
|
||||
<notice.dir>${project.basedir}</notice.dir>
|
||||
<notice.file>NOTICE.txt</notice.file>
|
||||
<skipTests>false</skipTests>
|
||||
<skipITs>${skipTests}</skipITs>
|
||||
<skipUTs>${skipTests}</skipUTs>
|
||||
</properties>
|
||||
|
||||
<scm>
|
||||
@@ -232,32 +238,32 @@
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.scalastyle</groupId>
|
||||
<artifactId>scalastyle-maven-plugin</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<verbose>false</verbose>
|
||||
<failOnViolation>true</failOnViolation>
|
||||
<includeTestSourceDirectory>true</includeTestSourceDirectory>
|
||||
<failOnWarning>false</failOnWarning>
|
||||
<sourceDirectory>${project.basedir}/src/main/scala</sourceDirectory>
|
||||
<testSourceDirectory>${project.basedir}/src/test/scala</testSourceDirectory>
|
||||
<configLocation>style/scalastyle-config.xml</configLocation>
|
||||
<outputEncoding>UTF-8</outputEncoding>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<groupId>org.scalastyle</groupId>
|
||||
<artifactId>scalastyle-maven-plugin</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<verbose>false</verbose>
|
||||
<failOnViolation>true</failOnViolation>
|
||||
<includeTestSourceDirectory>true</includeTestSourceDirectory>
|
||||
<failOnWarning>false</failOnWarning>
|
||||
<sourceDirectory>${project.basedir}/src/main/scala</sourceDirectory>
|
||||
<testSourceDirectory>${project.basedir}/src/test/scala</testSourceDirectory>
|
||||
<configLocation>style/scalastyle-config.xml</configLocation>
|
||||
<outputEncoding>UTF-8</outputEncoding>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<version>${maven-compiler-plugin.version}</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
@@ -292,6 +298,7 @@
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>${maven-surefire-plugin.version}</version>
|
||||
<configuration>
|
||||
<skip>${skipUTs}</skip>
|
||||
<!-- Sets the VM argument line used when unit tests are run. -->
|
||||
<argLine>${surefireArgLine}</argLine>
|
||||
<systemPropertyVariables>
|
||||
@@ -452,18 +459,152 @@
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.3.1</version>
|
||||
<scope>test</scope>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>${slf4j.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Fasterxml -->
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-core</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>${fasterxml.version}.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||
<artifactId>jackson-datatype-guava</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Glassfish -->
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.core</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
<version>${glassfish.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.connectors</groupId>
|
||||
<artifactId>jersey-apache-connector</artifactId>
|
||||
<version>${glassfish.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.glassfish.jersey.containers</groupId>
|
||||
<artifactId>jersey-container-servlet-core</artifactId>
|
||||
<version>${glassfish.version}</version>
|
||||
</dependency>
|
||||
<!-- Needed for running HiveServer for Tests -->
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>jetty-all</artifactId>
|
||||
<scope>test</scope>
|
||||
<version>7.6.0.v20120127</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Avro -->
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<version>${avro.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
<version>${avro.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hive-bundle</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet (Twitter) -->
|
||||
<!-- Spark parquet version 1.7.0 does not play well with the hive 1.1.0 installed in cluster (which requires twitter parquet 1.5.0) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hive-bundle</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark -->
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Spark (Packages) -->
|
||||
<dependency>
|
||||
<groupId>com.databricks</groupId>
|
||||
<artifactId>spark-avro_2.11</artifactId>
|
||||
<version>4.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Dropwizard Metrics -->
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-graphite</artifactId>
|
||||
<version>${metrics.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>${metrics.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -473,28 +614,119 @@
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>${log4j.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<!-- Used by hudi-hive -->
|
||||
<groupId>joda-time</groupId>
|
||||
<artifactId>joda-time</artifactId>
|
||||
<version>${joda.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- we have to stay at <= 16.0, due to issues with HBase client -->
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>15.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>xerces</groupId>
|
||||
<artifactId>xercesImpl</artifactId>
|
||||
<version>2.9.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>xalan</groupId>
|
||||
<artifactId>xalan</artifactId>
|
||||
<version>2.7.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
<version>5.17.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons -->
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
<version>2.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
<version>1.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
<version>2.1.1</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Httpcomponents -->
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Jackson -->
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-core-asl</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-jaxrs</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-xc</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.*</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
@@ -505,48 +737,11 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
<version>${avro.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- we have to stay at <= 16.0, due to issues with HBase client -->
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
<version>15.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hadoop Libraries -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
@@ -559,12 +754,6 @@
|
||||
<artifactId>hadoop-auth</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.*</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
@@ -591,214 +780,36 @@
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.rocksdb</groupId>
|
||||
<artifactId>rocksdbjni</artifactId>
|
||||
<version>5.17.2</version>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
<version>${hadoop.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
<version>2.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
<version>1.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
<version>2.6</version>
|
||||
</dependency>
|
||||
<!-- Storage formats -->
|
||||
<!-- Spark parquet version 1.7.0 does not play well with the hive 1.1.0 installed in cluster (which requires twitter parquet 1.5.0) -->
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-hive-bundle</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>1.6.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hive-bundle</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
<version>${hadoop.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.**</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.**</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- HBase -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
<version>${hbase.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<version>${avro.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- Metrics -->
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-graphite</artifactId>
|
||||
<version>${metrics.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.dropwizard.metrics</groupId>
|
||||
<artifactId>metrics-core</artifactId>
|
||||
<version>${metrics.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>xerces</groupId>
|
||||
<artifactId>xercesImpl</artifactId>
|
||||
<version>2.9.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>xalan</groupId>
|
||||
<artifactId>xalan</artifactId>
|
||||
<version>2.7.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-dbcp</groupId>
|
||||
<artifactId>commons-dbcp</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-pool</groupId>
|
||||
<artifactId>commons-pool</artifactId>
|
||||
<version>1.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpcore</artifactId>
|
||||
<version>4.3.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.3.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>1.7.5</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.5</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-configuration2</artifactId>
|
||||
<version>2.1.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-annotations</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-core</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>${fasterxml.version}.1</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
<version>${fasterxml.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-core-asl</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-jaxrs</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-xc</artifactId>
|
||||
<version>${codehaus-jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
<artifactId>hive-service</artifactId>
|
||||
@@ -919,34 +930,28 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
<version>${hadoop.version}</version>
|
||||
<groupId>com.google.code.gson</groupId>
|
||||
<artifactId>gson</artifactId>
|
||||
<version>2.3.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
<version>${hadoop.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
<version>1.10.19</version>
|
||||
<version>${mockito.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<!--Used to test execution in task executor after de-serializing-->
|
||||
<groupId>com.esotericsoftware</groupId>
|
||||
@@ -954,24 +959,29 @@
|
||||
<version>4.0.0</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- Needed for running HiveServer for Tests -->
|
||||
<dependency>
|
||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||
<artifactId>jetty-all</artifactId>
|
||||
<scope>test</scope>
|
||||
<version>7.6.0.v20120127</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</dependencyManagement>
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>Maven repository</id>
|
||||
<url>https://central.maven.org/maven2/</url>
|
||||
<id>Maven Central</id>
|
||||
<name>Maven Repository</name>
|
||||
<url>https://repo.maven.apache.org/maven2</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
<repository>
|
||||
<id>cloudera-repo-releases</id>
|
||||
<url>https://repository.cloudera.com/artifactory/public/</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
|
||||
17
tools/run_travis_tests.sh
Executable file
17
tools/run_travis_tests.sh
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
mode=$1
|
||||
|
||||
if [ "$mode" = "unit" ];
|
||||
then
|
||||
echo "Running Unit Tests"
|
||||
mvn test -DskipITs=true -B
|
||||
elif [ "$mode" = "integration" ];
|
||||
then
|
||||
echo "Running Integration Tests"
|
||||
mvn verify -DskipUTs=true -B
|
||||
else
|
||||
echo "Unknown mode $mode"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user