[HUDI-68] Pom cleanup & demo automation (#846)
- [HUDI-172] Cleanup Maven POM/Classpath - Fix ordering of dependencies in poms, to enable better resolution - Idea is to place more specific ones at the top - And place dependencies which use them below them - [HUDI-68]: Automate demo steps on docker setup - Move hive queries from hive cli to beeline - Standardize on taking query input from text command files - Deltastreamer ingest, also does hive sync in a single step - Spark Incremental Query materialized as a derived Hive table using datasource - Fix flakiness in HDFS spin up and output comparison - Code cleanup around streamlining and loc reduction - Also fixed pom to not shade some hive classes in spark, to enable hive sync
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -15,6 +15,7 @@ target/
|
|||||||
*.war
|
*.war
|
||||||
*.ear
|
*.ear
|
||||||
*.db
|
*.db
|
||||||
|
*.patch
|
||||||
|
|
||||||
######################
|
######################
|
||||||
# OSX
|
# OSX
|
||||||
@@ -74,5 +75,4 @@ dependency-reduced-pom.xml
|
|||||||
#######################################
|
#######################################
|
||||||
# Docker
|
# Docker
|
||||||
#######################################
|
#######################################
|
||||||
hoodie-integ-test/compose_env
|
hudi-integ-test/compose_env
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,9 @@ jdk:
|
|||||||
- oraclejdk8
|
- oraclejdk8
|
||||||
sudo: required
|
sudo: required
|
||||||
env:
|
env:
|
||||||
- HUDI_QUIETER_LOGGING=1
|
- HUDI_QUIETER_LOGGING=1 TEST_SUITE=unit
|
||||||
|
- TEST_SUITE=integration
|
||||||
|
install: true
|
||||||
services:
|
services:
|
||||||
- docker
|
- docker
|
||||||
cache:
|
cache:
|
||||||
@@ -14,3 +16,5 @@ notifications:
|
|||||||
rooms:
|
rooms:
|
||||||
- secure: WNIZPBY//xf/xTJL1YUPzvPUDwjawaMM4IJ6IqxjRGcZCmuhNVu2XTJ3aL1g6X7ZcJKxJuwoU/TbSO8Dl6rgWSo/2OfyzBd4ks+hgeCsdycccTcvO8giQO1DOUGUSRdvUzOvKjWVK7iARYzQhoZawAYwI09UJLlwhYRCJ1IKc1ZksrEt964GeEmPyJbwMoZOJVUU84jJIAZPIpOFGTKM652FMermg9yaY2W5oSjDXaV98z0/mJV4Ry++J2v0fvoDs5HxkXYhZJP+dpWR82KDr6Q6LGL5/IlJ+b+IH3pF8LyKR4nCH6l1EZ8KpoFZapyYWYQpXMfQoF2K/JEQkpz1EqBCeEDSJ2+j1PPLhOWXd7ok4DsS26S8BP2ImvyXwua51THN1/r1fCGSIdxiQ5C8aeYmPCSr+oLChCVivEG2eeU34Z1nQJ5aDymNGeFE9qUUpjS0ETfFcjI/WQaA+FiYiPkDfeAoT1+6ySdY7l9gJhMygupILjq57IHbqx4nEr/8AB3Rqb8iIDTWDXgUBI9xKmty36zjIGcVOsCT/SGPccxvEJBXQk8uQqs/rDhaA/ErJPMLX/2b7ElSSObKFdjpMaxVvZIE6wvMLJpIYfChDoXwgfhN6zlAFZrEib7PFI4dGkS8u4wkkHkBS7C+uz2e92EhsAB+BIhUR1M3NQ33+Is=
|
- secure: WNIZPBY//xf/xTJL1YUPzvPUDwjawaMM4IJ6IqxjRGcZCmuhNVu2XTJ3aL1g6X7ZcJKxJuwoU/TbSO8Dl6rgWSo/2OfyzBd4ks+hgeCsdycccTcvO8giQO1DOUGUSRdvUzOvKjWVK7iARYzQhoZawAYwI09UJLlwhYRCJ1IKc1ZksrEt964GeEmPyJbwMoZOJVUU84jJIAZPIpOFGTKM652FMermg9yaY2W5oSjDXaV98z0/mJV4Ry++J2v0fvoDs5HxkXYhZJP+dpWR82KDr6Q6LGL5/IlJ+b+IH3pF8LyKR4nCH6l1EZ8KpoFZapyYWYQpXMfQoF2K/JEQkpz1EqBCeEDSJ2+j1PPLhOWXd7ok4DsS26S8BP2ImvyXwua51THN1/r1fCGSIdxiQ5C8aeYmPCSr+oLChCVivEG2eeU34Z1nQJ5aDymNGeFE9qUUpjS0ETfFcjI/WQaA+FiYiPkDfeAoT1+6ySdY7l9gJhMygupILjq57IHbqx4nEr/8AB3Rqb8iIDTWDXgUBI9xKmty36zjIGcVOsCT/SGPccxvEJBXQk8uQqs/rDhaA/ErJPMLX/2b7ElSSObKFdjpMaxVvZIE6wvMLJpIYfChDoXwgfhN6zlAFZrEib7PFI4dGkS8u4wkkHkBS7C+uz2e92EhsAB+BIhUR1M3NQ33+Is=
|
||||||
on_pull_requests: false
|
on_pull_requests: false
|
||||||
|
script:
|
||||||
|
tools/run_travis_tests.sh $TEST_SUITE
|
||||||
5
docker/demo/compaction.commands
Normal file
5
docker/demo/compaction.commands
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
connect --path /user/hive/warehouse/stock_ticks_mor
|
||||||
|
compactions show all
|
||||||
|
compaction schedule
|
||||||
|
compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
|
||||||
|
|
||||||
27
docker/demo/config/dfs-source.properties
Normal file
27
docker/demo/config/dfs-source.properties
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
################################################################################
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
include=base.properties
|
||||||
|
# Key fields, for kafka example
|
||||||
|
hoodie.datasource.write.recordkey.field=key
|
||||||
|
hoodie.datasource.write.partitionpath.field=date
|
||||||
|
# Schema provider props (change to absolute path based on your installation)
|
||||||
|
hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
|
||||||
|
hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc
|
||||||
|
# DFS Source
|
||||||
|
hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/
|
||||||
21
docker/demo/get_min_commit_time.sh
Executable file
21
docker/demo/get_min_commit_time.sh
Executable file
@@ -0,0 +1,21 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
################################################################################
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
MIN_COMMIT_TIME=`hdfs dfs -ls -t /user/hive/warehouse/stock_ticks_cow/.hoodie/*.commit | head -1 | awk -F'/' ' { print $7 } ' | awk -F'.' ' { print $1 } '`
|
||||||
|
echo $MIN_COMMIT_TIME;
|
||||||
11
docker/demo/hive-batch1.commands
Normal file
11
docker/demo/hive-batch1.commands
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
add jar ${hudi.hadoop.bundle};
|
||||||
|
|
||||||
|
select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG';
|
||||||
|
select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
|
||||||
|
select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
|
||||||
|
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG';
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
|
||||||
|
|
||||||
|
!quit
|
||||||
9
docker/demo/hive-batch2-after-compaction.commands
Normal file
9
docker/demo/hive-batch2-after-compaction.commands
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
add jar ${hudi.hadoop.bundle};
|
||||||
|
|
||||||
|
select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
|
||||||
|
select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
|
||||||
|
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
|
||||||
|
|
||||||
|
!quit
|
||||||
10
docker/demo/hive-incremental.commands
Normal file
10
docker/demo/hive-incremental.commands
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
add jar ${hudi.hadoop.bundle};
|
||||||
|
|
||||||
|
set hoodie.stock_ticks_cow.consume.mode=INCREMENTAL;
|
||||||
|
set hoodie.stock_ticks_cow.consume.max.commits=3;
|
||||||
|
set hoodie.stock_ticks_cow.consume.start.timestamp=${min.commit.time};
|
||||||
|
|
||||||
|
select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
|
||||||
|
|
||||||
|
!quit
|
||||||
|
|
||||||
10
docker/demo/hive-table-check.commands
Normal file
10
docker/demo/hive-table-check.commands
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
add jar ${hudi.hadoop.bundle};
|
||||||
|
show tables;
|
||||||
|
|
||||||
|
show partitions stock_ticks_cow;
|
||||||
|
show partitions stock_ticks_mor;
|
||||||
|
show partitions stock_ticks_mor_rt;
|
||||||
|
|
||||||
|
!quit
|
||||||
|
|
||||||
|
|
||||||
30
docker/demo/sparksql-batch1.commands
Normal file
30
docker/demo/sparksql-batch1.commands
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
spark.sql("show tables").show(100, false)
|
||||||
|
// Copy-On-Write table
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
|
||||||
|
|
||||||
|
// Merge-On-Read table
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
|
||||||
|
|
||||||
|
System.exit(0)
|
||||||
29
docker/demo/sparksql-batch2.commands
Normal file
29
docker/demo/sparksql-batch2.commands
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Copy-On-Write table
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
|
||||||
|
|
||||||
|
// Merge-On-Read table
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
|
||||||
|
|
||||||
|
System.exit(0)
|
||||||
59
docker/demo/sparksql-incremental.commands
Normal file
59
docker/demo/sparksql-incremental.commands
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.hudi.DataSourceReadOptions;
|
||||||
|
import org.apache.hudi.DataSourceWriteOptions;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.HoodieDataSourceHelpers;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
|
||||||
|
val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
|
||||||
|
val beginInstantTime = HoodieDataSourceHelpers.listCommitsSince(fs, "/user/hive/warehouse/stock_ticks_cow", "00000").get(0)
|
||||||
|
val hoodieIncViewDF = spark.read.format("org.apache.hudi").
|
||||||
|
option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL).
|
||||||
|
option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, beginInstantTime).
|
||||||
|
load("/user/hive/warehouse/stock_ticks_cow");
|
||||||
|
hoodieIncViewDF.registerTempTable("stock_ticks_cow_incr")
|
||||||
|
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_incr where symbol = 'GOOG'").show(100, false);
|
||||||
|
|
||||||
|
spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_incr").
|
||||||
|
write.format("org.apache.hudi").
|
||||||
|
option("hoodie.insert.shuffle.parallelism", "2").
|
||||||
|
option("hoodie.upsert.shuffle.parallelism","2").
|
||||||
|
option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL).
|
||||||
|
option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
|
||||||
|
option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
|
||||||
|
option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
|
||||||
|
option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
|
||||||
|
option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor").
|
||||||
|
option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor").
|
||||||
|
option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
|
||||||
|
option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
|
||||||
|
option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
|
||||||
|
option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
|
||||||
|
option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
|
||||||
|
option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
|
||||||
|
mode(SaveMode.Overwrite).
|
||||||
|
save("/user/hive/warehouse/stock_ticks_derived_mor");
|
||||||
|
|
||||||
|
spark.sql("show tables").show(20, false)
|
||||||
|
spark.sql("select count(*) from stock_ticks_derived_mor").show(20, false)
|
||||||
|
spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)
|
||||||
|
|
||||||
|
System.exit(0);
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
HUDI_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
HOODIE_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
|
||||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||||
echo "setting hadoop conf dir"
|
echo "setting hadoop conf dir"
|
||||||
HADOOP_CONF_DIR="/etc/hadoop/conf"
|
HADOOP_CONF_DIR="/etc/hadoop/conf"
|
||||||
@@ -13,5 +13,4 @@ fi
|
|||||||
if [ -z "$CLIENT_JAR" ]; then
|
if [ -z "$CLIENT_JAR" ]; then
|
||||||
echo "client jar location not set"
|
echo "client jar location not set"
|
||||||
fi
|
fi
|
||||||
echo "java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap"
|
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
|
||||||
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap
|
|
||||||
|
|||||||
@@ -29,8 +29,6 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<spring.shell.version>1.2.0.RELEASE</spring.shell.version>
|
<spring.shell.version>1.2.0.RELEASE</spring.shell.version>
|
||||||
<jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
|
<jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
|
||||||
<log4j.version>1.2.17</log4j.version>
|
|
||||||
<junit.version>4.10</junit.version>
|
|
||||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
@@ -61,7 +59,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>net.alchim31.maven</groupId>
|
<groupId>net.alchim31.maven</groupId>
|
||||||
<artifactId>scala-maven-plugin</artifactId>
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
<version>3.2.1</version>
|
<version>${scala-maven-plugin.version}</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
@@ -133,23 +131,42 @@
|
|||||||
|
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Scala -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.scala-lang</groupId>
|
<groupId>org.scala-lang</groupId>
|
||||||
<artifactId>scala-library</artifactId>
|
<artifactId>scala-library</artifactId>
|
||||||
<version>${scala.version}</version>
|
<version>${scala.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.shell</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>spring-shell</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
<version>${spring.shell.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>de.vandermeer</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>asciitable</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
<version>0.2.5</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hive</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-utilities</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
@@ -159,6 +176,24 @@
|
|||||||
<artifactId>spark-sql_2.11</artifactId>
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.shell</groupId>
|
||||||
|
<artifactId>spring-shell</artifactId>
|
||||||
|
<version>${spring.shell.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>de.vandermeer</groupId>
|
||||||
|
<artifactId>asciitable</artifactId>
|
||||||
|
<version>0.2.5</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.jakewharton.fliptables</groupId>
|
<groupId>com.jakewharton.fliptables</groupId>
|
||||||
<artifactId>fliptables</artifactId>
|
<artifactId>fliptables</artifactId>
|
||||||
@@ -166,60 +201,25 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
<groupId>joda-time</groupId>
|
||||||
<artifactId>log4j</artifactId>
|
<artifactId>joda-time</artifactId>
|
||||||
<version>${log4j.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-hive</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-client</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit-dep</artifactId>
|
<artifactId>junit-dep</artifactId>
|
||||||
<version>${junit.version}</version>
|
<version>${junit-dep.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>joda-time</groupId>
|
|
||||||
<artifactId>joda-time</artifactId>
|
|
||||||
<version>2.9.6</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-utilities</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -217,12 +217,23 @@ public class CompactionCommand implements CommandMarker {
|
|||||||
final String sparkMemory,
|
final String sparkMemory,
|
||||||
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
|
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
|
||||||
final String retry,
|
final String retry,
|
||||||
@CliOption(key = "compactionInstant", mandatory = true, help = "Base path for the target hoodie dataset")
|
@CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
|
||||||
final String compactionInstantTime) throws Exception {
|
String compactionInstantTime) throws Exception {
|
||||||
boolean initialized = HoodieCLI.initConf();
|
boolean initialized = HoodieCLI.initConf();
|
||||||
HoodieCLI.initFS(initialized);
|
HoodieCLI.initFS(initialized);
|
||||||
|
|
||||||
if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
|
if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||||
|
if (null == compactionInstantTime) {
|
||||||
|
// pick outstanding one with lowest timestamp
|
||||||
|
Option<String> firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
|
||||||
|
.filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
|
||||||
|
.equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
|
||||||
|
if (!firstPendingInstant.isPresent()) {
|
||||||
|
return "NO PENDING COMPACTION TO RUN";
|
||||||
|
}
|
||||||
|
compactionInstantTime = firstPendingInstant.get();
|
||||||
|
}
|
||||||
|
|
||||||
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
|
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
|
||||||
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
|
||||||
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
|
||||||
|
|||||||
@@ -68,6 +68,7 @@
|
|||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
@@ -78,6 +79,71 @@
|
|||||||
<artifactId>hudi-timeline-service</artifactId>
|
<artifactId>hudi-timeline-service</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-hadoop</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Dropwizard Metrics -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-graphite</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
<version>1.48</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.htrace</groupId>
|
||||||
|
<artifactId>htrace-core</artifactId>
|
||||||
|
<version>3.0.4</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
@@ -117,6 +183,14 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hbase -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hbase</groupId>
|
||||||
|
<artifactId>hbase-client</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Tests -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
@@ -125,97 +199,18 @@
|
|||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
|
||||||
<artifactId>metrics-graphite</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
|
||||||
<artifactId>metrics-core</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
<version>1.48</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Parent dependencies -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-hadoop</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-exec</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.mockito</groupId>
|
|
||||||
<artifactId>mockito-all</artifactId>
|
|
||||||
<version>1.10.19</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-hadoop-mr</artifactId>
|
<artifactId>hudi-hadoop-mr</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- Hbase dependencies -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hbase</groupId>
|
|
||||||
<artifactId>hbase-client</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.htrace</groupId>
|
|
||||||
<artifactId>htrace-core</artifactId>
|
|
||||||
<version>3.0.4</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
|
<!-- HBase - Tests -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hbase</groupId>
|
<groupId>org.apache.hbase</groupId>
|
||||||
<artifactId>hbase-testing-util</artifactId>
|
<artifactId>hbase-testing-util</artifactId>
|
||||||
<version>1.2.3</version>
|
<version>${hbase.version}</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
@@ -232,5 +227,19 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive - Tests -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-exec</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-all</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -43,7 +43,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-jar-plugin</artifactId>
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
<version>2.5</version>
|
<version>${maven-jar-plugin.version}</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<goals>
|
<goals>
|
||||||
@@ -79,77 +79,20 @@
|
|||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<!-- Fasterxml -->
|
||||||
<groupId>org.rocksdb</groupId>
|
|
||||||
<artifactId>rocksdbjni</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
<version>${junit.version}</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-annotations</artifactId>
|
<artifactId>jackson-annotations</artifactId>
|
||||||
<version>${fasterxml.version}</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.parquet</groupId>
|
<groupId>org.apache.avro</groupId>
|
||||||
<artifactId>parquet-avro</artifactId>
|
<artifactId>avro</artifactId>
|
||||||
<version>${parquet.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.mockito</groupId>
|
|
||||||
<artifactId>mockito-all</artifactId>
|
|
||||||
<version>1.10.19</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
<version>4.5.4</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-codec</groupId>
|
|
||||||
<artifactId>commons-codec</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>fluent-hc</artifactId>
|
|
||||||
<version>4.5.4</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.esotericsoftware</groupId>
|
|
||||||
<artifactId>kryo</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.avro</groupId>
|
<groupId>org.apache.avro</groupId>
|
||||||
@@ -161,21 +104,91 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.stefanbirkner</groupId>
|
<groupId>org.apache.parquet</groupId>
|
||||||
<artifactId>system-rules</artifactId>
|
<artifactId>parquet-avro</artifactId>
|
||||||
<version>1.16.0</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Twitter -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.twitter.common</groupId>
|
<groupId>com.twitter.common</groupId>
|
||||||
<artifactId>objectsize</artifactId>
|
<artifactId>objectsize</artifactId>
|
||||||
<version>0.0.12</version>
|
<version>0.0.12</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-codec</groupId>
|
||||||
|
<artifactId>commons-codec</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>fluent-hc</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.rocksdb</groupId>
|
||||||
|
<artifactId>rocksdbjni</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-all</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.esotericsoftware</groupId>
|
||||||
|
<artifactId>kryo</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.esotericsoftware</groupId>
|
<groupId>com.esotericsoftware</groupId>
|
||||||
<artifactId>kryo-shaded</artifactId>
|
<artifactId>kryo-shaded</artifactId>
|
||||||
<version>4.0.2</version>
|
<version>4.0.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.stefanbirkner</groupId>
|
||||||
|
<artifactId>system-rules</artifactId>
|
||||||
|
<version>1.16.0</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -30,32 +30,52 @@
|
|||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hudi -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.avro</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>avro</artifactId>
|
||||||
<version>${project.version}</version>
|
</dependency>
|
||||||
<classifier>tests</classifier>
|
|
||||||
<type>test-jar</type>
|
<!-- Parquet -->
|
||||||
<scope>test</scope>
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet (Twitter) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-logging</groupId>
|
||||||
|
<artifactId>commons-logging</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter.common</groupId>
|
||||||
|
<artifactId>objectsize</artifactId>
|
||||||
|
<version>0.0.12</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-auth</artifactId>
|
<artifactId>hadoop-auth</artifactId>
|
||||||
@@ -64,52 +84,41 @@
|
|||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-jdbc</artifactId>
|
<artifactId>hive-jdbc</artifactId>
|
||||||
<version>${hive.version}</version>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>commons-logging</groupId>
|
|
||||||
<artifactId>commons-logging</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-exec</artifactId>
|
<artifactId>hive-exec</artifactId>
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-logging</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>commons-logging</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
</dependency>
|
<version>${project.version}</version>
|
||||||
<dependency>
|
<classifier>tests</classifier>
|
||||||
<groupId>org.apache.parquet</groupId>
|
<type>test-jar</type>
|
||||||
<artifactId>parquet-avro</artifactId>
|
<scope>test</scope>
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter.common</groupId>
|
|
||||||
<artifactId>objectsize</artifactId>
|
|
||||||
<version>0.0.12</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.esotericsoftware</groupId>
|
<groupId>com.esotericsoftware</groupId>
|
||||||
<artifactId>kryo</artifactId>
|
<artifactId>kryo</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
|
|||||||
@@ -31,6 +31,81 @@
|
|||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hadoop-mr</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet (Twitter) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.thrift</groupId>
|
||||||
|
<artifactId>libthrift</artifactId>
|
||||||
|
<version>${thrift.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>joda-time</groupId>
|
||||||
|
<artifactId>joda-time</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-pool</groupId>
|
||||||
|
<artifactId>commons-pool</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpcore</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
@@ -48,66 +123,17 @@
|
|||||||
<artifactId>hadoop-auth</artifactId>
|
<artifactId>hadoop-auth</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.thrift</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>libthrift</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
<version>${thrift.version}</version>
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Hive -->
|
||||||
<groupId>joda-time</groupId>
|
|
||||||
<artifactId>joda-time</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Apache commons -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-pool</groupId>
|
|
||||||
<artifactId>commons-pool</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-io</groupId>
|
|
||||||
<artifactId>commons-io</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Logging -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-log4j12</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Hadoop Testing -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-service</artifactId>
|
<artifactId>hive-service</artifactId>
|
||||||
@@ -139,64 +165,53 @@
|
|||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
<classifier>tests</classifier>
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-hadoop-mr</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.esotericsoftware.kryo</groupId>
|
<groupId>com.esotericsoftware.kryo</groupId>
|
||||||
<artifactId>kryo</artifactId>
|
<artifactId>kryo</artifactId>
|
||||||
<version>2.21</version>
|
<version>2.21</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
<artifactId>jetty-all</artifactId>
|
<artifactId>jetty-all</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop - Test -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@@ -213,7 +228,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-jar-plugin</artifactId>
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
<version>2.5</version>
|
<version>${maven-jar-plugin.version}</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<goals>
|
<goals>
|
||||||
|
|||||||
@@ -9,32 +9,57 @@
|
|||||||
<artifactId>hudi-integ-test</artifactId>
|
<artifactId>hudi-integ-test</artifactId>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<!-- Glassfish -->
|
||||||
<groupId>org.glassfish.jersey.connectors</groupId>
|
<!-- needs to be at top -->
|
||||||
<artifactId>jersey-apache-connector</artifactId>
|
|
||||||
<version>2.17</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.glassfish.jersey.core</groupId>
|
<groupId>org.glassfish.jersey.core</groupId>
|
||||||
<artifactId>jersey-server</artifactId>
|
<artifactId>jersey-server</artifactId>
|
||||||
<version>2.17</version>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.glassfish.jersey.connectors</groupId>
|
||||||
|
<artifactId>jersey-apache-connector</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.glassfish.jersey.containers</groupId>
|
<groupId>org.glassfish.jersey.containers</groupId>
|
||||||
<artifactId>jersey-container-servlet-core</artifactId>
|
<artifactId>jersey-container-servlet-core</artifactId>
|
||||||
<version>2.17</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Docker-Java - Test-->
|
||||||
|
<!-- needs to be at top to force javax.ws version -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.docker-java</groupId>
|
||||||
|
<artifactId>docker-java</artifactId>
|
||||||
|
<version>3.1.2</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark</artifactId>
|
<artifactId>hudi-spark</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.glassfish.**</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Import -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hadoop-sparkworker-docker</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<type>pom</type>
|
||||||
|
<scope>import</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
@@ -43,12 +68,6 @@
|
|||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.awaitility</groupId>
|
|
||||||
<artifactId>awaitility</artifactId>
|
|
||||||
<version>3.1.2</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark</artifactId>
|
<artifactId>hudi-spark</artifactId>
|
||||||
@@ -56,56 +75,39 @@
|
|||||||
<classifier>tests</classifier>
|
<classifier>tests</classifier>
|
||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.glassfish.**</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml - Test-->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-annotations</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-databind</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||||
|
<artifactId>jackson-datatype-guava</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.awaitility</groupId>
|
||||||
|
<artifactId>awaitility</artifactId>
|
||||||
|
<version>3.1.2</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.guava</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>guava</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>20.0</version>
|
<version>20.0</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-annotations</artifactId>
|
|
||||||
<version>2.6.4</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-databind</artifactId>
|
|
||||||
<version>2.6.4</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
|
||||||
<artifactId>jackson-datatype-guava</artifactId>
|
|
||||||
<version>2.9.4</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.github.docker-java</groupId>
|
|
||||||
<artifactId>docker-java</artifactId>
|
|
||||||
<version>3.1.0-rc-3</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.glassfish.**</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-hadoop-sparkworker-docker</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<type>pom</type>
|
|
||||||
<scope>import</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
@@ -150,12 +152,25 @@
|
|||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-failsafe-plugin</artifactId>
|
<artifactId>maven-failsafe-plugin</artifactId>
|
||||||
<version>2.22.0</version>
|
<version>2.22.0</version>
|
||||||
|
<configuration>
|
||||||
|
<includes>
|
||||||
|
<include>**/ITT*.java</include>
|
||||||
|
</includes>
|
||||||
|
</configuration>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
|
<phase>integration-test</phase>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>integration-test</goal>
|
<goal>integration-test</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>verify</id>
|
||||||
|
<phase>verify</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>verify</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
@@ -179,7 +194,7 @@
|
|||||||
</execution>
|
</execution>
|
||||||
<execution>
|
<execution>
|
||||||
<id>down</id>
|
<id>down</id>
|
||||||
<phase>integration-test</phase>
|
<phase>post-integration-test</phase>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>down</goal>
|
<goal>down</goal>
|
||||||
</goals>
|
</goals>
|
||||||
|
|||||||
@@ -31,14 +31,14 @@ import com.github.dockerjava.core.DockerClientBuilder;
|
|||||||
import com.github.dockerjava.core.DockerClientConfig;
|
import com.github.dockerjava.core.DockerClientConfig;
|
||||||
import com.github.dockerjava.core.command.ExecStartResultCallback;
|
import com.github.dockerjava.core.command.ExecStartResultCallback;
|
||||||
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
|
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
|
||||||
import com.google.common.collect.ImmutableList;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import org.apache.commons.lang3.tuple.Pair;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
@@ -52,38 +52,64 @@ public abstract class ITTestBase {
|
|||||||
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
|
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
|
||||||
protected static final String HIVESERVER = "/hiveserver";
|
protected static final String HIVESERVER = "/hiveserver";
|
||||||
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
||||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
|
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_app.sh";
|
||||||
protected static final String HUDI_HADOOP_BUNDLE =
|
protected static final String HUDI_HADOOP_BUNDLE =
|
||||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
||||||
protected static final String HUDI_HIVE_BUNDLE =
|
protected static final String HUDI_HIVE_BUNDLE =
|
||||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
|
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
|
||||||
protected static final String HUDI_SPARK_BUNDLE =
|
protected static final String HUDI_SPARK_BUNDLE =
|
||||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
|
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
|
||||||
|
protected static final String HUDI_UTILITIES_BUNDLE =
|
||||||
|
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-utilities.jar";
|
||||||
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
|
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
|
||||||
|
protected static final String HADOOP_CONF_DIR = "/etc/hadoop";
|
||||||
|
|
||||||
// Skip these lines when capturing output from hive
|
// Skip these lines when capturing output from hive
|
||||||
protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;
|
|
||||||
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
|
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
|
||||||
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
|
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
|
||||||
protected DockerClient dockerClient;
|
protected DockerClient dockerClient;
|
||||||
protected Map<String, Container> runningContainers;
|
protected Map<String, Container> runningContainers;
|
||||||
|
|
||||||
protected static String[] getHiveConsoleCommand(String rawCommand) {
|
static String[] getHiveConsoleCommand(String rawCommand) {
|
||||||
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
|
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
|
||||||
String fullCommand = jarCommand + rawCommand;
|
String fullCommand = jarCommand + rawCommand;
|
||||||
|
|
||||||
List<String> cmd = new ImmutableList.Builder().add("hive")
|
List<String> cmd = new ArrayList<>();
|
||||||
.add("--hiveconf")
|
cmd.add("hive");
|
||||||
.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
|
cmd.add("--hiveconf");
|
||||||
.add("--hiveconf")
|
cmd.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
|
||||||
.add("hive.stats.autogather=false")
|
cmd.add("--hiveconf");
|
||||||
.add("-e")
|
cmd.add("hive.stats.autogather=false");
|
||||||
.add("\"" + fullCommand + "\"")
|
cmd.add("-e");
|
||||||
.build();
|
cmd.add("\"" + fullCommand + "\"");
|
||||||
return cmd.stream().toArray(String[]::new);
|
return cmd.stream().toArray(String[]::new);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
|
||||||
|
StringBuilder builder = new StringBuilder()
|
||||||
|
.append("beeline -u " + HIVE_SERVER_JDBC_URL)
|
||||||
|
.append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
|
||||||
|
.append(" --hiveconf hive.stats.autogather=false ")
|
||||||
|
.append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
|
||||||
|
|
||||||
|
if (additionalVar != null) {
|
||||||
|
builder.append(" --hivevar " + additionalVar + " ");
|
||||||
|
}
|
||||||
|
return builder.append(" -f ").append(commandFile).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
static String getSparkShellCommand(String commandFile) {
|
||||||
|
return new StringBuilder()
|
||||||
|
.append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
|
||||||
|
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
|
||||||
|
.append(" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
||||||
|
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ")
|
||||||
|
.append(" -i ").append(commandFile)
|
||||||
|
.toString();
|
||||||
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void init() throws IOException {
|
public void init() {
|
||||||
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
|
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
|
||||||
//Assuming insecure docker engine
|
//Assuming insecure docker engine
|
||||||
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
|
||||||
@@ -104,7 +130,7 @@ public abstract class ITTestBase {
|
|||||||
List<Container> containerList = dockerClient.listContainersCmd().exec();
|
List<Container> containerList = dockerClient.listContainersCmd().exec();
|
||||||
for (Container c : containerList) {
|
for (Container c : containerList) {
|
||||||
if (!c.getState().equalsIgnoreCase("running")) {
|
if (!c.getState().equalsIgnoreCase("running")) {
|
||||||
System.out.println("Container : " + Arrays.toString(c.getNames())
|
LOG.info("Container : " + Arrays.toString(c.getNames())
|
||||||
+ "not in running state, Curr State :" + c.getState());
|
+ "not in running state, Curr State :" + c.getState());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -114,10 +140,12 @@ public abstract class ITTestBase {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
private String singleSpace(String str) {
|
||||||
boolean expectedToSucceed)
|
return str.replaceAll("[\\s]+"," ");
|
||||||
throws Exception {
|
}
|
||||||
LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
|
|
||||||
|
private TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
|
||||||
|
boolean expectedToSucceed) throws Exception {
|
||||||
Container sparkWorkerContainer = runningContainers.get(containerName);
|
Container sparkWorkerContainer = runningContainers.get(containerName);
|
||||||
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
|
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
|
||||||
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
|
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
|
||||||
@@ -128,12 +156,10 @@ public abstract class ITTestBase {
|
|||||||
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
|
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
|
||||||
.exec(callback).awaitCompletion();
|
.exec(callback).awaitCompletion();
|
||||||
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
|
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
|
||||||
LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
|
LOG.info("Exit code for command : " + exitCode);
|
||||||
if (exitCode != 0) {
|
LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
|
||||||
LOG.error("Command (" + Arrays.toString(command) + ") failed.");
|
LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
|
||||||
LOG.error("Stdout is :" + callback.getStdout().toString());
|
|
||||||
LOG.error("Stderr is :" + callback.getStderr().toString());
|
|
||||||
}
|
|
||||||
if (expectedToSucceed) {
|
if (expectedToSucceed) {
|
||||||
Assert.assertTrue("Command (" + Arrays.toString(command)
|
Assert.assertTrue("Command (" + Arrays.toString(command)
|
||||||
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
|
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
|
||||||
@@ -145,6 +171,71 @@ public abstract class ITTestBase {
|
|||||||
return callback;
|
return callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void executeCommandStringsInDocker(String containerName, List<String> commands) throws Exception {
|
||||||
|
for (String cmd : commands) {
|
||||||
|
executeCommandStringInDocker(containerName, cmd, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd,
|
||||||
|
boolean expectedToSucceed) throws Exception {
|
||||||
|
LOG.info("\n\n#################################################################################################");
|
||||||
|
LOG.info("Container : " + containerName + ", Running command :" + cmd);
|
||||||
|
LOG.info("\n#################################################################################################");
|
||||||
|
|
||||||
|
String[] cmdSplits = singleSpace(cmd).split(" ");
|
||||||
|
return executeCommandInDocker(containerName, cmdSplits, expectedToSucceed);
|
||||||
|
}
|
||||||
|
|
||||||
|
Pair<String, String> executeHiveCommand(String hiveCommand) throws Exception {
|
||||||
|
|
||||||
|
LOG.info("\n\n#################################################################################################");
|
||||||
|
LOG.info("Running hive command :" + hiveCommand);
|
||||||
|
LOG.info("\n#################################################################################################");
|
||||||
|
|
||||||
|
String[] hiveCmd = getHiveConsoleCommand(hiveCommand);
|
||||||
|
TestExecStartResultCallback callback = executeCommandInDocker(HIVESERVER, hiveCmd, true);
|
||||||
|
return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
Pair<String, String> executeHiveCommandFile(String commandFile) throws Exception {
|
||||||
|
return executeHiveCommandFile(commandFile, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
Pair<String, String> executeHiveCommandFile(String commandFile, String additionalVar) throws Exception {
|
||||||
|
String hiveCmd = getHiveConsoleCommandFile(commandFile, additionalVar);
|
||||||
|
TestExecStartResultCallback callback = executeCommandStringInDocker(HIVESERVER, hiveCmd, true);
|
||||||
|
return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
Pair<String, String> executeSparkSQLCommand(String commandFile, boolean expectedToSucceed) throws Exception {
|
||||||
|
String sparkShellCmd = getSparkShellCommand(commandFile);
|
||||||
|
TestExecStartResultCallback callback = executeCommandStringInDocker(ADHOC_1_CONTAINER,
|
||||||
|
sparkShellCmd, expectedToSucceed);
|
||||||
|
return Pair.of(callback.getStdout().toString(), callback.getStderr().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput) {
|
||||||
|
assertStdOutContains(stdOutErr, expectedOutput, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput, int times) {
|
||||||
|
// this is so that changes in padding don't affect comparison
|
||||||
|
String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", "");
|
||||||
|
expectedOutput = singleSpace(expectedOutput).replaceAll(" ", "");
|
||||||
|
|
||||||
|
int lastIndex = 0;
|
||||||
|
int count = 0;
|
||||||
|
while(lastIndex != -1){
|
||||||
|
lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
|
||||||
|
if(lastIndex != -1){
|
||||||
|
count ++;
|
||||||
|
lastIndex += expectedOutput.length();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Assert.assertEquals("Did not find output the expected number of times", times, count);
|
||||||
|
}
|
||||||
|
|
||||||
public class TestExecStartResultCallback extends ExecStartResultCallback {
|
public class TestExecStartResultCallback extends ExecStartResultCallback {
|
||||||
|
|
||||||
// Storing the reference in subclass to expose to clients
|
// Storing the reference in subclass to expose to clients
|
||||||
|
|||||||
@@ -0,0 +1,271 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.integ;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
|
import java.util.List;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
|
||||||
|
*
|
||||||
|
* To run this as a standalone test in the IDE or command line. First bring up the demo setup using
|
||||||
|
* `docker/setup_demo.sh` and then run the test class as you would do normally.
|
||||||
|
*/
|
||||||
|
public class ITTestHoodieDemo extends ITTestBase {
|
||||||
|
|
||||||
|
private static String HDFS_DATA_DIR = "/usr/hive/data/input";
|
||||||
|
private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json";
|
||||||
|
private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json";
|
||||||
|
|
||||||
|
private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT +
|
||||||
|
"/docker/demo/data/batch_1.json";
|
||||||
|
private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT +
|
||||||
|
"/docker/demo/data/batch_2.json";
|
||||||
|
|
||||||
|
private static String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
|
||||||
|
private static String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
|
||||||
|
private static String COW_TABLE_NAME = "stock_ticks_cow";
|
||||||
|
private static String MOR_TABLE_NAME = "stock_ticks_mor";
|
||||||
|
|
||||||
|
private static String DEMO_CONTAINER_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/setup_demo_container.sh";
|
||||||
|
private static String MIN_COMMIT_TIME_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/get_min_commit_time.sh";
|
||||||
|
private static String HUDI_CLI_TOOL = HOODIE_WS_ROOT + "/hudi-cli/hudi-cli.sh";
|
||||||
|
private static String COMPACTION_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/compaction.commands";
|
||||||
|
private static String SPARKSQL_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch1.commands";
|
||||||
|
private static String SPARKSQL_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch2.commands";
|
||||||
|
private static String SPARKSQL_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-incremental.commands";
|
||||||
|
private static String HIVE_TBLCHECK_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-table-check.commands";
|
||||||
|
private static String HIVE_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch1.commands";
|
||||||
|
private static String HIVE_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch2-after-compaction.commands";
|
||||||
|
private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands";
|
||||||
|
|
||||||
|
|
||||||
|
private static String HIVE_SYNC_CMD_FMT = " --enable-hive-sync "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.database=default "
|
||||||
|
+ " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDemo() throws Exception {
|
||||||
|
setupDemo();
|
||||||
|
|
||||||
|
// batch 1
|
||||||
|
ingestFirstBatchAndHiveSync();
|
||||||
|
testHiveAfterFirstBatch();
|
||||||
|
testSparkSQLAfterFirstBatch();
|
||||||
|
|
||||||
|
// batch 2
|
||||||
|
ingestSecondBatchAndHiveSync();
|
||||||
|
testHiveAfterSecondBatch();
|
||||||
|
testSparkSQLAfterSecondBatch();
|
||||||
|
testIncrementalHiveQuery();
|
||||||
|
testIncrementalSparkSQLQuery();
|
||||||
|
|
||||||
|
// compaction
|
||||||
|
scheduleAndRunCompaction();
|
||||||
|
testHiveAfterSecondBatchAfterCompaction();
|
||||||
|
testIncrementalHiveQueryAfterCompaction();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupDemo() throws Exception {
|
||||||
|
List<String> cmds = new ImmutableList.Builder<String>()
|
||||||
|
.add("hdfs dfsadmin -safemode wait") // handle NN going into safe mode at times
|
||||||
|
.add("hdfs dfs -mkdir -p " + HDFS_DATA_DIR)
|
||||||
|
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1)
|
||||||
|
.add("/bin/bash " + DEMO_CONTAINER_SCRIPT)
|
||||||
|
.build();
|
||||||
|
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ingestFirstBatchAndHiveSync() throws Exception {
|
||||||
|
List<String> cmds = new ImmutableList.Builder<String>()
|
||||||
|
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||||
|
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||||
|
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||||
|
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||||
|
+ " --props /var/demo/config/dfs-source.properties "
|
||||||
|
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||||
|
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||||
|
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||||
|
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||||
|
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||||
|
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||||
|
+ " --props /var/demo/config/dfs-source.properties "
|
||||||
|
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||||
|
+ " --disable-compaction "
|
||||||
|
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testHiveAfterFirstBatch() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_TBLCHECK_COMMANDS);
|
||||||
|
assertStdOutContains(stdOutErrPair, "| stock_ticks_cow |");
|
||||||
|
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor |");
|
||||||
|
assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
|
||||||
|
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| partition |\n"
|
||||||
|
+ "+----------------+\n"
|
||||||
|
+ "| dt=2018-08-31 |\n"
|
||||||
|
+ "+----------------+\n", 3);
|
||||||
|
|
||||||
|
stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||||
|
assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n"
|
||||||
|
+ "+---------+----------------------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:29:00 |\n", 3);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | ts | volume | open | close |\n"
|
||||||
|
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSparkSQLAfterFirstBatch() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"|default |stock_ticks_cow |false |\n"
|
||||||
|
+ "|default |stock_ticks_mor |false |\n"
|
||||||
|
+ "|default |stock_ticks_mor_rt |false |");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"+------+-------------------+\n"
|
||||||
|
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||||
|
+ "+------+-------------------+", 3);
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ingestSecondBatchAndHiveSync() throws Exception {
|
||||||
|
List<String> cmds = new ImmutableList.Builder<String>()
|
||||||
|
.add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
|
||||||
|
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||||
|
+ HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
|
||||||
|
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||||
|
+ " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
|
||||||
|
+ " --props /var/demo/config/dfs-source.properties "
|
||||||
|
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||||
|
+ String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
|
||||||
|
.add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
|
||||||
|
+ HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
|
||||||
|
+ " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
|
||||||
|
+ " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
|
||||||
|
+ " --props /var/demo/config/dfs-source.properties "
|
||||||
|
+ " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
|
||||||
|
+ " --disable-compaction "
|
||||||
|
+ String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
|
||||||
|
.build();
|
||||||
|
executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testHiveAfterSecondBatch() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | _c1 |\n"
|
||||||
|
+ "+---------+----------------------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:29:00 |\n");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | _c1 |\n"
|
||||||
|
+ "+---------+----------------------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:59:00 |\n", 2);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | ts | volume | open | close |\n"
|
||||||
|
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | ts | volume | open | close |\n"
|
||||||
|
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | _c1 |\n"
|
||||||
|
+ "+---------+----------------------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:59:00 |", 2);
|
||||||
|
assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n"
|
||||||
|
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSparkSQLAfterSecondBatch() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"+------+-------------------+\n"
|
||||||
|
+ "|GOOG |2018-08-31 10:59:00|\n"
|
||||||
|
+ "+------+-------------------+", 2);
|
||||||
|
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2);
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"+------+-------------------+\n"
|
||||||
|
+ "|GOOG |2018-08-31 10:29:00|\n"
|
||||||
|
+ "+------+-------------------+");
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testIncrementalHiveQuery() throws Exception {
|
||||||
|
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||||
|
.getStdout().toString();
|
||||||
|
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||||
|
"min.commit.time=" + minCommitTime +"`");
|
||||||
|
assertStdOutContains(stdOutErrPair, "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testIncrementalHiveQueryAfterCompaction() throws Exception {
|
||||||
|
String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
|
||||||
|
.getStdout().toString();
|
||||||
|
Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
|
||||||
|
"min.commit.time=" + minCommitTime +"`");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"| symbol | ts | volume | open | close |\n"
|
||||||
|
+ "+---------+----------------------+---------+------------+-----------+\n"
|
||||||
|
+ "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testIncrementalSparkSQLQuery() throws Exception {
|
||||||
|
Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
|
||||||
|
assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"|default |stock_ticks_cow |false |\n"
|
||||||
|
+ "|default |stock_ticks_derived_mor |false |\n"
|
||||||
|
+ "|default |stock_ticks_derived_mor_rt|false |\n"
|
||||||
|
+ "|default |stock_ticks_mor |false |\n"
|
||||||
|
+ "|default |stock_ticks_mor_rt |false |\n"
|
||||||
|
+ "| |stock_ticks_cow_incr |true |");
|
||||||
|
assertStdOutContains(stdOutErrPair,
|
||||||
|
"|count(1)|\n"
|
||||||
|
+ "+--------+\n"
|
||||||
|
+ "|99 |", 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void scheduleAndRunCompaction() throws Exception {
|
||||||
|
executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + COMPACTION_COMMANDS, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
package org.apache.hudi.integ;
|
package org.apache.hudi.integ;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import org.apache.hudi.common.util.collection.Pair;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@@ -33,17 +33,6 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
NON_PARTITIONED,
|
NON_PARTITIONED,
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testRunEcho() throws Exception {
|
|
||||||
String[] cmd = new String[]{"echo", "Happy Testing"};
|
|
||||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
|
||||||
cmd, true);
|
|
||||||
String stdout = callback.getStdout().toString();
|
|
||||||
String stderr = callback.getStderr().toString();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
/**
|
/**
|
||||||
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
|
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
|
||||||
@@ -53,6 +42,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
|
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
|
||||||
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
|
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
|
||||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.SINGLE_KEY_PARTITIONED);
|
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.SINGLE_KEY_PARTITIONED);
|
||||||
|
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -64,6 +54,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
|
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
|
||||||
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
|
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
|
||||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.MULTI_KEYS_PARTITIONED);
|
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.MULTI_KEYS_PARTITIONED);
|
||||||
|
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@@ -75,6 +66,7 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
|
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
|
||||||
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
|
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
|
||||||
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.NON_PARTITIONED);
|
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.NON_PARTITIONED);
|
||||||
|
executeHiveCommand("drop table if exists " + hiveTableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -89,109 +81,54 @@ public class ITTestHoodieSanity extends ITTestBase {
|
|||||||
String hdfsUrl = "hdfs://namenode" + hdfsPath;
|
String hdfsUrl = "hdfs://namenode" + hdfsPath;
|
||||||
|
|
||||||
// Drop Table if it exists
|
// Drop Table if it exists
|
||||||
{
|
String hiveDropCmd = "drop table if exists " + hiveTableName;
|
||||||
String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
|
try {
|
||||||
executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
|
executeHiveCommand(hiveDropCmd);
|
||||||
|
} catch (AssertionError ex) {
|
||||||
|
// In travis, sometimes, the hivemetastore is not ready even though we wait for the port to be up
|
||||||
|
// Workaround to sleep for 5 secs and retry
|
||||||
|
Thread.sleep(5000);
|
||||||
|
executeHiveCommand(hiveDropCmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure table does not exist
|
// Ensure table does not exist
|
||||||
{
|
Pair<String, String> stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
Assert.assertTrue("Dropped table " + hiveTableName + " exists!", stdOutErr.getLeft().isEmpty());
|
||||||
TestExecStartResultCallback callback =
|
|
||||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
|
||||||
String stderr = callback.getStderr().toString();
|
|
||||||
String stdout = callback.getStdout().toString();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
|
|
||||||
Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Run Hoodie Java App
|
// Run Hoodie Java App
|
||||||
{
|
String cmd;
|
||||||
String[] cmd = null;
|
|
||||||
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
|
||||||
cmd = new String[]{
|
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||||
HOODIE_JAVA_APP,
|
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName;
|
||||||
"--hive-sync",
|
|
||||||
"--table-path", hdfsUrl,
|
|
||||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
|
||||||
"--hive-table", hiveTableName
|
|
||||||
};
|
|
||||||
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
} else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
|
||||||
cmd = new String[]{
|
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||||
HOODIE_JAVA_APP,
|
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||||
"--hive-sync",
|
+ " --use-multi-partition-keys";
|
||||||
"--table-path", hdfsUrl,
|
|
||||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
|
||||||
"--use-multi-partition-keys",
|
|
||||||
"--hive-table", hiveTableName
|
|
||||||
};
|
|
||||||
} else {
|
} else {
|
||||||
cmd = new String[]{
|
cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
|
||||||
HOODIE_JAVA_APP,
|
+ " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
|
||||||
"--hive-sync",
|
+ " --non-partitioned";
|
||||||
"--table-path", hdfsUrl,
|
|
||||||
"--hive-url", HIVE_SERVER_JDBC_URL,
|
|
||||||
"--non-partitioned",
|
|
||||||
"--hive-table", hiveTableName
|
|
||||||
};
|
|
||||||
}
|
|
||||||
TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
|
|
||||||
cmd, true);
|
|
||||||
String stdout = callback.getStdout().toString().trim();
|
|
||||||
String stderr = callback.getStderr().toString().trim();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
|
|
||||||
}
|
}
|
||||||
|
executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
|
||||||
|
|
||||||
// Ensure table does exist
|
// Ensure table does exist
|
||||||
{
|
stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
|
||||||
String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
|
Assert.assertEquals("Table exists", hiveTableName, stdOutErr.getLeft());
|
||||||
TestExecStartResultCallback callback =
|
|
||||||
executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
|
|
||||||
String stderr = callback.getStderr().toString().trim();
|
|
||||||
String stdout = callback.getStdout().toString().trim();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
|
||||||
Assert.assertEquals("Table exists", hiveTableName, stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure row count is 100 (without duplicates)
|
// Ensure row count is 100 (without duplicates)
|
||||||
{
|
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||||
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
|
||||||
TestExecStartResultCallback callback =
|
|
||||||
executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
|
||||||
String stderr = callback.getStderr().toString().trim();
|
|
||||||
String stdout = callback.getStdout().toString().trim();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
|
||||||
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
|
||||||
Integer.parseInt(stdout.trim()));
|
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||||
}
|
|
||||||
|
|
||||||
// Make the HDFS dataset non-hoodie and run the same query
|
// Make the HDFS dataset non-hoodie and run the same query
|
||||||
// Checks for interoperability with non-hoodie tables
|
// Checks for interoperability with non-hoodie tables
|
||||||
{
|
|
||||||
// Delete Hoodie directory to make it non-hoodie dataset
|
// Delete Hoodie directory to make it non-hoodie dataset
|
||||||
String[] cmd = new String[]{
|
executeCommandStringInDocker(ADHOC_1_CONTAINER, "hdfs dfs -rm -r " + hdfsPath + "/.hoodie", true);
|
||||||
"hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
|
|
||||||
};
|
|
||||||
TestExecStartResultCallback callback =
|
|
||||||
executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
|
|
||||||
String stderr = callback.getStderr().toString().trim();
|
|
||||||
String stdout = callback.getStdout().toString().trim();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
|
|
||||||
|
|
||||||
// Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
|
// Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
|
||||||
String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
|
stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
|
||||||
callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
|
|
||||||
stderr = callback.getStderr().toString().trim();
|
|
||||||
stdout = callback.getStdout().toString().trim();
|
|
||||||
LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
|
|
||||||
LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
|
|
||||||
Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
|
Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
|
||||||
Integer.parseInt(stdout.trim()));
|
Integer.parseInt(stdOutErr.getLeft().trim()));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,13 +23,10 @@
|
|||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-spark</artifactId>
|
<artifactId>hudi-spark</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<log4j.version>1.2.17</log4j.version>
|
|
||||||
<junit.version>4.10</junit.version>
|
|
||||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
@@ -52,12 +49,11 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>net.alchim31.maven</groupId>
|
<groupId>net.alchim31.maven</groupId>
|
||||||
<artifactId>scala-maven-plugin</artifactId>
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
<version>3.3.1</version>
|
<version>${scala-maven-plugin.version}</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>2.0.2</version>
|
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
@@ -156,95 +152,14 @@
|
|||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Scala -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.scala-lang</groupId>
|
<groupId>org.scala-lang</groupId>
|
||||||
<artifactId>scala-library</artifactId>
|
<artifactId>scala-library</artifactId>
|
||||||
<version>${scala.version}</version>
|
<version>${scala.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.scalatest</groupId>
|
|
||||||
<artifactId>scalatest_2.11</artifactId>
|
|
||||||
<version>3.0.1</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.databricks</groupId>
|
|
||||||
<artifactId>spark-avro_2.11</artifactId>
|
|
||||||
<version>4.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-annotations</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
|
||||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
<version>${log4j.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-configuration2</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-service</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-jdbc</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-metastore</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-common</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-client</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
@@ -265,12 +180,93 @@
|
|||||||
<artifactId>hudi-hive</artifactId>
|
<artifactId>hudi-hive</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>log4j</groupId>
|
||||||
<artifactId>junit-dep</artifactId>
|
<artifactId>log4j</artifactId>
|
||||||
<version>${junit.version}</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-annotations</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
|
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark (Packages) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.databricks</groupId>
|
||||||
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
|
<version>4.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-configuration2</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-service</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-jdbc</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-metastore</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-common</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-client</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
@@ -287,5 +283,19 @@
|
|||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scalatest</groupId>
|
||||||
|
<artifactId>scalatest_2.11</artifactId>
|
||||||
|
<version>${scalatest.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit-dep</artifactId>
|
||||||
|
<version>${junit-dep.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -121,40 +121,57 @@
|
|||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<!-- Hoodie -->
|
||||||
<groupId>io.javalin</groupId>
|
|
||||||
<artifactId>javalin</artifactId>
|
|
||||||
<version>2.8.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>fluent-hc</artifactId>
|
|
||||||
<version>4.3.2</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-annotations</artifactId>
|
<artifactId>jackson-annotations</artifactId>
|
||||||
<version>2.9.7</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-core</artifactId>
|
<artifactId>jackson-core</artifactId>
|
||||||
<version>2.9.7</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
<version>2.9.7</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>fluent-hc</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.javalin</groupId>
|
||||||
|
<artifactId>javalin</artifactId>
|
||||||
|
<version>2.8.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
<version>1.48</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.rocksdb</groupId>
|
<groupId>org.rocksdb</groupId>
|
||||||
<artifactId>rocksdbjni</artifactId>
|
<artifactId>rocksdbjni</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
@@ -194,25 +211,6 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-common</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
<type>test-jar</type>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
<version>1.48</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Parent dependencies -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-client</artifactId>
|
<artifactId>hadoop-client</artifactId>
|
||||||
@@ -223,20 +221,34 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>junit</groupId>
|
||||||
|
<artifactId>junit</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mockito</groupId>
|
||||||
|
<artifactId>mockito-all</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.esotericsoftware</groupId>
|
<groupId>com.esotericsoftware</groupId>
|
||||||
<artifactId>kryo</artifactId>
|
<artifactId>kryo</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.mockito</groupId>
|
|
||||||
<artifactId>mockito-all</artifactId>
|
|
||||||
<version>1.10.19</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -76,6 +76,124 @@
|
|||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Jetty -->
|
||||||
|
<dependency>
|
||||||
|
<!-- Needs to be at the top to ensure we get the correct dependency versions for jetty-server -->
|
||||||
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
|
<artifactId>jetty-all</artifactId>
|
||||||
|
<version>7.6.0.v20120127</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.eclipse.jetty</groupId>
|
||||||
|
<artifactId>jetty-server</artifactId>
|
||||||
|
<version>7.6.0.v20120127</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hive</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
|
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro-mapred</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-hadoop</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-streaming_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Dropwizard Metrics -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.javalin</groupId>
|
<groupId>io.javalin</groupId>
|
||||||
<artifactId>javalin</artifactId>
|
<artifactId>javalin</artifactId>
|
||||||
@@ -89,43 +207,83 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
<groupId>com.yammer.metrics</groupId>
|
||||||
<artifactId>metrics-core</artifactId>
|
<artifactId>metrics-core</artifactId>
|
||||||
|
<version>2.2.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Used for SQL templating -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.antlr</groupId>
|
||||||
|
<artifactId>stringtemplate</artifactId>
|
||||||
|
<version>4.0.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
<groupId>com.beust</groupId>
|
||||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
<artifactId>jcommander</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>com.twitter</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>bijection-avro_2.11</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>0.9.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Kafka -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>io.confluent</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>kafka-avro-serializer</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>3.0.0</version>
|
||||||
<classifier>tests</classifier>
|
</dependency>
|
||||||
<type>test-jar</type>
|
<dependency>
|
||||||
<scope>test</scope>
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>common-config</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>common-utils</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>kafka-schema-registry-client</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>commons-codec</groupId>
|
||||||
<artifactId>hudi-hive</artifactId>
|
<artifactId>commons-codec</artifactId>
|
||||||
<version>${project.version}</version>
|
</dependency>
|
||||||
<classifier>tests</classifier>
|
<dependency>
|
||||||
<type>test-jar</type>
|
<groupId>commons-dbcp</groupId>
|
||||||
<scope>test</scope>
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-lang</groupId>
|
||||||
|
<artifactId>commons-lang</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-pool</groupId>
|
||||||
|
<artifactId>commons-pool</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>hudi-spark</artifactId>
|
<artifactId>httpcore</artifactId>
|
||||||
<version>${project.version}</version>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>javax.servlet</groupId>
|
<groupId>javax.servlet</groupId>
|
||||||
@@ -133,13 +291,6 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty</groupId>
|
|
||||||
<artifactId>jetty-server</artifactId>
|
|
||||||
<version>7.6.0.v20120127</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
@@ -165,6 +316,7 @@
|
|||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-jdbc</artifactId>
|
<artifactId>hive-jdbc</artifactId>
|
||||||
@@ -180,38 +332,13 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-exec</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-service</artifactId>
|
<artifactId>hive-service</artifactId>
|
||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Hoodie - Test -->
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-hive</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>servlet-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-client</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-client</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
@@ -220,163 +347,35 @@
|
|||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-codec</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>commons-codec</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-dbcp</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>commons-dbcp</artifactId>
|
<artifactId>hudi-hive</artifactId>
|
||||||
</dependency>
|
<version>${project.version}</version>
|
||||||
<dependency>
|
<classifier>tests</classifier>
|
||||||
<groupId>commons-lang</groupId>
|
<type>test-jar</type>
|
||||||
<artifactId>commons-lang</artifactId>
|
<scope>test</scope>
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-pool</groupId>
|
|
||||||
<artifactId>commons-pool</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>log4j</artifactId>
|
<artifactId>hive-exec</artifactId>
|
||||||
</dependency>
|
<version>${hive.version}</version>
|
||||||
<dependency>
|
<scope>test</scope>
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>servlet-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.yammer.metrics</groupId>
|
|
||||||
<artifactId>metrics-core</artifactId>
|
|
||||||
<version>2.2.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-streaming_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Used for SQL templating -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.antlr</groupId>
|
|
||||||
<artifactId>stringtemplate</artifactId>
|
|
||||||
<version>4.0.2</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
<version>1.10.19</version>
|
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro-mapred</artifactId>
|
|
||||||
<version>1.7.7</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-hadoop</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>bijection-avro_2.11</artifactId>
|
|
||||||
<version>0.9.2</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>kafka-avro-serializer</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>common-config</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>common-utils</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>kafka-schema-registry-client</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
|
||||||
<artifactId>jetty-all</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ public class HoodieDeltaStreamer implements Serializable {
|
|||||||
public String propsFilePath =
|
public String propsFilePath =
|
||||||
"file://" + System.getProperty("user.dir") + "/src/test/resources/delta-streamer-config/dfs-source.properties";
|
"file://" + System.getProperty("user.dir") + "/src/test/resources/delta-streamer-config/dfs-source.properties";
|
||||||
|
|
||||||
@Parameter(names = {"--hudi-conf"}, description = "Any configuration that can be set in the properties file "
|
@Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file "
|
||||||
+ "(using the CLI parameter \"--propsFilePath\") can also be passed command line using this parameter")
|
+ "(using the CLI parameter \"--propsFilePath\") can also be passed command line using this parameter")
|
||||||
public List<String> configs = new ArrayList<>();
|
public List<String> configs = new ArrayList<>();
|
||||||
|
|
||||||
|
|||||||
@@ -27,45 +27,90 @@
|
|||||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-hadoop-mr</artifactId>
|
<artifactId>hudi-hadoop-mr</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<!-- other hudi deps will come from hudi-hive-bundle -->
|
<!-- other hoodie deps will come from hoodie-hive-bundle -->
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>*</artifactId>
|
<artifactId>*</artifactId>
|
||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet (Twitter) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-logging</groupId>
|
||||||
|
<artifactId>commons-logging</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-codec</groupId>
|
||||||
|
<artifactId>commons-codec</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter.common</groupId>
|
||||||
|
<artifactId>objectsize</artifactId>
|
||||||
|
<version>0.0.12</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-auth</artifactId>
|
<artifactId>hadoop-auth</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-jdbc</artifactId>
|
<artifactId>hive-jdbc</artifactId>
|
||||||
@@ -109,49 +154,9 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>com.esotericsoftware</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>kryo</artifactId>
|
||||||
</dependency>
|
<scope>test</scope>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-logging</groupId>
|
|
||||||
<artifactId>commons-logging</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-io</groupId>
|
|
||||||
<artifactId>commons-io</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-codec</groupId>
|
|
||||||
<artifactId>commons-codec</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter.common</groupId>
|
|
||||||
<artifactId>objectsize</artifactId>
|
|
||||||
<version>0.0.12</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
@@ -159,6 +164,7 @@
|
|||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
@@ -28,71 +28,28 @@
|
|||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-client</artifactId>
|
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hudi-hive</artifactId>
|
||||||
</dependency>
|
<version>${project.version}</version>
|
||||||
<dependency>
|
<exclusions>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<exclusion>
|
||||||
<artifactId>hadoop-auth</artifactId>
|
<!-- All other hoodie deps will come from hoodie-hadoop-mr-bundle -->
|
||||||
</dependency>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<dependency>
|
<artifactId>*</artifactId>
|
||||||
<groupId>${hive.groupid}</groupId>
|
</exclusion>
|
||||||
<artifactId>hive-service</artifactId>
|
</exclusions>
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-jdbc</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-metastore</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-common</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.thrift</groupId>
|
|
||||||
<artifactId>libthrift</artifactId>
|
|
||||||
<version>${thrift.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.thrift</groupId>
|
|
||||||
<artifactId>libfb303</artifactId>
|
|
||||||
<version>0.9.3</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>joda-time</groupId>
|
|
||||||
<artifactId>joda-time</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Apache commons -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-io</groupId>
|
|
||||||
<artifactId>commons-io</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Logging -->
|
<!-- Logging -->
|
||||||
@@ -105,43 +62,93 @@
|
|||||||
<artifactId>slf4j-log4j12</artifactId>
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Parquet (Twitter) -->
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.twitter</groupId>
|
<groupId>com.twitter</groupId>
|
||||||
<artifactId>parquet-avro</artifactId>
|
<artifactId>parquet-avro</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Thrift -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.thrift</groupId>
|
||||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
<artifactId>libthrift</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${thrift.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.thrift</groupId>
|
||||||
|
<artifactId>libfb303</artifactId>
|
||||||
|
<version>0.9.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>com.google.guava</groupId>
|
||||||
<artifactId>hudi-hive</artifactId>
|
<artifactId>guava</artifactId>
|
||||||
<version>${project.version}</version>
|
</dependency>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
<dependency>
|
||||||
<!-- All other hudi deps will come from hudi-hadoop-mr-bundle -->
|
<groupId>joda-time</groupId>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<artifactId>joda-time</artifactId>
|
||||||
<artifactId>*</artifactId>
|
</dependency>
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpcore</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-auth</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-service</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-jdbc</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-metastore</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|||||||
@@ -28,46 +28,16 @@
|
|||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hadoop-client</artifactId>
|
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
||||||
</dependency>
|
<version>${project.version}</version>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-auth</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.thrift</groupId>
|
|
||||||
<artifactId>libthrift</artifactId>
|
|
||||||
<version>${thrift.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>joda-time</groupId>
|
|
||||||
<artifactId>joda-time</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Apache commons -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-io</groupId>
|
|
||||||
<artifactId>commons-io</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Logging -->
|
<!-- Logging -->
|
||||||
@@ -80,30 +50,70 @@
|
|||||||
<artifactId>slf4j-log4j12</artifactId>
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Parquet (Twitter) -->
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.twitter</groupId>
|
<groupId>com.twitter</groupId>
|
||||||
<artifactId>parquet-avro</artifactId>
|
<artifactId>parquet-avro</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Thrift -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.thrift</groupId>
|
||||||
<artifactId>hudi-hadoop-mr-bundle</artifactId>
|
<artifactId>libthrift</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${thrift.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>joda-time</groupId>
|
||||||
|
<artifactId>joda-time</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents-->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpcore</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-auth</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|||||||
@@ -127,6 +127,8 @@
|
|||||||
<pattern>parquet.schema</pattern>
|
<pattern>parquet.schema</pattern>
|
||||||
<shadedPattern>org.apache.hudi.parquet.schema</shadedPattern>
|
<shadedPattern>org.apache.hudi.parquet.schema</shadedPattern>
|
||||||
</relocation>
|
</relocation>
|
||||||
|
<!-- TODO: Revisit GH ISSUE #533 & PR#633-->
|
||||||
|
<!--
|
||||||
<relocation>
|
<relocation>
|
||||||
<pattern>org.apache.hive.jdbc.</pattern>
|
<pattern>org.apache.hive.jdbc.</pattern>
|
||||||
<shadedPattern>org.apache.hudi.org.apache.hive.jdbc.</shadedPattern>
|
<shadedPattern>org.apache.hudi.org.apache.hive.jdbc.</shadedPattern>
|
||||||
@@ -155,6 +157,11 @@
|
|||||||
<pattern>org.apache.hadoop.hive.service.</pattern>
|
<pattern>org.apache.hadoop.hive.service.</pattern>
|
||||||
<shadedPattern>org.apache.hudi.org.apache.hadoop_hive.service.</shadedPattern>
|
<shadedPattern>org.apache.hudi.org.apache.hadoop_hive.service.</shadedPattern>
|
||||||
</relocation>
|
</relocation>
|
||||||
|
<relocation>
|
||||||
|
<pattern>org.apache.hadoop.hive.serde2.</pattern>
|
||||||
|
<shadedPattern>org.apache.hudi.org.apache.hadoop_hive.serde2.</shadedPattern>
|
||||||
|
</relocation>
|
||||||
|
-->
|
||||||
<relocation>
|
<relocation>
|
||||||
<pattern>com.esotericsoftware.kryo.</pattern>
|
<pattern>com.esotericsoftware.kryo.</pattern>
|
||||||
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
|
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
|
||||||
@@ -177,9 +184,8 @@
|
|||||||
<exclude>org.apache.derby:derby</exclude>
|
<exclude>org.apache.derby:derby</exclude>
|
||||||
<exclude>org.apache.hadoop:*</exclude>
|
<exclude>org.apache.hadoop:*</exclude>
|
||||||
<exclude>org.apache.hbase:*</exclude>
|
<exclude>org.apache.hbase:*</exclude>
|
||||||
<!-- Just include hive-common, hive-service, hive-metastore and hive-jdbc -->
|
<!-- Just include hive-common, hive-serde, hive-service, hive-metastore and hive-jdbc -->
|
||||||
<exclude>org.apache.hive:hive-exec</exclude>
|
<exclude>org.apache.hive:hive-exec</exclude>
|
||||||
<exclude>org.apache.hive:hive-serde</exclude>
|
|
||||||
<exclude>org.apache.hive:hive-shims</exclude>
|
<exclude>org.apache.hive:hive-shims</exclude>
|
||||||
<exclude>org.apache.spark:*</exclude>
|
<exclude>org.apache.spark:*</exclude>
|
||||||
</excludes>
|
</excludes>
|
||||||
@@ -206,102 +212,18 @@
|
|||||||
</build>
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<!-- Scala -->
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-codec</groupId>
|
|
||||||
<artifactId>commons-codec</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.scala-lang</groupId>
|
<groupId>org.scala-lang</groupId>
|
||||||
<artifactId>scala-library</artifactId>
|
<artifactId>scala-library</artifactId>
|
||||||
<version>${scala.version}</version>
|
<version>${scala.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.scalatest</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>scalatest_2.11</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
<version>3.0.1</version>
|
<version>${project.version}</version>
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.databricks</groupId>
|
|
||||||
<artifactId>spark-avro_2.11</artifactId>
|
|
||||||
<version>4.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-annotations</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-common</artifactId>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
<version>${log4j.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-service</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-jdbc</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-metastore</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-common</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-configuration2</artifactId>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
@@ -317,17 +239,142 @@
|
|||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-hive</artifactId>
|
<artifactId>hudi-hive</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-client</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark</artifactId>
|
<artifactId>hudi-spark</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
<version>${log4j.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-annotations</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark (Packages) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.databricks</groupId>
|
||||||
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
|
<version>4.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.beust</groupId>
|
||||||
|
<artifactId>jcommander</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-codec</groupId>
|
||||||
|
<artifactId>commons-codec</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-configuration2</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-common</artifactId>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive - Compile -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-jdbc</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<classifier>standalone</classifier>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<!-- TODO: Reinvestigate PR 633 -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-service</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-jdbc</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-serde</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-metastore</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-common</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<!-- TODO: Reinvestigate PR 633 -->
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scalatest</groupId>
|
||||||
|
<artifactId>scalatest_2.11</artifactId>
|
||||||
|
<version>${scalatest.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|
||||||
|
|||||||
@@ -28,8 +28,6 @@
|
|||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<log4j.version>1.2.17</log4j.version>
|
|
||||||
<junit.version>4.10</junit.version>
|
|
||||||
<checkstyle.skip>true</checkstyle.skip>
|
<checkstyle.skip>true</checkstyle.skip>
|
||||||
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
|
||||||
<notice.file>HUDI_NOTICE.txt</notice.file>
|
<notice.file>HUDI_NOTICE.txt</notice.file>
|
||||||
@@ -205,6 +203,98 @@
|
|||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Hoodie -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hive</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-utilities</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Logging -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
|
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro-mapred</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-hadoop</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-streaming_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Dropwizard Metrics -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-core</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.javalin</groupId>
|
<groupId>io.javalin</groupId>
|
||||||
<artifactId>javalin</artifactId>
|
<artifactId>javalin</artifactId>
|
||||||
@@ -212,21 +302,114 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
<groupId>com.yammer.metrics</groupId>
|
||||||
<artifactId>metrics-core</artifactId>
|
<artifactId>metrics-core</artifactId>
|
||||||
|
<version>2.2.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Used for SQL templating -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.antlr</groupId>
|
||||||
|
<artifactId>stringtemplate</artifactId>
|
||||||
|
<version>4.0.2</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
<groupId>com.beust</groupId>
|
||||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
<artifactId>jcommander</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>bijection-avro_2.11</artifactId>
|
||||||
|
<version>0.9.2</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Kafka -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>kafka-avro-serializer</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>common-config</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>common-utils</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.confluent</groupId>
|
||||||
|
<artifactId>kafka-schema-registry-client</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-codec</groupId>
|
||||||
|
<artifactId>commons-codec</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-pool</groupId>
|
||||||
|
<artifactId>commons-pool</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpcore</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-client</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
<artifactId>hive-jdbc</artifactId>
|
||||||
|
<version>${hive.version}</version>
|
||||||
|
<classifier>standalone</classifier>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.servlet</groupId>
|
||||||
|
<artifactId>servlet-api</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-client</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-common</artifactId>
|
<artifactId>hudi-common</artifactId>
|
||||||
@@ -235,7 +418,6 @@
|
|||||||
<type>test-jar</type>
|
<type>test-jar</type>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-hive</artifactId>
|
<artifactId>hudi-hive</artifactId>
|
||||||
@@ -245,17 +427,7 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Hadoop - Test -->
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-spark</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
@@ -275,7 +447,13 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hadoop</groupId>
|
||||||
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hive - Test -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-exec</artifactId>
|
<artifactId>hive-exec</artifactId>
|
||||||
@@ -283,191 +461,11 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>${hive.groupid}</groupId>
|
|
||||||
<artifactId>hive-jdbc</artifactId>
|
|
||||||
<version>${hive.version}</version>
|
|
||||||
<classifier>standalone</classifier>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>servlet-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-hive</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>servlet-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-client</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-utilities</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hudi</groupId>
|
|
||||||
<artifactId>hudi-client</artifactId>
|
|
||||||
<version>${project.version}</version>
|
|
||||||
<classifier>tests</classifier>
|
|
||||||
<type>test-jar</type>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-codec</groupId>
|
|
||||||
<artifactId>commons-codec</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-pool</groupId>
|
|
||||||
<artifactId>commons-pool</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-mapreduce-client-common</artifactId>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.servlet</groupId>
|
|
||||||
<artifactId>servlet-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.hadoop</groupId>
|
|
||||||
<artifactId>hadoop-client</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.yammer.metrics</groupId>
|
|
||||||
<artifactId>metrics-core</artifactId>
|
|
||||||
<version>2.2.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-streaming_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
<scope>provided</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Used for SQL templating -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.antlr</groupId>
|
|
||||||
<artifactId>stringtemplate</artifactId>
|
|
||||||
<version>4.0.2</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.beust</groupId>
|
|
||||||
<artifactId>jcommander</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
<version>1.10.19</version>
|
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro-mapred</artifactId>
|
|
||||||
<version>1.7.7</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-hadoop</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>bijection-avro_2.11</artifactId>
|
|
||||||
<version>0.9.2</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>kafka-avro-serializer</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>common-config</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>common-utils</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.confluent</groupId>
|
|
||||||
<artifactId>kafka-schema-registry-client</artifactId>
|
|
||||||
<version>3.0.0</version>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|
||||||
|
|||||||
594
pom.xml
594
pom.xml
@@ -24,11 +24,9 @@
|
|||||||
<artifactId>hudi</artifactId>
|
<artifactId>hudi</artifactId>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<version>0.5.0-SNAPSHOT</version>
|
<version>0.5.0-SNAPSHOT</version>
|
||||||
<description>Hoodie is a Apache Spark library that provides the ability to efficiently do
|
<description>Apache Hudi brings stream style processing on big data</description>
|
||||||
incremental processing on datasets in HDFS
|
<url>https://github.com/apache/incubator-hudi</url>
|
||||||
</description>
|
<name>Hudi</name>
|
||||||
<url>https://github.com/uber/hudi</url>
|
|
||||||
<name>Hoodie</name>
|
|
||||||
|
|
||||||
<modules>
|
<modules>
|
||||||
<module>hudi-common</module>
|
<module>hudi-common</module>
|
||||||
@@ -58,15 +56,15 @@
|
|||||||
</licenses>
|
</licenses>
|
||||||
|
|
||||||
<organization>
|
<organization>
|
||||||
<name>Uber Technologies Inc.</name>
|
<name>The Apache Software Foundation</name>
|
||||||
<url>http://www.uber.com/</url>
|
<url>https://www.apache.org</url>
|
||||||
</organization>
|
</organization>
|
||||||
|
|
||||||
<developers>
|
<developers>
|
||||||
<developer>
|
<developer>
|
||||||
<id>vinothchandar</id>
|
<id>vinothchandar</id>
|
||||||
<name>Vinoth Chandar</name>
|
<name>Vinoth Chandar</name>
|
||||||
<organization>Uber</organization>
|
<organization>Confluent Inc</organization>
|
||||||
</developer>
|
</developer>
|
||||||
<developer>
|
<developer>
|
||||||
<id>prasannarajaperumal</id>
|
<id>prasannarajaperumal</id>
|
||||||
@@ -129,10 +127,13 @@
|
|||||||
<maven-surefire-plugin.version>2.19.1</maven-surefire-plugin.version>
|
<maven-surefire-plugin.version>2.19.1</maven-surefire-plugin.version>
|
||||||
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
|
<maven-compiler-plugin.version>3.8.1</maven-compiler-plugin.version>
|
||||||
<fasterxml.version>2.6.7</fasterxml.version>
|
<fasterxml.version>2.6.7</fasterxml.version>
|
||||||
|
<glassfish.version>2.17</glassfish.version>
|
||||||
<parquet.version>1.8.1</parquet.version>
|
<parquet.version>1.8.1</parquet.version>
|
||||||
<junit.version>4.11</junit.version>
|
<junit.version>4.11</junit.version>
|
||||||
<mockito.version>1.9.5</mockito.version>
|
<junit-dep.version>4.10</junit-dep.version>
|
||||||
|
<mockito.version>1.10.19</mockito.version>
|
||||||
<log4j.version>1.2.17</log4j.version>
|
<log4j.version>1.2.17</log4j.version>
|
||||||
|
<slf4j.version>1.7.5</slf4j.version>
|
||||||
<joda.version>2.9.9</joda.version>
|
<joda.version>2.9.9</joda.version>
|
||||||
<hadoop.version>2.7.3</hadoop.version>
|
<hadoop.version>2.7.3</hadoop.version>
|
||||||
<hive.groupid>org.apache.hive</hive.groupid>
|
<hive.groupid>org.apache.hive</hive.groupid>
|
||||||
@@ -142,12 +143,17 @@
|
|||||||
<avro.version>1.7.7</avro.version>
|
<avro.version>1.7.7</avro.version>
|
||||||
<scala.version>2.11.8</scala.version>
|
<scala.version>2.11.8</scala.version>
|
||||||
<scala.libversion>2.11</scala.libversion>
|
<scala.libversion>2.11</scala.libversion>
|
||||||
|
<scala-maven-plugin.version>3.3.1</scala-maven-plugin.version>
|
||||||
|
<scalatest.version>3.0.1</scalatest.version>
|
||||||
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
|
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire.properties</surefire-log4j.file>
|
||||||
<thrift.version>0.12.0</thrift.version>
|
<thrift.version>0.12.0</thrift.version>
|
||||||
<hbase.version>1.2.3</hbase.version>
|
<hbase.version>1.2.3</hbase.version>
|
||||||
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
|
<codehaus-jackson.version>1.9.13</codehaus-jackson.version>
|
||||||
<notice.dir>${project.basedir}</notice.dir>
|
<notice.dir>${project.basedir}</notice.dir>
|
||||||
<notice.file>NOTICE.txt</notice.file>
|
<notice.file>NOTICE.txt</notice.file>
|
||||||
|
<skipTests>false</skipTests>
|
||||||
|
<skipITs>${skipTests}</skipITs>
|
||||||
|
<skipUTs>${skipTests}</skipUTs>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<scm>
|
<scm>
|
||||||
@@ -257,7 +263,7 @@
|
|||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>3.8.1</version>
|
<version>${maven-compiler-plugin.version}</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<source>1.8</source>
|
<source>1.8</source>
|
||||||
<target>1.8</target>
|
<target>1.8</target>
|
||||||
@@ -292,6 +298,7 @@
|
|||||||
<artifactId>maven-surefire-plugin</artifactId>
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
<version>${maven-surefire-plugin.version}</version>
|
<version>${maven-surefire-plugin.version}</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
|
<skip>${skipUTs}</skip>
|
||||||
<!-- Sets the VM argument line used when unit tests are run. -->
|
<!-- Sets the VM argument line used when unit tests are run. -->
|
||||||
<argLine>${surefireArgLine}</argLine>
|
<argLine>${surefireArgLine}</argLine>
|
||||||
<systemPropertyVariables>
|
<systemPropertyVariables>
|
||||||
@@ -452,18 +459,152 @@
|
|||||||
|
|
||||||
<dependencyManagement>
|
<dependencyManagement>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
<!-- Logging -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.google.code.gson</groupId>
|
<groupId>log4j</groupId>
|
||||||
<artifactId>gson</artifactId>
|
<artifactId>log4j</artifactId>
|
||||||
<version>2.3.1</version>
|
<version>${log4j.version}</version>
|
||||||
<scope>test</scope>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
<version>${slf4j.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
|
<version>${slf4j.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Fasterxml -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>jackson-annotations</artifactId>
|
||||||
<version>${junit.version}</version>
|
<version>${fasterxml.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-core</artifactId>
|
||||||
|
<version>${fasterxml.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
|
<artifactId>jackson-databind</artifactId>
|
||||||
|
<version>${fasterxml.version}.1</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||||
|
<artifactId>jackson-datatype-guava</artifactId>
|
||||||
|
<version>${fasterxml.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
|
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||||
|
<version>${fasterxml.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Glassfish -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.glassfish.jersey.core</groupId>
|
||||||
|
<artifactId>jersey-server</artifactId>
|
||||||
|
<version>${glassfish.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.glassfish.jersey.connectors</groupId>
|
||||||
|
<artifactId>jersey-apache-connector</artifactId>
|
||||||
|
<version>${glassfish.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.glassfish.jersey.containers</groupId>
|
||||||
|
<artifactId>jersey-container-servlet-core</artifactId>
|
||||||
|
<version>${glassfish.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<!-- Needed for running HiveServer for Tests -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
|
<artifactId>jetty-all</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
|
<version>7.6.0.v20120127</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Avro -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro</artifactId>
|
||||||
|
<version>${avro.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.avro</groupId>
|
||||||
|
<artifactId>avro-mapred</artifactId>
|
||||||
|
<version>${avro.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
<version>${parquet.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-hadoop</artifactId>
|
||||||
|
<version>${parquet.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.parquet</groupId>
|
||||||
|
<artifactId>parquet-hive-bundle</artifactId>
|
||||||
|
<version>${parquet.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Parquet (Twitter) -->
|
||||||
|
<!-- Spark parquet version 1.7.0 does not play well with the hive 1.1.0 installed in cluster (which requires twitter parquet 1.5.0) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-hadoop-bundle</artifactId>
|
||||||
|
<version>1.6.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-hive-bundle</artifactId>
|
||||||
|
<version>1.6.0</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.twitter</groupId>
|
||||||
|
<artifactId>parquet-avro</artifactId>
|
||||||
|
<version>1.6.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.11</artifactId>
|
||||||
|
<version>${spark.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Spark (Packages) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.databricks</groupId>
|
||||||
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
|
<version>4.0.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Dropwizard Metrics -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-graphite</artifactId>
|
||||||
|
<version>${metrics.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.dropwizard.metrics</groupId>
|
||||||
|
<artifactId>metrics-core</artifactId>
|
||||||
|
<version>${metrics.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
@@ -473,28 +614,119 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>log4j</groupId>
|
|
||||||
<artifactId>log4j</artifactId>
|
|
||||||
<version>${log4j.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<!-- Used by hudi-hive -->
|
|
||||||
<groupId>joda-time</groupId>
|
<groupId>joda-time</groupId>
|
||||||
<artifactId>joda-time</artifactId>
|
<artifactId>joda-time</artifactId>
|
||||||
<version>${joda.version}</version>
|
<version>${joda.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- we have to stay at <= 16.0, due to issues with HBase client -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>15.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>xerces</groupId>
|
||||||
|
<artifactId>xercesImpl</artifactId>
|
||||||
|
<version>2.9.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>xalan</groupId>
|
||||||
|
<artifactId>xalan</artifactId>
|
||||||
|
<version>2.7.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.rocksdb</groupId>
|
||||||
|
<artifactId>rocksdbjni</artifactId>
|
||||||
|
<version>5.17.2</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Apache Commons -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-codec</groupId>
|
||||||
|
<artifactId>commons-codec</artifactId>
|
||||||
|
<version>1.4</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
<version>2.6</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-lang</groupId>
|
||||||
|
<artifactId>commons-lang</artifactId>
|
||||||
|
<version>2.6</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-logging</groupId>
|
||||||
|
<artifactId>commons-logging</artifactId>
|
||||||
|
<version>1.2</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-dbcp</groupId>
|
||||||
|
<artifactId>commons-dbcp</artifactId>
|
||||||
|
<version>1.4</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-pool</groupId>
|
||||||
|
<artifactId>commons-pool</artifactId>
|
||||||
|
<version>1.4</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-configuration2</artifactId>
|
||||||
|
<version>2.1.1</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Httpcomponents -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>fluent-hc</artifactId>
|
||||||
|
<version>4.3.2</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpcore</artifactId>
|
||||||
|
<version>4.3.2</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
<version>4.3.2</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Jackson -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-core-asl</artifactId>
|
||||||
|
<version>${codehaus-jackson.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-mapper-asl</artifactId>
|
||||||
|
<version>${codehaus-jackson.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-jaxrs</artifactId>
|
||||||
|
<version>${codehaus-jackson.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-xc</artifactId>
|
||||||
|
<version>${codehaus-jackson.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hadoop -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-client</artifactId>
|
<artifactId>hadoop-client</artifactId>
|
||||||
<version>${hadoop.version}</version>
|
<version>${hadoop.version}</version>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
|
||||||
<groupId>com.fasterxml.jackson.*</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>javax.servlet</groupId>
|
<groupId>javax.servlet</groupId>
|
||||||
<artifactId>servlet-api</artifactId>
|
<artifactId>servlet-api</artifactId>
|
||||||
@@ -505,48 +737,11 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
<version>${parquet.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-hadoop</artifactId>
|
|
||||||
<version>${parquet.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro-mapred</artifactId>
|
|
||||||
<version>${avro.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- we have to stay at <= 16.0, due to issues with HBase client -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.google.guava</groupId>
|
|
||||||
<artifactId>guava</artifactId>
|
|
||||||
<version>15.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Hadoop Libraries -->
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
<version>${hadoop.version}</version>
|
<version>${hadoop.version}</version>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>jdk.tools</groupId>
|
|
||||||
<artifactId>jdk.tools</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.xml.bind</groupId>
|
|
||||||
<artifactId>jaxb-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
@@ -559,12 +754,6 @@
|
|||||||
<artifactId>hadoop-auth</artifactId>
|
<artifactId>hadoop-auth</artifactId>
|
||||||
<version>${hadoop.version}</version>
|
<version>${hadoop.version}</version>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>com.fasterxml.jackson.*</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
@@ -591,214 +780,36 @@
|
|||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.rocksdb</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>rocksdbjni</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
<version>5.17.2</version>
|
<classifier>tests</classifier>
|
||||||
|
<version>${hadoop.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-codec</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>commons-codec</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
<version>1.4</version>
|
<classifier>tests</classifier>
|
||||||
</dependency>
|
<version>${hadoop.version}</version>
|
||||||
<dependency>
|
|
||||||
<groupId>commons-lang</groupId>
|
|
||||||
<artifactId>commons-lang</artifactId>
|
|
||||||
<version>2.6</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-logging</groupId>
|
|
||||||
<artifactId>commons-logging</artifactId>
|
|
||||||
<version>1.2</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-io</groupId>
|
|
||||||
<artifactId>commons-io</artifactId>
|
|
||||||
<version>2.6</version>
|
|
||||||
</dependency>
|
|
||||||
<!-- Storage formats -->
|
|
||||||
<!-- Spark parquet version 1.7.0 does not play well with the hive 1.1.0 installed in cluster (which requires twitter parquet 1.5.0) -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-hadoop-bundle</artifactId>
|
|
||||||
<version>1.6.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-hive-bundle</artifactId>
|
|
||||||
<version>1.6.0</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.twitter</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
<version>1.6.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-hive-bundle</artifactId>
|
|
||||||
<version>${parquet.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
<scope>provided</scope>
|
|
||||||
<exclusions>
|
<exclusions>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>com.fasterxml.jackson.**</groupId>
|
<groupId>jdk.tools</groupId>
|
||||||
<artifactId>*</artifactId>
|
<artifactId>jdk.tools</artifactId>
|
||||||
</exclusion>
|
</exclusion>
|
||||||
<exclusion>
|
<exclusion>
|
||||||
<groupId>javax.servlet</groupId>
|
<groupId>javax.xml.bind</groupId>
|
||||||
<artifactId>servlet-api</artifactId>
|
<artifactId>jaxb-api</artifactId>
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.spark</groupId>
|
|
||||||
<artifactId>spark-sql_2.11</artifactId>
|
|
||||||
<version>${spark.version}</version>
|
|
||||||
<scope>provided</scope>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>com.fasterxml.jackson.**</groupId>
|
|
||||||
<artifactId>*</artifactId>
|
|
||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<!-- HBase -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hbase</groupId>
|
<groupId>org.apache.hbase</groupId>
|
||||||
<artifactId>hbase-client</artifactId>
|
<artifactId>hbase-client</artifactId>
|
||||||
<version>${hbase.version}</version>
|
<version>${hbase.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<!-- Hive -->
|
||||||
<groupId>org.apache.avro</groupId>
|
|
||||||
<artifactId>avro</artifactId>
|
|
||||||
<version>${avro.version}</version>
|
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Metrics -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
|
||||||
<artifactId>metrics-graphite</artifactId>
|
|
||||||
<version>${metrics.version}</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>io.dropwizard.metrics</groupId>
|
|
||||||
<artifactId>metrics-core</artifactId>
|
|
||||||
<version>${metrics.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>xerces</groupId>
|
|
||||||
<artifactId>xercesImpl</artifactId>
|
|
||||||
<version>2.9.1</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>xalan</groupId>
|
|
||||||
<artifactId>xalan</artifactId>
|
|
||||||
<version>2.7.1</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-dbcp</groupId>
|
|
||||||
<artifactId>commons-dbcp</artifactId>
|
|
||||||
<version>1.4</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>commons-pool</groupId>
|
|
||||||
<artifactId>commons-pool</artifactId>
|
|
||||||
<version>1.4</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>fluent-hc</artifactId>
|
|
||||||
<version>4.3.2</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpcore</artifactId>
|
|
||||||
<version>4.3.2</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
|
||||||
<artifactId>httpclient</artifactId>
|
|
||||||
<version>4.3.6</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-api</artifactId>
|
|
||||||
<version>1.7.5</version>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.slf4j</groupId>
|
|
||||||
<artifactId>slf4j-log4j12</artifactId>
|
|
||||||
<version>1.7.5</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-configuration2</artifactId>
|
|
||||||
<version>2.1.1</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-annotations</artifactId>
|
|
||||||
<version>${fasterxml.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-core</artifactId>
|
|
||||||
<version>${fasterxml.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-databind</artifactId>
|
|
||||||
<version>${fasterxml.version}.1</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
|
||||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
|
||||||
<version>${fasterxml.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.codehaus.jackson</groupId>
|
|
||||||
<artifactId>jackson-core-asl</artifactId>
|
|
||||||
<version>${codehaus-jackson.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.codehaus.jackson</groupId>
|
|
||||||
<artifactId>jackson-mapper-asl</artifactId>
|
|
||||||
<version>${codehaus-jackson.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.codehaus.jackson</groupId>
|
|
||||||
<artifactId>jackson-jaxrs</artifactId>
|
|
||||||
<version>${codehaus-jackson.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.codehaus.jackson</groupId>
|
|
||||||
<artifactId>jackson-xc</artifactId>
|
|
||||||
<version>${codehaus-jackson.version}</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-service</artifactId>
|
<artifactId>hive-service</artifactId>
|
||||||
@@ -919,34 +930,28 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>com.google.code.gson</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>gson</artifactId>
|
||||||
<classifier>tests</classifier>
|
<version>2.3.1</version>
|
||||||
<version>${hadoop.version}</version>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<classifier>tests</classifier>
|
<version>${junit.version}</version>
|
||||||
<version>${hadoop.version}</version>
|
<scope>test</scope>
|
||||||
<exclusions>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>jdk.tools</groupId>
|
|
||||||
<artifactId>jdk.tools</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
<exclusion>
|
|
||||||
<groupId>javax.xml.bind</groupId>
|
|
||||||
<artifactId>jaxb-api</artifactId>
|
|
||||||
</exclusion>
|
|
||||||
</exclusions>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.mockito</groupId>
|
<groupId>org.mockito</groupId>
|
||||||
<artifactId>mockito-all</artifactId>
|
<artifactId>mockito-all</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
<version>1.10.19</version>
|
<version>${mockito.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<!--Used to test execution in task executor after de-serializing-->
|
<!--Used to test execution in task executor after de-serializing-->
|
||||||
<groupId>com.esotericsoftware</groupId>
|
<groupId>com.esotericsoftware</groupId>
|
||||||
@@ -954,24 +959,29 @@
|
|||||||
<version>4.0.0</version>
|
<version>4.0.0</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- Needed for running HiveServer for Tests -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
|
||||||
<artifactId>jetty-all</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
<version>7.6.0.v20120127</version>
|
|
||||||
</dependency>
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</dependencyManagement>
|
</dependencyManagement>
|
||||||
<repositories>
|
<repositories>
|
||||||
<repository>
|
<repository>
|
||||||
<id>Maven repository</id>
|
<id>Maven Central</id>
|
||||||
<url>https://central.maven.org/maven2/</url>
|
<name>Maven Repository</name>
|
||||||
|
<url>https://repo.maven.apache.org/maven2</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</snapshots>
|
||||||
</repository>
|
</repository>
|
||||||
<repository>
|
<repository>
|
||||||
<id>cloudera-repo-releases</id>
|
<id>cloudera-repo-releases</id>
|
||||||
<url>https://repository.cloudera.com/artifactory/public/</url>
|
<url>https://repository.cloudera.com/artifactory/public/</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</snapshots>
|
||||||
</repository>
|
</repository>
|
||||||
</repositories>
|
</repositories>
|
||||||
|
|
||||||
|
|||||||
17
tools/run_travis_tests.sh
Executable file
17
tools/run_travis_tests.sh
Executable file
@@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
mode=$1
|
||||||
|
|
||||||
|
if [ "$mode" = "unit" ];
|
||||||
|
then
|
||||||
|
echo "Running Unit Tests"
|
||||||
|
mvn test -DskipITs=true -B
|
||||||
|
elif [ "$mode" = "integration" ];
|
||||||
|
then
|
||||||
|
echo "Running Integration Tests"
|
||||||
|
mvn verify -DskipUTs=true -B
|
||||||
|
else
|
||||||
|
echo "Unknown mode $mode"
|
||||||
|
exit 1;
|
||||||
|
fi
|
||||||
|
|
||||||
Reference in New Issue
Block a user