diff --git a/.gitignore b/.gitignore
index f4f680ade..9e726f4f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ target/
*.war
*.ear
*.db
+*.patch
######################
# OSX
@@ -74,5 +75,4 @@ dependency-reduced-pom.xml
#######################################
# Docker
#######################################
-hoodie-integ-test/compose_env
-
+hudi-integ-test/compose_env
diff --git a/.travis.yml b/.travis.yml
index 2cad58f17..e34d3565c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,20 @@
language: java
jdk:
-- oraclejdk8
+ - oraclejdk8
sudo: required
env:
-- HUDI_QUIETER_LOGGING=1
+ - HUDI_QUIETER_LOGGING=1 TEST_SUITE=unit
+ - TEST_SUITE=integration
+install: true
services:
-- docker
+ - docker
cache:
directories:
- - "$HOME/.m2"
+ - "$HOME/.m2"
notifications:
slack:
rooms:
- secure: WNIZPBY//xf/xTJL1YUPzvPUDwjawaMM4IJ6IqxjRGcZCmuhNVu2XTJ3aL1g6X7ZcJKxJuwoU/TbSO8Dl6rgWSo/2OfyzBd4ks+hgeCsdycccTcvO8giQO1DOUGUSRdvUzOvKjWVK7iARYzQhoZawAYwI09UJLlwhYRCJ1IKc1ZksrEt964GeEmPyJbwMoZOJVUU84jJIAZPIpOFGTKM652FMermg9yaY2W5oSjDXaV98z0/mJV4Ry++J2v0fvoDs5HxkXYhZJP+dpWR82KDr6Q6LGL5/IlJ+b+IH3pF8LyKR4nCH6l1EZ8KpoFZapyYWYQpXMfQoF2K/JEQkpz1EqBCeEDSJ2+j1PPLhOWXd7ok4DsS26S8BP2ImvyXwua51THN1/r1fCGSIdxiQ5C8aeYmPCSr+oLChCVivEG2eeU34Z1nQJ5aDymNGeFE9qUUpjS0ETfFcjI/WQaA+FiYiPkDfeAoT1+6ySdY7l9gJhMygupILjq57IHbqx4nEr/8AB3Rqb8iIDTWDXgUBI9xKmty36zjIGcVOsCT/SGPccxvEJBXQk8uQqs/rDhaA/ErJPMLX/2b7ElSSObKFdjpMaxVvZIE6wvMLJpIYfChDoXwgfhN6zlAFZrEib7PFI4dGkS8u4wkkHkBS7C+uz2e92EhsAB+BIhUR1M3NQ33+Is=
on_pull_requests: false
+script:
+ tools/run_travis_tests.sh $TEST_SUITE
\ No newline at end of file
diff --git a/docker/demo/compaction.commands b/docker/demo/compaction.commands
new file mode 100644
index 000000000..dd8ffae31
--- /dev/null
+++ b/docker/demo/compaction.commands
@@ -0,0 +1,5 @@
+connect --path /user/hive/warehouse/stock_ticks_mor
+compactions show all
+compaction schedule
+compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
+
diff --git a/docker/demo/config/dfs-source.properties b/docker/demo/config/dfs-source.properties
new file mode 100644
index 000000000..3e17661da
--- /dev/null
+++ b/docker/demo/config/dfs-source.properties
@@ -0,0 +1,27 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+include=base.properties
+# Key fields, for kafka example
+hoodie.datasource.write.recordkey.field=key
+hoodie.datasource.write.partitionpath.field=date
+# Schema provider props (change to absolute path based on your installation)
+hoodie.deltastreamer.schemaprovider.source.schema.file=/var/demo/config/schema.avsc
+hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.avsc
+# DFS Source
+hoodie.deltastreamer.source.dfs.root=/usr/hive/data/input/
diff --git a/docker/demo/get_min_commit_time.sh b/docker/demo/get_min_commit_time.sh
new file mode 100755
index 000000000..11bc773c5
--- /dev/null
+++ b/docker/demo/get_min_commit_time.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+MIN_COMMIT_TIME=`hdfs dfs -ls -t /user/hive/warehouse/stock_ticks_cow/.hoodie/*.commit | head -1 | awk -F'/' ' { print $7 } ' | awk -F'.' ' { print $1 } '`
+echo $MIN_COMMIT_TIME;
diff --git a/docker/demo/hive-batch1.commands b/docker/demo/hive-batch1.commands
new file mode 100644
index 000000000..3176dea91
--- /dev/null
+++ b/docker/demo/hive-batch1.commands
@@ -0,0 +1,11 @@
+add jar ${hudi.hadoop.bundle};
+
+select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
+
+!quit
diff --git a/docker/demo/hive-batch2-after-compaction.commands b/docker/demo/hive-batch2-after-compaction.commands
new file mode 100644
index 000000000..4cf6eee00
--- /dev/null
+++ b/docker/demo/hive-batch2-after-compaction.commands
@@ -0,0 +1,9 @@
+add jar ${hudi.hadoop.bundle};
+
+select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
+
+!quit
diff --git a/docker/demo/hive-incremental.commands b/docker/demo/hive-incremental.commands
new file mode 100644
index 000000000..3c764ad9c
--- /dev/null
+++ b/docker/demo/hive-incremental.commands
@@ -0,0 +1,10 @@
+add jar ${hudi.hadoop.bundle};
+
+set hoodie.stock_ticks_cow.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_cow.consume.max.commits=3;
+set hoodie.stock_ticks_cow.consume.start.timestamp=${min.commit.time};
+
+select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
+
+!quit
+
diff --git a/docker/demo/hive-table-check.commands b/docker/demo/hive-table-check.commands
new file mode 100644
index 000000000..4484ec43d
--- /dev/null
+++ b/docker/demo/hive-table-check.commands
@@ -0,0 +1,10 @@
+add jar ${hudi.hadoop.bundle};
+show tables;
+
+show partitions stock_ticks_cow;
+show partitions stock_ticks_mor;
+show partitions stock_ticks_mor_rt;
+
+!quit
+
+
diff --git a/docker/demo/sparksql-batch1.commands b/docker/demo/sparksql-batch1.commands
new file mode 100644
index 000000000..e220cb0bd
--- /dev/null
+++ b/docker/demo/sparksql-batch1.commands
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+spark.sql("show tables").show(100, false)
+// Copy-On-Write table
+spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
+
+// Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
+
+System.exit(0)
diff --git a/docker/demo/sparksql-batch2.commands b/docker/demo/sparksql-batch2.commands
new file mode 100644
index 000000000..521a4ebe6
--- /dev/null
+++ b/docker/demo/sparksql-batch2.commands
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ // Copy-On-Write table
+spark.sql("select symbol, max(ts) from stock_ticks_cow group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG'").show(100, false)
+
+// Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor where symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
+
+System.exit(0)
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
new file mode 100644
index 000000000..ceec945f9
--- /dev/null
+++ b/docker/demo/sparksql-incremental.commands
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.hudi.DataSourceReadOptions;
+import org.apache.hudi.DataSourceWriteOptions;
+import org.apache.spark.sql.SaveMode;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.HoodieDataSourceHelpers;
+import org.apache.hadoop.fs.FileSystem;
+
+val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
+val beginInstantTime = HoodieDataSourceHelpers.listCommitsSince(fs, "/user/hive/warehouse/stock_ticks_cow", "00000").get(0)
+val hoodieIncViewDF = spark.read.format("org.apache.hudi").
+ option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY, DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL).
+ option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, beginInstantTime).
+ load("/user/hive/warehouse/stock_ticks_cow");
+hoodieIncViewDF.registerTempTable("stock_ticks_cow_incr")
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_incr where symbol = 'GOOG'").show(100, false);
+
+spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_incr").
+ write.format("org.apache.hudi").
+ option("hoodie.insert.shuffle.parallelism", "2").
+ option("hoodie.upsert.shuffle.parallelism","2").
+ option(DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_STORAGE_TYPE_OPT_VAL).
+ option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
+ option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
+ option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
+ option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
+ option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor").
+ option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor").
+ option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
+ option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
+ option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
+ option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
+ option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
+ option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
+ mode(SaveMode.Overwrite).
+ save("/user/hive/warehouse/stock_ticks_derived_mor");
+
+spark.sql("show tables").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)
+
+System.exit(0);
\ No newline at end of file
diff --git a/hudi-cli/hudi-cli.sh b/hudi-cli/hudi-cli.sh
index 188c16331..8b12f6e61 100755
--- a/hudi-cli/hudi-cli.sh
+++ b/hudi-cli/hudi-cli.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-HUDI_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
+HOODIE_JAR=`ls $DIR/target/hudi-cli-*-SNAPSHOT.jar | grep -v source | grep -v javadoc`
if [ -z "$HADOOP_CONF_DIR" ]; then
echo "setting hadoop conf dir"
HADOOP_CONF_DIR="/etc/hadoop/conf"
@@ -13,5 +13,4 @@ fi
if [ -z "$CLIENT_JAR" ]; then
echo "client jar location not set"
fi
-echo "java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap"
-java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HUDI_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap
+java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index 0ba33dcba..3b66ff2f2 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -29,8 +29,6 @@
1.2.0.RELEASE
org.springframework.shell.Bootstrap
- 1.2.17
- 4.10
${project.basedir}/src/main/resources/META-INF
@@ -61,7 +59,7 @@
net.alchim31.maven
scala-maven-plugin
- 3.2.1
+ ${scala-maven-plugin.version}
@@ -133,23 +131,42 @@
-
+
org.scala-lang
scala-library
${scala.version}
+
- org.springframework.shell
- spring-shell
- ${spring.shell.version}
+ org.apache.hudi
+ hudi-client
+ ${project.version}
- de.vandermeer
- asciitable
- 0.2.5
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+
+ org.apache.hudi
+ hudi-hive
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-utilities
+ ${project.version}
+
+
+
+
+ log4j
+ log4j
+
+
+
org.apache.spark
spark-core_2.11
@@ -159,6 +176,24 @@
spark-sql_2.11
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+
+ org.springframework.shell
+ spring-shell
+ ${spring.shell.version}
+
+
+
+ de.vandermeer
+ asciitable
+ 0.2.5
+
+
com.jakewharton.fliptables
fliptables
@@ -166,60 +201,25 @@
- log4j
- log4j
- ${log4j.version}
-
-
-
- org.apache.hudi
- hudi-hive
- ${project.version}
-
-
-
- org.apache.hudi
- hudi-client
- ${project.version}
+ joda-time
+ joda-time
+
org.apache.hadoop
hadoop-common
-
org.apache.hadoop
hadoop-hdfs
-
- org.apache.hudi
- hudi-common
- ${project.version}
-
junit
junit-dep
- ${junit.version}
+ ${junit-dep.version}
test
-
-
- commons-dbcp
- commons-dbcp
-
-
- joda-time
- joda-time
- 2.9.6
-
-
- org.apache.hudi
- hudi-utilities
- ${project.version}
-
-
-
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
index 592d52864..6a32edc99 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
@@ -217,12 +217,23 @@ public class CompactionCommand implements CommandMarker {
final String sparkMemory,
@CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
final String retry,
- @CliOption(key = "compactionInstant", mandatory = true, help = "Base path for the target hoodie dataset")
- final String compactionInstantTime) throws Exception {
+ @CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
+ String compactionInstantTime) throws Exception {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
+ if (null == compactionInstantTime) {
+ // pick outstanding one with lowest timestamp
+ Option firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
+ .filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
+ .equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
+ if (!firstPendingInstant.isPresent()) {
+ return "NO PENDING COMPACTION TO RUN";
+ }
+ compactionInstantTime = firstPendingInstant.get();
+ }
+
String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
diff --git a/hudi-client/pom.xml b/hudi-client/pom.xml
index 95959ef5c..5e1512d8d 100644
--- a/hudi-client/pom.xml
+++ b/hudi-client/pom.xml
@@ -68,6 +68,7 @@
+
org.apache.hudi
hudi-common
@@ -78,6 +79,71 @@
hudi-timeline-service
${project.version}
+
+
+
+ log4j
+ log4j
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+ org.apache.parquet
+ parquet-hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+
+
+ org.apache.spark
+ spark-sql_2.11
+
+
+
+
+ io.dropwizard.metrics
+ metrics-graphite
+
+
+ io.dropwizard.metrics
+ metrics-core
+
+
+
+ com.google.guava
+ guava
+
+
+
+ com.beust
+ jcommander
+ 1.48
+
+
+
+ org.htrace
+ htrace-core
+ 3.0.4
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ javax.servlet
+ *
+
+
+
org.apache.hadoop
hadoop-hdfs
@@ -117,6 +183,14 @@
+
+
+
+ org.apache.hbase
+ hbase-client
+
+
+
org.apache.hudi
hudi-common
@@ -126,61 +200,35 @@
test
- io.dropwizard.metrics
- metrics-graphite
-
-
- io.dropwizard.metrics
- metrics-core
-
-
- com.beust
- jcommander
- 1.48
+ org.apache.hudi
+ hudi-hadoop-mr
+ ${project.version}
+ test
-
+
- log4j
- log4j
-
-
-
- org.apache.hadoop
- hadoop-client
+ org.apache.hbase
+ hbase-testing-util
+ ${hbase.version}
+ test
- javax.servlet
+ org.codehaus.jackson
+ jackson-mapper-asl
+
+
+ org.codehaus.jackson
+ jackson-core-asl
+
+
+ javax.xml.bind
*
-
- org.apache.parquet
- parquet-avro
-
-
-
- org.apache.parquet
- parquet-hadoop
-
-
-
- com.google.guava
- guava
-
-
-
- org.apache.spark
- spark-core_2.11
-
-
-
- org.apache.spark
- spark-sql_2.11
-
-
+
${hive.groupid}
hive-exec
@@ -191,46 +239,7 @@
org.mockito
mockito-all
- 1.10.19
test
-
- org.apache.hudi
- hudi-hadoop-mr
- ${project.version}
- test
-
-
-
- org.apache.hbase
- hbase-client
-
-
-
- org.htrace
- htrace-core
- 3.0.4
-
-
-
- org.apache.hbase
- hbase-testing-util
- 1.2.3
- test
-
-
- org.codehaus.jackson
- jackson-mapper-asl
-
-
- org.codehaus.jackson
- jackson-core-asl
-
-
- javax.xml.bind
- *
-
-
-
diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml
index fdde6022f..f418ab64d 100644
--- a/hudi-common/pom.xml
+++ b/hudi-common/pom.xml
@@ -43,7 +43,7 @@
org.apache.maven.plugins
maven-jar-plugin
- 2.5
+ ${maven-jar-plugin.version}
@@ -79,77 +79,20 @@
-
- org.rocksdb
- rocksdbjni
-
-
- org.apache.avro
- avro
-
-
- org.apache.hadoop
- hadoop-client
-
-
- javax.servlet
- *
-
-
-
-
- junit
- junit
- ${junit.version}
- test
-
+
com.fasterxml.jackson.core
jackson-annotations
- ${fasterxml.version}
com.fasterxml.jackson.core
jackson-databind
+
+
- org.apache.parquet
- parquet-avro
- ${parquet.version}
-
-
- org.mockito
- mockito-all
- 1.10.19
- test
-
-
- org.apache.hadoop
- hadoop-hdfs
- tests
-
-
- org.apache.hadoop
- hadoop-common
- tests
-
-
- org.apache.httpcomponents
- httpclient
- 4.5.4
-
-
- commons-codec
- commons-codec
-
-
- org.apache.httpcomponents
- fluent-hc
- 4.5.4
-
-
- com.esotericsoftware
- kryo
+ org.apache.avro
+ avro
org.apache.avro
@@ -161,21 +104,91 @@
+
+
- com.github.stefanbirkner
- system-rules
- 1.16.0
- test
+ org.apache.parquet
+ parquet-avro
+
+
+
+
+ com.twitter.common
+ objectsize
+ 0.0.12
+
+
+
+
+ commons-codec
+ commons-codec
+
+
+
+
+ org.apache.httpcomponents
+ fluent-hc
- com.twitter.common
- objectsize
- 0.0.12
+ org.apache.httpcomponents
+ httpclient
+
+
+
+ org.rocksdb
+ rocksdbjni
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ javax.servlet
+ *
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ tests
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ tests
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+
+ com.esotericsoftware
+ kryo
+ test
com.esotericsoftware
kryo-shaded
4.0.2
+
+
+ com.github.stefanbirkner
+ system-rules
+ 1.16.0
+ test
+
diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml
index cc6cfa2c7..66f2fe65a 100644
--- a/hudi-hadoop-mr/pom.xml
+++ b/hudi-hadoop-mr/pom.xml
@@ -30,32 +30,52 @@
+
org.apache.hudi
hudi-common
${project.version}
+
+
- org.apache.hudi
- hudi-common
- ${project.version}
- tests
- test-jar
- test
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+
+
+ com.twitter
+ parquet-avro
+
+
+ com.twitter
+ parquet-hadoop-bundle
+
+
+
+
+ commons-logging
+ commons-logging
+
+ com.twitter.common
+ objectsize
+ 0.0.12
+
+
+
org.apache.hadoop
hadoop-common
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
-
org.apache.hadoop
hadoop-auth
@@ -64,52 +84,41 @@
org.apache.hadoop
hadoop-hdfs
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+
+
+
${hive.groupid}
hive-jdbc
- ${hive.version}
-
-
- commons-logging
- commons-logging
-
-
${hive.groupid}
hive-exec
- ${hive.version}
+
+
- commons-logging
- commons-logging
-
-
- org.apache.parquet
- parquet-avro
-
-
- com.twitter
- parquet-avro
-
-
- com.twitter
- parquet-hadoop-bundle
-
-
- com.twitter.common
- objectsize
- 0.0.12
-
-
- org.apache.avro
- avro
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+ tests
+ test-jar
+ test
+
com.esotericsoftware
kryo
test
+
junit
junit
diff --git a/hudi-hive/pom.xml b/hudi-hive/pom.xml
index b9f6e01f7..758860d34 100644
--- a/hudi-hive/pom.xml
+++ b/hudi-hive/pom.xml
@@ -31,6 +31,81 @@
+
+
+
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-hadoop-mr
+ ${project.version}
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+
+ com.twitter
+ parquet-avro
+
+
+
+ com.google.guava
+ guava
+
+
+
+ org.apache.thrift
+ libthrift
+ ${thrift.version}
+
+
+
+ joda-time
+ joda-time
+
+
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ commons-pool
+ commons-pool
+
+
+ commons-io
+ commons-io
+
+
+
+ com.beust
+ jcommander
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+
org.apache.hadoop
hadoop-common
@@ -48,66 +123,17 @@
hadoop-auth
- com.google.guava
- guava
+ org.apache.hadoop
+ hadoop-common
+ tests
- org.apache.thrift
- libthrift
- ${thrift.version}
+ org.apache.hadoop
+ hadoop-hdfs
+ tests
-
- joda-time
- joda-time
-
-
-
-
- commons-dbcp
- commons-dbcp
-
-
-
- commons-pool
- commons-pool
-
-
-
- commons-io
- commons-io
-
-
-
-
- org.slf4j
- slf4j-api
-
-
- org.slf4j
- slf4j-log4j12
-
-
-
- com.beust
- jcommander
-
-
-
- org.apache.httpcomponents
- httpcore
-
-
-
- org.apache.httpcomponents
- httpclient
-
-
-
-
- junit
- junit
-
+
${hive.groupid}
hive-service
@@ -139,64 +165,53 @@
${hive.version}
+
- org.apache.hadoop
- hadoop-hdfs
+ org.apache.hudi
+ hudi-common
+ ${project.version}
tests
-
-
- org.apache.hadoop
- hadoop-common
- tests
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
- test
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-core
test
+
org.mockito
mockito-all
test
-
- com.twitter
- parquet-avro
-
-
- org.apache.hudi
- hudi-hadoop-mr
- ${project.version}
-
-
- org.apache.hudi
- hudi-common
- ${project.version}
-
-
- org.apache.hudi
- hudi-common
- ${project.version}
- tests
- test
-
com.esotericsoftware.kryo
kryo
2.21
test
+
+
+ junit
+ junit
+ test
+
+
org.eclipse.jetty.aggregate
jetty-all
test
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ test
+
+
@@ -213,7 +228,7 @@
org.apache.maven.plugins
maven-jar-plugin
- 2.5
+ ${maven-jar-plugin.version}
diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml
index 6b7e29e78..30dd42c9b 100644
--- a/hudi-integ-test/pom.xml
+++ b/hudi-integ-test/pom.xml
@@ -9,32 +9,57 @@
hudi-integ-test
4.0.0
-
- org.glassfish.jersey.connectors
- jersey-apache-connector
- 2.17
-
+
+
org.glassfish.jersey.core
jersey-server
- 2.17
+
+
+ org.glassfish.jersey.connectors
+ jersey-apache-connector
org.glassfish.jersey.containers
jersey-container-servlet-core
- 2.17
+
+
+
+
+ com.github.docker-java
+ docker-java
+ 3.1.2
+ test
+
+
+
org.apache.hudi
hudi-spark
${project.version}
-
-
- org.glassfish.**
- *
-
-
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+
+ org.apache.hudi
+ hudi-hadoop-sparkworker-docker
+ ${project.version}
+ pom
+ import
+
+
+
org.apache.hudi
hudi-common
@@ -43,12 +68,6 @@
test-jar
test
-
- org.awaitility
- awaitility
- 3.1.2
- test
-
org.apache.hudi
hudi-spark
@@ -56,56 +75,39 @@
tests
test-jar
test
-
-
- org.glassfish.**
- *
-
-
+
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+ test
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ test
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-guava
+ test
+
+
+
+ org.awaitility
+ awaitility
+ 3.1.2
+ test
+
+
com.google.guava
guava
20.0
test
-
- com.fasterxml.jackson.core
- jackson-annotations
- 2.6.4
- test
-
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.6.4
- test
-
-
- com.fasterxml.jackson.datatype
- jackson-datatype-guava
- 2.9.4
- test
-
-
- com.github.docker-java
- docker-java
- 3.1.0-rc-3
- test
-
-
- org.glassfish.**
- *
-
-
-
-
- org.apache.hudi
- hudi-hadoop-sparkworker-docker
- ${project.version}
- pom
- import
-
+
junit
junit
@@ -150,12 +152,25 @@
org.apache.maven.plugins
maven-failsafe-plugin
2.22.0
+
+
+ **/ITT*.java
+
+
+ integration-test
integration-test
+
+ verify
+ verify
+
+ verify
+
+
@@ -179,7 +194,7 @@
down
- integration-test
+ post-integration-test
down
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index 9751c11ff..c12cf6d05 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -31,14 +31,14 @@ import com.github.dockerjava.core.DockerClientBuilder;
import com.github.dockerjava.core.DockerClientConfig;
import com.github.dockerjava.core.command.ExecStartResultCallback;
import com.github.dockerjava.jaxrs.JerseyDockerCmdExecFactory;
-import com.google.common.collect.ImmutableList;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
-import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hudi.common.util.collection.Pair;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.Assert;
@@ -52,38 +52,64 @@ public abstract class ITTestBase {
protected static final String ADHOC_2_CONTAINER = "/adhoc-2";
protected static final String HIVESERVER = "/hiveserver";
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
- protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hoodie-spark/run_hoodie_app.sh";
+ protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_app.sh";
protected static final String HUDI_HADOOP_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
protected static final String HUDI_HIVE_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hive-bundle.jar";
protected static final String HUDI_SPARK_BUNDLE =
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-spark-bundle.jar";
+ protected static final String HUDI_UTILITIES_BUNDLE =
+ HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-utilities.jar";
protected static final String HIVE_SERVER_JDBC_URL = "jdbc:hive2://hiveserver:10000";
+ protected static final String HADOOP_CONF_DIR = "/etc/hadoop";
+
// Skip these lines when capturing output from hive
- protected static final Integer SLF4J_WARNING_LINE_COUNT_IN_HIVE_CMD = 9;
private static final String DEFAULT_DOCKER_HOST = "unix:///var/run/docker.sock";
private static final String OVERRIDDEN_DOCKER_HOST = System.getenv("DOCKER_HOST");
protected DockerClient dockerClient;
protected Map runningContainers;
- protected static String[] getHiveConsoleCommand(String rawCommand) {
+ static String[] getHiveConsoleCommand(String rawCommand) {
String jarCommand = "add jar " + HUDI_HADOOP_BUNDLE + ";";
String fullCommand = jarCommand + rawCommand;
- List cmd = new ImmutableList.Builder().add("hive")
- .add("--hiveconf")
- .add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat")
- .add("--hiveconf")
- .add("hive.stats.autogather=false")
- .add("-e")
- .add("\"" + fullCommand + "\"")
- .build();
+ List cmd = new ArrayList<>();
+ cmd.add("hive");
+ cmd.add("--hiveconf");
+ cmd.add("hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
+ cmd.add("--hiveconf");
+ cmd.add("hive.stats.autogather=false");
+ cmd.add("-e");
+ cmd.add("\"" + fullCommand + "\"");
return cmd.stream().toArray(String[]::new);
}
+ private static String getHiveConsoleCommandFile(String commandFile, String additionalVar) {
+ StringBuilder builder = new StringBuilder()
+ .append("beeline -u " + HIVE_SERVER_JDBC_URL)
+ .append(" --hiveconf hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat ")
+ .append(" --hiveconf hive.stats.autogather=false ")
+ .append(" --hivevar hudi.hadoop.bundle=" + HUDI_HADOOP_BUNDLE);
+
+ if (additionalVar != null) {
+ builder.append(" --hivevar " + additionalVar + " ");
+ }
+ return builder.append(" -f ").append(commandFile).toString();
+ }
+
+ static String getSparkShellCommand(String commandFile) {
+ return new StringBuilder()
+ .append("spark-shell --jars ").append(HUDI_SPARK_BUNDLE)
+ .append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
+ .append(" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
+ .append(" --packages com.databricks:spark-avro_2.11:4.0.0 ")
+ .append(" -i ").append(commandFile)
+ .toString();
+ }
+
@Before
- public void init() throws IOException {
+ public void init() {
String dockerHost = (OVERRIDDEN_DOCKER_HOST != null) ? OVERRIDDEN_DOCKER_HOST : DEFAULT_DOCKER_HOST;
//Assuming insecure docker engine
DockerClientConfig config = DefaultDockerClientConfig.createDefaultConfigBuilder()
@@ -104,7 +130,7 @@ public abstract class ITTestBase {
List containerList = dockerClient.listContainersCmd().exec();
for (Container c : containerList) {
if (!c.getState().equalsIgnoreCase("running")) {
- System.out.println("Container : " + Arrays.toString(c.getNames())
+ LOG.info("Container : " + Arrays.toString(c.getNames())
+ "not in running state, Curr State :" + c.getState());
return false;
}
@@ -114,10 +140,12 @@ public abstract class ITTestBase {
return true;
}
- protected TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
- boolean expectedToSucceed)
- throws Exception {
- LOG.info("Executing command (" + Arrays.toString(command) + ") in container " + containerName);
+ private String singleSpace(String str) {
+ return str.replaceAll("[\\s]+"," ");
+ }
+
+ private TestExecStartResultCallback executeCommandInDocker(String containerName, String[] command,
+ boolean expectedToSucceed) throws Exception {
Container sparkWorkerContainer = runningContainers.get(containerName);
ExecCreateCmd cmd = dockerClient.execCreateCmd(sparkWorkerContainer.getId())
.withCmd(command).withAttachStdout(true).withAttachStderr(true);
@@ -128,12 +156,10 @@ public abstract class ITTestBase {
dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false)
.exec(callback).awaitCompletion();
int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
- LOG.info("Exit code for command (" + Arrays.toString(command) + ") is " + exitCode);
- if (exitCode != 0) {
- LOG.error("Command (" + Arrays.toString(command) + ") failed.");
- LOG.error("Stdout is :" + callback.getStdout().toString());
- LOG.error("Stderr is :" + callback.getStderr().toString());
- }
+ LOG.info("Exit code for command : " + exitCode);
+ LOG.error("\n\n ###### Stdout #######\n" + callback.getStdout().toString());
+ LOG.error("\n\n ###### Stderr #######\n" + callback.getStderr().toString());
+
if (expectedToSucceed) {
Assert.assertTrue("Command (" + Arrays.toString(command)
+ ") expected to succeed. Exit (" + exitCode + ")", exitCode == 0);
@@ -145,6 +171,71 @@ public abstract class ITTestBase {
return callback;
}
+ void executeCommandStringsInDocker(String containerName, List commands) throws Exception {
+ for (String cmd : commands) {
+ executeCommandStringInDocker(containerName, cmd, true);
+ }
+ }
+
+ TestExecStartResultCallback executeCommandStringInDocker(String containerName, String cmd,
+ boolean expectedToSucceed) throws Exception {
+ LOG.info("\n\n#################################################################################################");
+ LOG.info("Container : " + containerName + ", Running command :" + cmd);
+ LOG.info("\n#################################################################################################");
+
+ String[] cmdSplits = singleSpace(cmd).split(" ");
+ return executeCommandInDocker(containerName, cmdSplits, expectedToSucceed);
+ }
+
+ Pair executeHiveCommand(String hiveCommand) throws Exception {
+
+ LOG.info("\n\n#################################################################################################");
+ LOG.info("Running hive command :" + hiveCommand);
+ LOG.info("\n#################################################################################################");
+
+ String[] hiveCmd = getHiveConsoleCommand(hiveCommand);
+ TestExecStartResultCallback callback = executeCommandInDocker(HIVESERVER, hiveCmd, true);
+ return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
+ }
+
+ Pair executeHiveCommandFile(String commandFile) throws Exception {
+ return executeHiveCommandFile(commandFile, null);
+ }
+
+ Pair executeHiveCommandFile(String commandFile, String additionalVar) throws Exception {
+ String hiveCmd = getHiveConsoleCommandFile(commandFile, additionalVar);
+ TestExecStartResultCallback callback = executeCommandStringInDocker(HIVESERVER, hiveCmd, true);
+ return Pair.of(callback.getStdout().toString().trim(), callback.getStderr().toString().trim());
+ }
+
+ Pair executeSparkSQLCommand(String commandFile, boolean expectedToSucceed) throws Exception {
+ String sparkShellCmd = getSparkShellCommand(commandFile);
+ TestExecStartResultCallback callback = executeCommandStringInDocker(ADHOC_1_CONTAINER,
+ sparkShellCmd, expectedToSucceed);
+ return Pair.of(callback.getStdout().toString(), callback.getStderr().toString());
+ }
+
+ void assertStdOutContains(Pair stdOutErr, String expectedOutput) {
+ assertStdOutContains(stdOutErr, expectedOutput, 1);
+ }
+
+ void assertStdOutContains(Pair stdOutErr, String expectedOutput, int times) {
+ // this is so that changes in padding don't affect comparison
+ String stdOutSingleSpaced = singleSpace(stdOutErr.getLeft()).replaceAll(" ", "");
+ expectedOutput = singleSpace(expectedOutput).replaceAll(" ", "");
+
+ int lastIndex = 0;
+ int count = 0;
+ while(lastIndex != -1){
+ lastIndex = stdOutSingleSpaced.indexOf(expectedOutput, lastIndex);
+ if(lastIndex != -1){
+ count ++;
+ lastIndex += expectedOutput.length();
+ }
+ }
+ Assert.assertEquals("Did not find output the expected number of times", times, count);
+ }
+
public class TestExecStartResultCallback extends ExecStartResultCallback {
// Storing the reference in subclass to expose to clients
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
new file mode 100644
index 000000000..ee3855a83
--- /dev/null
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.integ;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.hudi.common.util.collection.Pair;
+import java.util.List;
+import org.junit.Test;
+
+/**
+ * Goes through steps described in https://hudi.incubator.apache.org/docker_demo.html
+ *
+ * To run this as a standalone test in the IDE or command line, first bring up the demo setup using
+ * `docker/setup_demo.sh` and then run the test class as you would do normally.
+ */
+public class ITTestHoodieDemo extends ITTestBase {
+
+ private static String HDFS_DATA_DIR = "/usr/hive/data/input";
+ private static String HDFS_BATCH_PATH1 = HDFS_DATA_DIR + "/" + "batch_1.json";
+ private static String HDFS_BATCH_PATH2 = HDFS_DATA_DIR + "/" + "batch_2.json";
+
+ private static String INPUT_BATCH_PATH1 = HOODIE_WS_ROOT +
+ "/docker/demo/data/batch_1.json";
+ private static String INPUT_BATCH_PATH2 = HOODIE_WS_ROOT +
+ "/docker/demo/data/batch_2.json";
+
+ private static String COW_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow";
+ private static String MOR_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor";
+ private static String COW_TABLE_NAME = "stock_ticks_cow";
+ private static String MOR_TABLE_NAME = "stock_ticks_mor";
+
+ private static String DEMO_CONTAINER_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/setup_demo_container.sh";
+ private static String MIN_COMMIT_TIME_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/get_min_commit_time.sh";
+ private static String HUDI_CLI_TOOL = HOODIE_WS_ROOT + "/hudi-cli/hudi-cli.sh";
+ private static String COMPACTION_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/compaction.commands";
+ private static String SPARKSQL_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch1.commands";
+ private static String SPARKSQL_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch2.commands";
+ private static String SPARKSQL_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-incremental.commands";
+ private static String HIVE_TBLCHECK_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-table-check.commands";
+ private static String HIVE_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch1.commands";
+ private static String HIVE_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-batch2-after-compaction.commands";
+ private static String HIVE_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/hive-incremental.commands";
+
+
+ private static String HIVE_SYNC_CMD_FMT = " --enable-hive-sync "
+ + " --hoodie-conf hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000 "
+ + " --hoodie-conf hoodie.datasource.hive_sync.username=hive "
+ + " --hoodie-conf hoodie.datasource.hive_sync.password=hive "
+ + " --hoodie-conf hoodie.datasource.hive_sync.partition_fields=%s "
+ + " --hoodie-conf hoodie.datasource.hive_sync.database=default "
+ + " --hoodie-conf hoodie.datasource.hive_sync.table=%s";
+
+
+ @Test
+ public void testDemo() throws Exception {
+ setupDemo();
+
+ // batch 1
+ ingestFirstBatchAndHiveSync();
+ testHiveAfterFirstBatch();
+ testSparkSQLAfterFirstBatch();
+
+ // batch 2
+ ingestSecondBatchAndHiveSync();
+ testHiveAfterSecondBatch();
+ testSparkSQLAfterSecondBatch();
+ testIncrementalHiveQuery();
+ testIncrementalSparkSQLQuery();
+
+ // compaction
+ scheduleAndRunCompaction();
+ testHiveAfterSecondBatchAfterCompaction();
+ testIncrementalHiveQueryAfterCompaction();
+ }
+
+ private void setupDemo() throws Exception {
+ List cmds = new ImmutableList.Builder()
+ .add("hdfs dfsadmin -safemode wait") // handle NN going into safe mode at times
+ .add("hdfs dfs -mkdir -p " + HDFS_DATA_DIR)
+ .add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH1 + " " + HDFS_BATCH_PATH1)
+ .add("/bin/bash " + DEMO_CONTAINER_SCRIPT)
+ .build();
+ executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
+ }
+
+ private void ingestFirstBatchAndHiveSync() throws Exception {
+ List cmds = new ImmutableList.Builder()
+ .add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ + HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
+ + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ + " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ + " --props /var/demo/config/dfs-source.properties "
+ + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ + String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
+ .add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ + HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
+ + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ + " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ + " --props /var/demo/config/dfs-source.properties "
+ + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ + " --disable-compaction "
+ + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
+ .build();
+
+ executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
+ }
+
+ private void testHiveAfterFirstBatch() throws Exception {
+ Pair stdOutErrPair = executeHiveCommandFile(HIVE_TBLCHECK_COMMANDS);
+ assertStdOutContains(stdOutErrPair, "| stock_ticks_cow |");
+ assertStdOutContains(stdOutErrPair, "| stock_ticks_mor |");
+ assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt |");
+
+ assertStdOutContains(stdOutErrPair,
+ "| partition |\n"
+ + "+----------------+\n"
+ + "| dt=2018-08-31 |\n"
+ + "+----------------+\n", 3);
+
+ stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
+ assertStdOutContains(stdOutErrPair, "| symbol | _c1 |\n"
+ + "+---------+----------------------+\n"
+ + "| GOOG | 2018-08-31 10:29:00 |\n", 3);
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | ts | volume | open | close |\n"
+ + "+---------+----------------------+---------+------------+-----------+\n"
+ + "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ + "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n", 3);
+ }
+
+ private void testSparkSQLAfterFirstBatch() throws Exception {
+ Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
+ assertStdOutContains(stdOutErrPair,
+ "|default |stock_ticks_cow |false |\n"
+ + "|default |stock_ticks_mor |false |\n"
+ + "|default |stock_ticks_mor_rt |false |");
+ assertStdOutContains(stdOutErrPair,
+ "+------+-------------------+\n"
+ + "|GOOG |2018-08-31 10:29:00|\n"
+ + "+------+-------------------+", 3);
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|", 3);
+ }
+
+ private void ingestSecondBatchAndHiveSync() throws Exception {
+ List cmds = new ImmutableList.Builder()
+ .add("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2)
+ .add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ + HUDI_UTILITIES_BUNDLE + " --storage-type COPY_ON_WRITE "
+ + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ + " --target-base-path " + COW_BASE_PATH + " --target-table " + COW_TABLE_NAME
+ + " --props /var/demo/config/dfs-source.properties "
+ + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ + String.format(HIVE_SYNC_CMD_FMT, "dt", COW_TABLE_NAME))
+ .add("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer "
+ + HUDI_UTILITIES_BUNDLE + " --storage-type MERGE_ON_READ "
+ + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+ + " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
+ + " --props /var/demo/config/dfs-source.properties "
+ + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+ + " --disable-compaction "
+ + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME))
+ .build();
+ executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
+ }
+
+ private void testHiveAfterSecondBatch() throws Exception {
+ Pair stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | _c1 |\n"
+ + "+---------+----------------------+\n"
+ + "| GOOG | 2018-08-31 10:29:00 |\n");
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | _c1 |\n"
+ + "+---------+----------------------+\n"
+ + "| GOOG | 2018-08-31 10:59:00 |\n", 2);
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | ts | volume | open | close |\n"
+ + "+---------+----------------------+---------+------------+-----------+\n"
+ + "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ + "| GOOG | 2018-08-31 10:29:00 | 3391 | 1230.1899 | 1230.085 |\n");
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | ts | volume | open | close |\n"
+ + "+---------+----------------------+---------+------------+-----------+\n"
+ + "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ + "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |\n", 2);
+ }
+
+ private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
+ Pair stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | _c1 |\n"
+ + "+---------+----------------------+\n"
+ + "| GOOG | 2018-08-31 10:59:00 |", 2);
+ assertStdOutContains(stdOutErrPair, "| symbol | ts | volume | open | close |\n"
+ + "+---------+----------------------+---------+------------+-----------+\n"
+ + "| GOOG | 2018-08-31 09:59:00 | 6330 | 1230.5 | 1230.02 |\n"
+ + "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |", 2);
+ }
+
+ private void testSparkSQLAfterSecondBatch() throws Exception {
+ Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
+ assertStdOutContains(stdOutErrPair,
+ "+------+-------------------+\n"
+ + "|GOOG |2018-08-31 10:59:00|\n"
+ + "+------+-------------------+", 2);
+
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 09:59:00|6330 |1230.5 |1230.02 |", 3);
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|", 2);
+ assertStdOutContains(stdOutErrPair,
+ "+------+-------------------+\n"
+ + "|GOOG |2018-08-31 10:29:00|\n"
+ + "+------+-------------------+");
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:29:00|3391 |1230.1899|1230.085|");
+ }
+
+ private void testIncrementalHiveQuery() throws Exception {
+ String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
+ .getStdout().toString();
+ Pair stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
+ "min.commit.time=" + minCommitTime +"`");
+ assertStdOutContains(stdOutErrPair, "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
+ }
+
+ private void testIncrementalHiveQueryAfterCompaction() throws Exception {
+ String minCommitTime = executeCommandStringInDocker(ADHOC_2_CONTAINER, MIN_COMMIT_TIME_SCRIPT, true)
+ .getStdout().toString();
+ Pair stdOutErrPair = executeHiveCommandFile(HIVE_INCREMENTAL_COMMANDS,
+ "min.commit.time=" + minCommitTime +"`");
+ assertStdOutContains(stdOutErrPair,
+ "| symbol | ts | volume | open | close |\n"
+ + "+---------+----------------------+---------+------------+-----------+\n"
+ + "| GOOG | 2018-08-31 10:59:00 | 9021 | 1227.1993 | 1227.215 |");
+ }
+
+ private void testIncrementalSparkSQLQuery() throws Exception {
+ Pair stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
+ assertStdOutContains(stdOutErrPair, "|GOOG |2018-08-31 10:59:00|9021 |1227.1993|1227.215|");
+ assertStdOutContains(stdOutErrPair,
+ "|default |stock_ticks_cow |false |\n"
+ + "|default |stock_ticks_derived_mor |false |\n"
+ + "|default |stock_ticks_derived_mor_rt|false |\n"
+ + "|default |stock_ticks_mor |false |\n"
+ + "|default |stock_ticks_mor_rt |false |\n"
+ + "| |stock_ticks_cow_incr |true |");
+ assertStdOutContains(stdOutErrPair,
+ "|count(1)|\n"
+ + "+--------+\n"
+ + "|99 |", 2);
+ }
+
+ private void scheduleAndRunCompaction() throws Exception {
+ executeCommandStringInDocker(ADHOC_1_CONTAINER, HUDI_CLI_TOOL + " --cmdfile " + COMPACTION_COMMANDS, true);
+ }
+}
\ No newline at end of file
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
index 89eb56960..c8e112624 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java
@@ -18,7 +18,7 @@
package org.apache.hudi.integ;
-import java.util.Arrays;
+import org.apache.hudi.common.util.collection.Pair;
import org.junit.Assert;
import org.junit.Test;
@@ -33,17 +33,6 @@ public class ITTestHoodieSanity extends ITTestBase {
NON_PARTITIONED,
}
- @Test
- public void testRunEcho() throws Exception {
- String[] cmd = new String[]{"echo", "Happy Testing"};
- TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
- cmd, true);
- String stdout = callback.getStdout().toString();
- String stderr = callback.getStderr().toString();
- LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
- LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
- }
-
@Test
/**
* A basic integration test that runs HoodieJavaApp to create a sample COW Hoodie with single partition key
@@ -53,6 +42,7 @@ public class ITTestHoodieSanity extends ITTestBase {
public void testRunHoodieJavaAppOnSinglePartitionKeyCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_single_partition_key_cow_test";
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.SINGLE_KEY_PARTITIONED);
+ executeHiveCommand("drop table if exists " + hiveTableName);
}
@Test
@@ -64,6 +54,7 @@ public class ITTestHoodieSanity extends ITTestBase {
public void testRunHoodieJavaAppOnMultiPartitionKeysCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_multi_partition_key_cow_test";
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.MULTI_KEYS_PARTITIONED);
+ executeHiveCommand("drop table if exists " + hiveTableName);
}
@Test
@@ -75,6 +66,7 @@ public class ITTestHoodieSanity extends ITTestBase {
public void testRunHoodieJavaAppOnNonPartitionedCOWTable() throws Exception {
String hiveTableName = "docker_hoodie_non_partition_key_cow_test";
testRunHoodieJavaAppOnCOWTable(hiveTableName, PartitionType.NON_PARTITIONED);
+ executeHiveCommand("drop table if exists " + hiveTableName);
}
/**
@@ -89,109 +81,54 @@ public class ITTestHoodieSanity extends ITTestBase {
String hdfsUrl = "hdfs://namenode" + hdfsPath;
// Drop Table if it exists
- {
- String[] hiveDropCmd = getHiveConsoleCommand("drop table if exists " + hiveTableName);
- executeCommandInDocker(HIVESERVER, hiveDropCmd, true);
+ String hiveDropCmd = "drop table if exists " + hiveTableName;
+ try {
+ executeHiveCommand(hiveDropCmd);
+ } catch (AssertionError ex) {
+ // In Travis, sometimes, the Hive metastore is not ready even though we wait for the port to be up
+ // Workaround to sleep for 5 secs and retry
+ Thread.sleep(5000);
+ executeHiveCommand(hiveDropCmd);
}
// Ensure table does not exist
- {
- String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
- TestExecStartResultCallback callback =
- executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
- String stderr = callback.getStderr().toString();
- String stdout = callback.getStdout().toString();
- LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") :" + stdout);
- LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") :" + stderr);
- Assert.assertTrue("Result :" + callback.getStdout().toString(), stdout.trim().isEmpty());
- }
+ Pair stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
+ Assert.assertTrue("Dropped table " + hiveTableName + " exists!", stdOutErr.getLeft().isEmpty());
// Run Hoodie Java App
- {
- String[] cmd = null;
- if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
- cmd = new String[]{
- HOODIE_JAVA_APP,
- "--hive-sync",
- "--table-path", hdfsUrl,
- "--hive-url", HIVE_SERVER_JDBC_URL,
- "--hive-table", hiveTableName
- };
- } else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
- cmd = new String[]{
- HOODIE_JAVA_APP,
- "--hive-sync",
- "--table-path", hdfsUrl,
- "--hive-url", HIVE_SERVER_JDBC_URL,
- "--use-multi-partition-keys",
- "--hive-table", hiveTableName
- };
- } else {
- cmd = new String[]{
- HOODIE_JAVA_APP,
- "--hive-sync",
- "--table-path", hdfsUrl,
- "--hive-url", HIVE_SERVER_JDBC_URL,
- "--non-partitioned",
- "--hive-table", hiveTableName
- };
- }
- TestExecStartResultCallback callback = executeCommandInDocker(ADHOC_1_CONTAINER,
- cmd, true);
- String stdout = callback.getStdout().toString().trim();
- String stderr = callback.getStderr().toString().trim();
- LOG.info("Got output for (" + Arrays.toString(cmd) + ") :" + stdout);
- LOG.info("Got error output for (" + Arrays.toString(cmd) + ") :" + stderr);
+ String cmd;
+ if (partitionType == PartitionType.SINGLE_KEY_PARTITIONED) {
+ cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ + " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName;
+ } else if (partitionType == PartitionType.MULTI_KEYS_PARTITIONED) {
+ cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ + " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
+ + " --use-multi-partition-keys";
+ } else {
+ cmd = HOODIE_JAVA_APP + " --hive-sync --table-path " + hdfsUrl
+ + " --hive-url " + HIVE_SERVER_JDBC_URL + " --hive-table " + hiveTableName
+ + " --non-partitioned";
}
+ executeCommandStringInDocker(ADHOC_1_CONTAINER, cmd, true);
// Ensure table does exist
- {
- String[] hiveTableCheck = getHiveConsoleCommand("show tables like '" + hiveTableName + "'");
- TestExecStartResultCallback callback =
- executeCommandInDocker(HIVESERVER, hiveTableCheck, true);
- String stderr = callback.getStderr().toString().trim();
- String stdout = callback.getStdout().toString().trim();
- LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
- LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
- Assert.assertEquals("Table exists", hiveTableName, stdout);
- }
+ stdOutErr = executeHiveCommand("show tables like '" + hiveTableName + "'");
+ Assert.assertEquals("Table exists", hiveTableName, stdOutErr.getLeft());
// Ensure row count is 100 (without duplicates)
- {
- String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
- TestExecStartResultCallback callback =
- executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
- String stderr = callback.getStderr().toString().trim();
- String stdout = callback.getStdout().toString().trim();
- LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
- LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
- Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
- Integer.parseInt(stdout.trim()));
- }
+ stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
+ Assert.assertEquals("Expecting 100 rows to be present in the new table", 100,
+ Integer.parseInt(stdOutErr.getLeft().trim()));
// Make the HDFS dataset non-hoodie and run the same query
// Checks for interoperability with non-hoodie tables
- {
- // Delete Hoodie directory to make it non-hoodie dataset
- String[] cmd = new String[]{
- "hadoop", "fs", "-rm", "-r", hdfsPath + "/.hoodie"
- };
- TestExecStartResultCallback callback =
- executeCommandInDocker(ADHOC_1_CONTAINER, cmd, true);
- String stderr = callback.getStderr().toString().trim();
- String stdout = callback.getStdout().toString().trim();
- LOG.info("Got output for (" + Arrays.toString(cmd) + ") : (" + stdout + ")");
- LOG.info("Got error output for (" + Arrays.toString(cmd) + ") : (" + stderr + ")");
- // Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
- String[] hiveTableCheck = getHiveConsoleCommand("select count(1) from " + hiveTableName);
- callback = executeCommandInDocker(ADHOC_1_CONTAINER, hiveTableCheck, true);
- stderr = callback.getStderr().toString().trim();
- stdout = callback.getStdout().toString().trim();
- LOG.info("Got output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stdout + ")");
- LOG.info("Got error output for (" + Arrays.toString(hiveTableCheck) + ") : (" + stderr + ")");
- Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
- Integer.parseInt(stdout.trim()));
- }
+ // Delete Hoodie directory to make it non-hoodie dataset
+ executeCommandStringInDocker(ADHOC_1_CONTAINER, "hdfs dfs -rm -r " + hdfsPath + "/.hoodie", true);
+
+ // Run the count query again. Without Hoodie, all versions are included. So we get a wrong count
+ stdOutErr = executeHiveCommand("select count(1) from " + hiveTableName);
+ Assert.assertEquals("Expecting 200 rows to be present in the new table", 200,
+ Integer.parseInt(stdOutErr.getLeft().trim()));
}
}
diff --git a/hudi-spark/pom.xml b/hudi-spark/pom.xml
index 2ca54201d..60686f65a 100644
--- a/hudi-spark/pom.xml
+++ b/hudi-spark/pom.xml
@@ -23,13 +23,10 @@
4.0.0
- org.apache.hudi
hudi-spark
jar
- 1.2.17
- 4.10
${project.basedir}/src/main/resources/META-INF
@@ -52,12 +49,11 @@
net.alchim31.maven
scala-maven-plugin
- 3.3.1
+ ${scala-maven-plugin.version}
org.apache.maven.plugins
maven-compiler-plugin
- 2.0.2
@@ -156,95 +152,14 @@
+
org.scala-lang
scala-library
${scala.version}
-
- org.scalatest
- scalatest_2.11
- 3.0.1
- test
-
-
- org.apache.spark
- spark-core_2.11
-
-
- org.apache.spark
- spark-sql_2.11
-
-
- com.databricks
- spark-avro_2.11
- 4.0.0
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.module
- jackson-module-scala_2.11
-
-
- org.apache.hadoop
- hadoop-client
-
-
- javax.servlet
- *
-
-
- provided
-
-
-
- org.apache.hadoop
- hadoop-common
- provided
-
-
-
- log4j
- log4j
- ${log4j.version}
-
-
- org.apache.avro
- avro
-
-
-
- org.apache.commons
- commons-configuration2
-
-
-
- ${hive.groupid}
- hive-service
- ${hive.version}
-
-
-
- ${hive.groupid}
- hive-jdbc
- ${hive.version}
-
-
-
- ${hive.groupid}
- hive-metastore
- ${hive.version}
-
-
-
- ${hive.groupid}
- hive-common
- ${hive.version}
-
+
org.apache.hudi
hudi-client
@@ -265,12 +180,93 @@
hudi-hive
${project.version}
+
+
- junit
- junit-dep
- ${junit.version}
- test
+ log4j
+ log4j
+
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+ com.fasterxml.jackson.module
+ jackson-module-scala_2.11
+
+
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+
+
+ org.apache.spark
+ spark-sql_2.11
+
+
+
+
+ com.databricks
+ spark-avro_2.11
+ 4.0.0
+
+
+
+
+ org.apache.commons
+ commons-configuration2
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ javax.servlet
+ *
+
+
+ provided
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ ${hive.groupid}
+ hive-service
+ ${hive.version}
+
+
+ ${hive.groupid}
+ hive-jdbc
+ ${hive.version}
+
+
+ ${hive.groupid}
+ hive-metastore
+ ${hive.version}
+
+
+ ${hive.groupid}
+ hive-common
+ ${hive.version}
+
+
+
org.apache.hudi
hudi-client
@@ -287,5 +283,19 @@
test-jar
test
+
+
+ org.scalatest
+ scalatest_2.11
+ ${scalatest.version}
+ test
+
+
+
+ junit
+ junit-dep
+ ${junit-dep.version}
+ test
+
diff --git a/hudi-timeline-service/pom.xml b/hudi-timeline-service/pom.xml
index f15833880..353a40f6f 100644
--- a/hudi-timeline-service/pom.xml
+++ b/hudi-timeline-service/pom.xml
@@ -121,40 +121,57 @@
-
- io.javalin
- javalin
- 2.8.0
-
-
- org.apache.httpcomponents
- fluent-hc
- 4.3.2
-
+
org.apache.hudi
hudi-common
${project.version}
+
+
+
+ log4j
+ log4j
+
+
+
com.fasterxml.jackson.core
jackson-annotations
- 2.9.7
com.fasterxml.jackson.core
jackson-core
- 2.9.7
com.fasterxml.jackson.core
jackson-databind
- 2.9.7
+
+
+
+ org.apache.httpcomponents
+ fluent-hc
+
+
+
+ io.javalin
+ javalin
+ 2.8.0
+
+
+
+ com.beust
+ jcommander
+ 1.48
+
+
org.rocksdb
rocksdbjni
+
+
org.apache.hadoop
hadoop-hdfs
@@ -194,25 +211,6 @@
-
- org.apache.hudi
- hudi-common
- ${project.version}
- tests
- test-jar
- test
-
-
- com.beust
- jcommander
- 1.48
-
-
-
-
- log4j
- log4j
-
org.apache.hadoop
hadoop-client
@@ -223,20 +221,34 @@
+
+
+
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+ tests
+ test-jar
+ test
+
+
+
+ junit
+ junit
+ test
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
com.esotericsoftware
kryo
test
-
- org.mockito
- mockito-all
- 1.10.19
- test
-
-
- junit
- junit
-
+
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index a571345b2..612afdb63 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -76,6 +76,124 @@
+
+
+
+ org.eclipse.jetty.aggregate
+ jetty-all
+ 7.6.0.v20120127
+
+
+
+ org.eclipse.jetty
+ jetty-server
+ 7.6.0.v20120127
+
+
+
+
+ org.apache.hudi
+ hudi-client
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-hive
+ ${project.version}
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hudi
+ hudi-spark
+ ${project.version}
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ com.fasterxml.jackson.module
+ jackson-module-scala_2.11
+
+
+
+
+ org.apache.avro
+ avro-mapred
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+ org.apache.parquet
+ parquet-hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+
+
+ javax.servlet
+ *
+
+
+
+
+ org.apache.spark
+ spark-sql_2.11
+
+
+ javax.servlet
+ *
+
+
+
+
+ org.apache.spark
+ spark-streaming_2.11
+ ${spark.version}
+
+
+ org.apache.spark
+ spark-streaming-kafka-0-8_2.11
+ ${spark.version}
+
+
+
+
+ io.dropwizard.metrics
+ metrics-core
+
+
io.javalin
javalin
@@ -89,43 +207,83 @@
- io.dropwizard.metrics
+ com.yammer.metrics
metrics-core
+ 2.2.0
+
+
+
+
+ org.antlr
+ stringtemplate
+ 4.0.2
- com.fasterxml.jackson.module
- jackson-module-scala_2.11
+ com.beust
+ jcommander
- org.apache.hudi
- hudi-common
- ${project.version}
+ com.twitter
+ bijection-avro_2.11
+ 0.9.2
+
- org.apache.hudi
- hudi-common
- ${project.version}
- tests
- test-jar
- test
+ io.confluent
+ kafka-avro-serializer
+ 3.0.0
+
+
+ io.confluent
+ common-config
+ 3.0.0
+
+
+ io.confluent
+ common-utils
+ 3.0.0
+
+
+ io.confluent
+ kafka-schema-registry-client
+ 3.0.0
+
- org.apache.hudi
- hudi-hive
- ${project.version}
- tests
- test-jar
- test
+ commons-codec
+ commons-codec
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ commons-lang
+ commons-lang
+
+
+ commons-pool
+ commons-pool
+
- org.apache.hudi
- hudi-spark
- ${project.version}
+ org.apache.httpcomponents
+ httpcore
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
javax.servlet
@@ -133,13 +291,6 @@
-
-
- org.eclipse.jetty
- jetty-server
- 7.6.0.v20120127
-
-
org.apache.hadoop
hadoop-hdfs
@@ -165,6 +316,7 @@
+
${hive.groupid}
hive-jdbc
@@ -180,38 +332,13 @@
-
-
- ${hive.groupid}
- hive-exec
- ${hive.version}
- test
-
-
${hive.groupid}
hive-service
${hive.version}
-
- org.apache.hudi
- hudi-hive
- ${project.version}
-
-
- javax.servlet
- servlet-api
-
-
-
-
-
- org.apache.hudi
- hudi-client
- ${project.version}
-
-
+
org.apache.hudi
hudi-client
@@ -220,163 +347,35 @@
test-jar
test
-
- commons-codec
- commons-codec
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+ tests
+ test-jar
+ test
- commons-dbcp
- commons-dbcp
-
-
- commons-lang
- commons-lang
-
-
- commons-pool
- commons-pool
-
-
- org.apache.httpcomponents
- httpcore
+ org.apache.hudi
+ hudi-hive
+ ${project.version}
+ tests
+ test-jar
+ test
+
- log4j
- log4j
-
-
- org.slf4j
- slf4j-api
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
-
-
- javax.servlet
- servlet-api
-
-
-
-
-
- org.apache.hadoop
- hadoop-client
-
-
-
- org.apache.spark
- spark-core_2.11
-
-
- javax.servlet
- *
-
-
-
-
-
- org.apache.spark
- spark-sql_2.11
-
-
- javax.servlet
- *
-
-
-
-
-
- com.yammer.metrics
- metrics-core
- 2.2.0
-
-
-
- org.apache.spark
- spark-streaming_2.11
- ${spark.version}
- provided
-
-
-
- org.apache.spark
- spark-streaming-kafka-0-8_2.11
- ${spark.version}
-
-
-
-
- org.antlr
- stringtemplate
- 4.0.2
-
-
-
- com.beust
- jcommander
+ ${hive.groupid}
+ hive-exec
+ ${hive.version}
+ test
org.mockito
mockito-all
- 1.10.19
test
-
- org.apache.avro
- avro-mapred
- 1.7.7
-
-
-
- org.apache.parquet
- parquet-avro
-
-
-
- org.apache.parquet
- parquet-hadoop
-
-
-
- com.twitter
- bijection-avro_2.11
- 0.9.2
-
-
-
- io.confluent
- kafka-avro-serializer
- 3.0.0
-
-
-
- io.confluent
- common-config
- 3.0.0
-
-
-
- io.confluent
- common-utils
- 3.0.0
-
-
-
- io.confluent
- kafka-schema-registry-client
- 3.0.0
-
-
-
- org.eclipse.jetty.aggregate
- jetty-all
- test
-
-
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
index cff6a8bfd..434638875 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
@@ -166,7 +166,7 @@ public class HoodieDeltaStreamer implements Serializable {
public String propsFilePath =
"file://" + System.getProperty("user.dir") + "/src/test/resources/delta-streamer-config/dfs-source.properties";
- @Parameter(names = {"--hudi-conf"}, description = "Any configuration that can be set in the properties file "
+ @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file "
+ "(using the CLI parameter \"--propsFilePath\") can also be passed command line using this parameter")
public List configs = new ArrayList<>();
diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index 64a3bbff2..1b68296b7 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -27,45 +27,90 @@
hudi-hadoop-mr-bundle
+
org.apache.hudi
hudi-common
${project.version}
-
org.apache.hudi
hudi-hadoop-mr
${project.version}
-
+
org.apache.hudi
*
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+
+
+ com.twitter
+ parquet-avro
+
+
+ com.twitter
+ parquet-hadoop-bundle
+
+
+
+
+ commons-logging
+ commons-logging
+
+
+ commons-io
+ commons-io
+
+
+ commons-codec
+ commons-codec
+
+
+
+ com.twitter.common
+ objectsize
+ 0.0.12
+
+
+
org.apache.hadoop
hadoop-common
-
org.apache.hadoop
hadoop-mapreduce-client-core
-
org.apache.hadoop
hadoop-mapreduce-client-common
-
org.apache.hadoop
hadoop-auth
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
${hive.groupid}
hive-jdbc
@@ -92,7 +137,7 @@
hive-shims
${hive.version}
-
+
${hive.groupid}
hive-serde
${hive.version}
@@ -109,49 +154,9 @@
- org.apache.hadoop
- hadoop-hdfs
-
-
-
- commons-logging
- commons-logging
-
-
-
- commons-io
- commons-io
-
-
-
- commons-codec
- commons-codec
-
-
-
- org.apache.parquet
- parquet-avro
-
-
-
- com.twitter
- parquet-avro
-
-
-
- com.twitter
- parquet-hadoop-bundle
-
-
-
- com.twitter.common
- objectsize
- 0.0.12
-
-
-
- org.apache.avro
- avro
+ com.esotericsoftware
+ kryo
+ test
@@ -159,6 +164,7 @@
junit
test
+
diff --git a/packaging/hudi-hive-bundle/pom.xml b/packaging/hudi-hive-bundle/pom.xml
index 4565b39bc..29594f4fe 100644
--- a/packaging/hudi-hive-bundle/pom.xml
+++ b/packaging/hudi-hive-bundle/pom.xml
@@ -28,71 +28,28 @@
jar
+
- org.apache.hadoop
- hadoop-common
+ org.apache.hudi
+ hudi-common
+ ${project.version}
- org.apache.hadoop
- hadoop-client
+ org.apache.hudi
+ hudi-hadoop-mr-bundle
+ ${project.version}
- org.apache.hadoop
- hadoop-hdfs
-
-
- org.apache.hadoop
- hadoop-auth
-
-
- ${hive.groupid}
- hive-service
- ${hive.version}
-
-
- ${hive.groupid}
- hive-jdbc
- ${hive.version}
-
-
- ${hive.groupid}
- hive-metastore
- ${hive.version}
-
-
- ${hive.groupid}
- hive-common
- ${hive.version}
-
-
- com.google.guava
- guava
-
-
- org.apache.thrift
- libthrift
- ${thrift.version}
-
-
- org.apache.thrift
- libfb303
- 0.9.3
-
-
-
- joda-time
- joda-time
-
-
-
-
- commons-dbcp
- commons-dbcp
-
-
-
- commons-io
- commons-io
+ org.apache.hudi
+ hudi-hive
+ ${project.version}
+
+
+
+ org.apache.hudi
+ *
+
+
@@ -105,43 +62,93 @@
slf4j-log4j12
-
- com.beust
- jcommander
-
-
-
- org.apache.httpcomponents
- httpcore
-
-
-
- org.apache.httpcomponents
- httpclient
-
-
+
com.twitter
parquet-avro
+
- org.apache.hudi
- hudi-hadoop-mr-bundle
- ${project.version}
+ org.apache.thrift
+ libthrift
+ ${thrift.version}
-
- org.apache.hudi
- hudi-hive
- ${project.version}
-
-
-
- org.apache.hudi
- *
-
-
+ org.apache.thrift
+ libfb303
+ 0.9.3
+
+
+
+ com.google.guava
+ guava
+
+
+
+ joda-time
+ joda-time
+
+
+
+ com.beust
+ jcommander
+
+
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ commons-io
+ commons-io
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
+ org.apache.hadoop
+ hadoop-auth
+
+
+
+
+ ${hive.groupid}
+ hive-service
+
+
+ ${hive.groupid}
+ hive-jdbc
+
+
+ ${hive.groupid}
+ hive-metastore
+
+
+ ${hive.groupid}
+ hive-common
diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml
index b9b3a38fa..54b966d32 100644
--- a/packaging/hudi-presto-bundle/pom.xml
+++ b/packaging/hudi-presto-bundle/pom.xml
@@ -28,46 +28,16 @@
jar
+
- org.apache.hadoop
- hadoop-common
+ org.apache.hudi
+ hudi-common
+ ${project.version}
- org.apache.hadoop
- hadoop-client
-
-
- org.apache.hadoop
- hadoop-hdfs
-
-
- org.apache.hadoop
- hadoop-auth
-
-
- com.google.guava
- guava
-
-
- org.apache.thrift
- libthrift
- ${thrift.version}
-
-
-
- joda-time
- joda-time
-
-
-
-
- commons-dbcp
- commons-dbcp
-
-
-
- commons-io
- commons-io
+ org.apache.hudi
+ hudi-hadoop-mr-bundle
+ ${project.version}
@@ -80,30 +50,70 @@
slf4j-log4j12
-
- com.beust
- jcommander
-
-
-
- org.apache.httpcomponents
- httpcore
-
-
-
- org.apache.httpcomponents
- httpclient
-
-
+
com.twitter
parquet-avro
+
- org.apache.hudi
- hudi-hadoop-mr-bundle
- ${project.version}
+ org.apache.thrift
+ libthrift
+ ${thrift.version}
+
+
+
+ joda-time
+ joda-time
+
+
+
+ com.google.guava
+ guava
+
+
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ commons-io
+ commons-io
+
+
+
+ com.beust
+ jcommander
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+ org.apache.httpcomponents
+ httpclient
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-common
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
+ org.apache.hadoop
+ hadoop-auth
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index 304cea40d..f46b94b17 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -127,6 +127,8 @@
parquet.schema
org.apache.hudi.parquet.schema
+
+
com.esotericsoftware.kryo.
org.apache.hudi.com.esotericsoftware.kryo.
@@ -177,9 +184,8 @@
org.apache.derby:derby
org.apache.hadoop:*
org.apache.hbase:*
-
+
org.apache.hive:hive-exec
- org.apache.hive:hive-serde
org.apache.hive:hive-shims
org.apache.spark:*
@@ -206,102 +212,18 @@
-
- com.beust
- jcommander
-
-
- commons-dbcp
- commons-dbcp
-
-
- org.apache.avro
- avro
-
-
- commons-codec
- commons-codec
-
+
org.scala-lang
scala-library
${scala.version}
+
+
- org.scalatest
- scalatest_2.11
- 3.0.1
- test
-
-
- org.apache.spark
- spark-core_2.11
-
-
- org.apache.spark
- spark-sql_2.11
-
-
- com.databricks
- spark-avro_2.11
- 4.0.0
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- org.apache.hadoop
- hadoop-client
-
-
- javax.servlet
- *
-
-
- provided
-
-
- org.apache.hadoop
- hadoop-common
- provided
-
-
- log4j
- log4j
- ${log4j.version}
-
-
- org.apache.avro
- avro
-
-
- ${hive.groupid}
- hive-service
- ${hive.version}
- compile
-
-
- ${hive.groupid}
- hive-jdbc
- ${hive.version}
- compile
-
-
- ${hive.groupid}
- hive-metastore
- ${hive.version}
- compile
-
-
- ${hive.groupid}
- hive-common
- ${hive.version}
- compile
-
-
- org.apache.commons
- commons-configuration2
+ org.apache.hudi
+ hudi-client
+ ${project.version}
org.apache.hudi
@@ -318,16 +240,141 @@
hudi-hive
${project.version}
-
- org.apache.hudi
- hudi-client
- ${project.version}
-
org.apache.hudi
hudi-spark
${project.version}
-
+
+
+
+
+ log4j
+ log4j
+ ${log4j.version}
+
+
+
+
+ com.fasterxml.jackson.core
+ jackson-annotations
+
+
+
+
+ org.apache.avro
+ avro
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+
+
+ org.apache.spark
+ spark-sql_2.11
+
+
+
+
+ com.databricks
+ spark-avro_2.11
+ 4.0.0
+
+
+
+ com.beust
+ jcommander
+
+
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ org.apache.commons
+ commons-configuration2
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ javax.servlet
+ *
+
+
+ provided
+
+
+ org.apache.hadoop
+ hadoop-common
+ provided
+
+
+
+
+ ${hive.groupid}
+ hive-jdbc
+ ${hive.version}
+ standalone
+
+
+ org.slf4j
+ slf4j-api
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
+ ${hive.groupid}
+ hive-service
+ ${hive.version}
+ compile
+
+
+ ${hive.groupid}
+ hive-jdbc
+ ${hive.version}
+ compile
+
+
+ ${hive.groupid}
+ hive-serde
+ ${hive.version}
+ compile
+
+
+ ${hive.groupid}
+ hive-metastore
+ ${hive.version}
+ compile
+
+
+ ${hive.groupid}
+ hive-common
+ ${hive.version}
+ compile
+
+
+
+
+ org.scalatest
+ scalatest_2.11
+ ${scalatest.version}
+ test
+
diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml
index c8dc51890..530b12fba 100644
--- a/packaging/hudi-utilities-bundle/pom.xml
+++ b/packaging/hudi-utilities-bundle/pom.xml
@@ -28,8 +28,6 @@
jar
- 1.2.17
- 4.10
true
${project.basedir}/src/main/resources/META-INF
HUDI_NOTICE.txt
@@ -205,6 +203,98 @@
+
+
+ org.apache.hudi
+ hudi-client
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-common
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-hive
+ ${project.version}
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+ org.apache.hudi
+ hudi-spark
+ ${project.version}
+
+
+ org.apache.hudi
+ hudi-utilities
+ ${project.version}
+
+
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ com.fasterxml.jackson.module
+ jackson-module-scala_2.11
+
+
+
+
+ org.apache.avro
+ avro-mapred
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+
+
+ org.apache.parquet
+ parquet-hadoop
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+
+
+ org.apache.spark
+ spark-sql_2.11
+
+
+ org.apache.spark
+ spark-streaming_2.11
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-streaming-kafka-0-8_2.11
+ ${spark.version}
+
+
+
+
+ io.dropwizard.metrics
+ metrics-core
+
+
io.javalin
javalin
@@ -212,21 +302,114 @@
- io.dropwizard.metrics
+ com.yammer.metrics
metrics-core
+ 2.2.0
+
+
+
+
+ org.antlr
+ stringtemplate
+ 4.0.2
- com.fasterxml.jackson.module
- jackson-module-scala_2.11
+ com.beust
+ jcommander
+
+ com.twitter
+ bijection-avro_2.11
+ 0.9.2
+
+
+
+
+ io.confluent
+ kafka-avro-serializer
+ 3.0.0
+
+
+ io.confluent
+ common-config
+ 3.0.0
+
+
+ io.confluent
+ common-utils
+ 3.0.0
+
+
+ io.confluent
+ kafka-schema-registry-client
+ 3.0.0
+
+
+
+
+ commons-codec
+ commons-codec
+
+
+ commons-dbcp
+ commons-dbcp
+
+
+ commons-pool
+ commons-pool
+
+
+
+
+ org.apache.httpcomponents
+ httpcore
+
+
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
+
+ ${hive.groupid}
+ hive-jdbc
+ ${hive.version}
+ standalone
+
+
+ org.slf4j
+ slf4j-api
+
+
+ javax.servlet
+ servlet-api
+
+
+
+
+
org.apache.hudi
- hudi-common
+ hudi-client
${project.version}
+ tests
+ test-jar
+ test
-
org.apache.hudi
hudi-common
@@ -235,7 +418,6 @@
test-jar
test
-
org.apache.hudi
hudi-hive
@@ -245,17 +427,7 @@
test
-
- org.apache.hudi
- hudi-spark
- ${project.version}
-
-
-
- org.apache.hadoop
- hadoop-hdfs
- tests
-
+
org.apache.hadoop
hadoop-common
@@ -275,7 +447,13 @@
+
+ org.apache.hadoop
+ hadoop-hdfs
+ tests
+
+
${hive.groupid}
hive-exec
@@ -283,191 +461,11 @@
test
-
- ${hive.groupid}
- hive-jdbc
- ${hive.version}
- standalone
-
-
- org.slf4j
- slf4j-api
-
-
- javax.servlet
- servlet-api
-
-
-
-
-
- org.apache.hudi
- hudi-hive
- ${project.version}
-
-
- javax.servlet
- servlet-api
-
-
-
-
-
- org.apache.hudi
- hudi-client
- ${project.version}
-
-
-
- org.apache.hudi
- hudi-utilities
- ${project.version}
-
-
-
- org.apache.hudi
- hudi-client
- ${project.version}
- tests
- test-jar
- test
-
-
-
- commons-codec
- commons-codec
-
-
- commons-dbcp
- commons-dbcp
-
-
- commons-pool
- commons-pool
-
-
-
- org.apache.httpcomponents
- httpcore
-
-
-
- log4j
- log4j
-
-
- org.slf4j
- slf4j-api
-
-
-
- org.apache.hadoop
- hadoop-mapreduce-client-common
-
-
- javax.servlet
- servlet-api
-
-
-
-
-
- org.apache.hadoop
- hadoop-client
-
-
-
- org.apache.spark
- spark-core_2.11
-
-
-
- org.apache.spark
- spark-sql_2.11
-
-
-
- com.yammer.metrics
- metrics-core
- 2.2.0
-
-
-
- org.apache.spark
- spark-streaming_2.11
- ${spark.version}
- provided
-
-
-
- org.apache.spark
- spark-streaming-kafka-0-8_2.11
- ${spark.version}
-
-
-
-
- org.antlr
- stringtemplate
- 4.0.2
-
-
-
- com.beust
- jcommander
-
-
org.mockito
mockito-all
- 1.10.19
test
-
- org.apache.avro
- avro-mapred
- 1.7.7
-
-
-
- org.apache.parquet
- parquet-avro
-
-
-
- org.apache.parquet
- parquet-hadoop
-
-
-
- com.twitter
- bijection-avro_2.11
- 0.9.2
-
-
-
- io.confluent
- kafka-avro-serializer
- 3.0.0
-
-
-
- io.confluent
- common-config
- 3.0.0
-
-
-
- io.confluent
- common-utils
- 3.0.0
-
-
-
- io.confluent
- kafka-schema-registry-client
- 3.0.0
-
diff --git a/pom.xml b/pom.xml
index 5671d11b8..45b10b600 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,11 +24,9 @@
hudi
pom
0.5.0-SNAPSHOT
- Hoodie is a Apache Spark library that provides the ability to efficiently do
- incremental processing on datasets in HDFS
-
- https://github.com/uber/hudi
- Hoodie
+ Apache Hudi brings stream style processing on big data
+ https://github.com/apache/incubator-hudi
+ Hudi
hudi-common
@@ -58,15 +56,15 @@
- Uber Technologies Inc.
- http://www.uber.com/
+ The Apache Software Foundation
+ https://www.apache.org
vinothchandar
Vinoth Chandar
- Uber
+ Confluent Inc
prasannarajaperumal
@@ -129,10 +127,13 @@
2.19.1
3.8.1
2.6.7
+ 2.17
1.8.1
4.11
- 1.9.5
+ 4.10
+ 1.10.19
1.2.17
+ 1.7.5
2.9.9
2.7.3
org.apache.hive
@@ -142,12 +143,17 @@
1.7.7
2.11.8
2.11
+ 3.3.1
+ 3.0.1
file://${project.basedir}/src/test/resources/log4j-surefire.properties
0.12.0
1.2.3
1.9.13
${project.basedir}
NOTICE.txt
+ false
+ ${skipTests}
+ ${skipTests}
@@ -232,32 +238,32 @@
- org.scalastyle
- scalastyle-maven-plugin
- 1.0.0
-
- false
- true
- true
- false
- ${project.basedir}/src/main/scala
- ${project.basedir}/src/test/scala
- style/scalastyle-config.xml
- UTF-8
-
-
-
- compile
-
- check
-
-
-
+ org.scalastyle
+ scalastyle-maven-plugin
+ 1.0.0
+
+ false
+ true
+ true
+ false
+ ${project.basedir}/src/main/scala
+ ${project.basedir}/src/test/scala
+ style/scalastyle-config.xml
+ UTF-8
+
+
+
+ compile
+
+ check
+
+
+
org.apache.maven.plugins
maven-compiler-plugin
- 3.8.1
+ ${maven-compiler-plugin.version}
1.8
1.8
@@ -292,6 +298,7 @@
maven-surefire-plugin
${maven-surefire-plugin.version}
+ ${skipUTs}
${surefireArgLine}
@@ -452,18 +459,152 @@
+
- com.google.code.gson
- gson
- 2.3.1
- test
+ log4j
+ log4j
+ ${log4j.version}
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+
+
+ org.slf4j
+ slf4j-log4j12
+ ${slf4j.version}
+
- junit
- junit
- ${junit.version}
+ com.fasterxml.jackson.core
+ jackson-annotations
+ ${fasterxml.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+ ${fasterxml.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${fasterxml.version}.1
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-guava
+ ${fasterxml.version}
+
+
+ com.fasterxml.jackson.module
+ jackson-module-scala_2.11
+ ${fasterxml.version}
+
+
+
+
+ org.glassfish.jersey.core
+ jersey-server
+ ${glassfish.version}
+
+
+ org.glassfish.jersey.connectors
+ jersey-apache-connector
+ ${glassfish.version}
+
+
+ org.glassfish.jersey.containers
+ jersey-container-servlet-core
+ ${glassfish.version}
+
+
+
+ org.eclipse.jetty.aggregate
+ jetty-all
test
+ 7.6.0.v20120127
+
+
+
+
+ org.apache.avro
+ avro
+ ${avro.version}
+
+
+ org.apache.avro
+ avro-mapred
+ ${avro.version}
+
+
+
+
+ org.apache.parquet
+ parquet-avro
+ ${parquet.version}
+
+
+ org.apache.parquet
+ parquet-hadoop
+ ${parquet.version}
+
+
+ org.apache.parquet
+ parquet-hive-bundle
+ ${parquet.version}
+
+
+
+
+
+ com.twitter
+ parquet-hadoop-bundle
+ 1.6.0
+
+
+ com.twitter
+ parquet-hive-bundle
+ 1.6.0
+
+
+ com.twitter
+ parquet-avro
+ 1.6.0
+
+
+
+
+ org.apache.spark
+ spark-core_2.11
+ ${spark.version}
+ provided
+
+
+ org.apache.spark
+ spark-sql_2.11
+ ${spark.version}
+ provided
+
+
+
+
+ com.databricks
+ spark-avro_2.11
+ 4.0.0
+
+
+
+
+ io.dropwizard.metrics
+ metrics-graphite
+ ${metrics.version}
+
+
+ io.dropwizard.metrics
+ metrics-core
+ ${metrics.version}
@@ -473,28 +614,119 @@
- log4j
- log4j
- ${log4j.version}
-
-
-
-
joda-time
joda-time
${joda.version}
+
+
+ com.google.guava
+ guava
+ 15.0
+
+
+
+ xerces
+ xercesImpl
+ 2.9.1
+
+
+
+ xalan
+ xalan
+ 2.7.1
+
+
+
+ org.rocksdb
+ rocksdbjni
+ 5.17.2
+
+
+
+
+ commons-codec
+ commons-codec
+ 1.4
+
+
+ commons-io
+ commons-io
+ 2.6
+
+
+ commons-lang
+ commons-lang
+ 2.6
+
+
+ commons-logging
+ commons-logging
+ 1.2
+
+
+ commons-dbcp
+ commons-dbcp
+ 1.4
+
+
+ commons-pool
+ commons-pool
+ 1.4
+
+
+ org.apache.commons
+ commons-configuration2
+ 2.1.1
+
+
+
+
+ org.apache.httpcomponents
+ fluent-hc
+ 4.3.2
+
+
+ org.apache.httpcomponents
+ httpcore
+ 4.3.2
+
+
+ org.apache.httpcomponents
+ httpclient
+ 4.3.2
+
+
+
+
+ org.codehaus.jackson
+ jackson-core-asl
+ ${codehaus-jackson.version}
+
+
+ org.codehaus.jackson
+ jackson-mapper-asl
+ ${codehaus-jackson.version}
+
+
+ org.codehaus.jackson
+ jackson-jaxrs
+ ${codehaus-jackson.version}
+
+
+ org.codehaus.jackson
+ jackson-xc
+ ${codehaus-jackson.version}
+
+
+
org.apache.hadoop
hadoop-client
${hadoop.version}
provided
-
- com.fasterxml.jackson.*
- *
-
javax.servlet
servlet-api
@@ -505,48 +737,11 @@
-
-
- org.apache.parquet
- parquet-avro
- ${parquet.version}
-
-
-
- org.apache.parquet
- parquet-hadoop
- ${parquet.version}
-
-
-
- org.apache.avro
- avro-mapred
- ${avro.version}
-
-
-
-
- com.google.guava
- guava
- 15.0
-
-
-
org.apache.hadoop
hadoop-common
${hadoop.version}
provided
-
-
- jdk.tools
- jdk.tools
-
-
- javax.xml.bind
- jaxb-api
-
-
org.apache.hadoop
@@ -559,12 +754,6 @@
hadoop-auth
${hadoop.version}
provided
-
-
- com.fasterxml.jackson.*
- *
-
-
org.apache.hadoop
@@ -591,214 +780,36 @@
- org.rocksdb
- rocksdbjni
- 5.17.2
+ org.apache.hadoop
+ hadoop-hdfs
+ tests
+ ${hadoop.version}
- commons-codec
- commons-codec
- 1.4
-
-
- commons-lang
- commons-lang
- 2.6
-
-
- commons-logging
- commons-logging
- 1.2
-
-
- commons-io
- commons-io
- 2.6
-
-
-
-
- com.twitter
- parquet-hadoop-bundle
- 1.6.0
-
-
- com.twitter
- parquet-hive-bundle
- 1.6.0
-
-
- com.twitter
- parquet-avro
- 1.6.0
-
-
-
- org.apache.parquet
- parquet-hive-bundle
- ${parquet.version}
-
-
-
- org.apache.spark
- spark-core_2.11
- ${spark.version}
- provided
+ org.apache.hadoop
+ hadoop-common
+ tests
+ ${hadoop.version}
- com.fasterxml.jackson.**
- *
+ jdk.tools
+ jdk.tools
- javax.servlet
- servlet-api
-
-
-
-
- org.apache.spark
- spark-sql_2.11
- ${spark.version}
- provided
-
-
- com.fasterxml.jackson.**
- *
+ javax.xml.bind
+ jaxb-api
+
org.apache.hbase
hbase-client
${hbase.version}
-
- org.apache.avro
- avro
- ${avro.version}
-
-
- org.slf4j
- slf4j-api
-
-
-
-
-
-
- io.dropwizard.metrics
- metrics-graphite
- ${metrics.version}
-
-
- io.dropwizard.metrics
- metrics-core
- ${metrics.version}
-
-
-
- xerces
- xercesImpl
- 2.9.1
-
-
- xalan
- xalan
- 2.7.1
-
-
-
- commons-dbcp
- commons-dbcp
- 1.4
-
-
- commons-pool
- commons-pool
- 1.4
-
-
- org.apache.httpcomponents
- fluent-hc
- 4.3.2
-
-
- org.apache.httpcomponents
- httpcore
- 4.3.2
-
-
- org.apache.httpcomponents
- httpclient
- 4.3.6
-
-
- org.slf4j
- slf4j-api
- 1.7.5
-
-
- org.slf4j
- slf4j-log4j12
- 1.7.5
-
-
-
- org.apache.commons
- commons-configuration2
- 2.1.1
-
-
-
- com.fasterxml.jackson.core
- jackson-annotations
- ${fasterxml.version}
-
-
-
- com.fasterxml.jackson.core
- jackson-core
- ${fasterxml.version}
-
-
-
- com.fasterxml.jackson.core
- jackson-databind
- ${fasterxml.version}.1
-
-
-
- com.fasterxml.jackson.module
- jackson-module-scala_2.11
- ${fasterxml.version}
-
-
-
- org.codehaus.jackson
- jackson-core-asl
- ${codehaus-jackson.version}
-
-
-
- org.codehaus.jackson
- jackson-mapper-asl
- ${codehaus-jackson.version}
-
-
-
- org.codehaus.jackson
- jackson-jaxrs
- ${codehaus-jackson.version}
-
-
-
- org.codehaus.jackson
- jackson-xc
- ${codehaus-jackson.version}
-
-
+
${hive.groupid}
hive-service
@@ -919,34 +930,28 @@
+
- org.apache.hadoop
- hadoop-hdfs
- tests
- ${hadoop.version}
+ com.google.code.gson
+ gson
+ 2.3.1
+ test
+
- org.apache.hadoop
- hadoop-common
- tests
- ${hadoop.version}
-
-
- jdk.tools
- jdk.tools
-
-
- javax.xml.bind
- jaxb-api
-
-
+ junit
+ junit
+ ${junit.version}
+ test
-
+
+
org.mockito
mockito-all
test
- 1.10.19
+ ${mockito.version}
+
com.esotericsoftware
@@ -954,24 +959,29 @@
4.0.0
test
-
-
- org.eclipse.jetty.aggregate
- jetty-all
- test
- 7.6.0.v20120127
-
-
- Maven repository
- https://central.maven.org/maven2/
+ Maven Central
+ Maven Repository
+ https://repo.maven.apache.org/maven2
+
+ true
+
+
+ false
+
cloudera-repo-releases
https://repository.cloudera.com/artifactory/public/
+
+ true
+
+
+ false
+
diff --git a/tools/run_travis_tests.sh b/tools/run_travis_tests.sh
new file mode 100755
index 000000000..51a34e4b9
--- /dev/null
+++ b/tools/run_travis_tests.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+mode=$1
+
+if [ "$mode" = "unit" ];
+then
+ echo "Running Unit Tests"
+ mvn test -DskipITs=true -B
+elif [ "$mode" = "integration" ];
+then
+ echo "Running Integration Tests"
+ mvn verify -DskipUTs=true -B
+else
+ echo "Unknown mode $mode"
+ exit 1;
+fi
+