From adc8cac74378c16f2508adbf16a6a51d241a3e35 Mon Sep 17 00:00:00 2001 From: Balaji Varadarajan Date: Fri, 1 Mar 2019 11:17:53 -0800 Subject: [PATCH] Fix hive sync (libfb version mismatch) and deltastreamer issue (missing cmdline argument) in demo --- docs/quickstart.md | 9 +++++---- hoodie-hive/pom.xml | 2 +- packaging/hoodie-hive-bundle/pom.xml | 7 ++++++- packaging/hoodie-presto-bundle/pom.xml | 2 +- pom.xml | 1 + 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index 70848d099..882e660de 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -450,13 +450,14 @@ automatically initializes the datasets in the file-system if they do not exist y docker exec -it adhoc-2 /bin/bash # Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow dataset in HDFS -spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --props /var/demo/config/kafka-source.properties +spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties .... .... 2018-09-24 22:20:00 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:54 - OutputCommitCoordinator stopped! 
2018-09-24 22:20:00 INFO SparkContext:54 - Successfully stopped SparkContext + # Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor dataset in HDFS -spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --props /var/demo/config/kafka-source.properties +spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties .... 2018-09-24 22:22:01 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:54 - OutputCommitCoordinator stopped! 
2018-09-24 22:22:01 INFO SparkContext:54 - Successfully stopped SparkContext @@ -724,10 +725,10 @@ cat docker/demo/data/batch_2.json | kafkacat -b kafkabroker -t stock_ticks -P docker exec -it adhoc-2 /bin/bash # Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_cow dataset in HDFS -spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --props /var/demo/config/kafka-source.properties +spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type COPY_ON_WRITE --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_cow --target-table stock_ticks_cow --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties # Run the following spark-submit command to execute the delta-streamer and ingest to stock_ticks_mor dataset in HDFS -spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --props /var/demo/config/kafka-source.properties +spark-submit --class com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer $HUDI_UTILITIES_BUNDLE --storage-type MERGE_ON_READ --source-class com.uber.hoodie.utilities.sources.JsonKafkaSource --source-ordering-field ts --target-base-path /user/hive/warehouse/stock_ticks_mor --target-table stock_ticks_mor --schemaprovider-class com.uber.hoodie.utilities.schema.FilebasedSchemaProvider --props /var/demo/config/kafka-source.properties
exit ``` diff --git a/hoodie-hive/pom.xml b/hoodie-hive/pom.xml index ff7487aa8..a856a5e65 100644 --- a/hoodie-hive/pom.xml +++ b/hoodie-hive/pom.xml @@ -50,7 +50,7 @@ org.apache.thrift libthrift - 0.12.0 + ${thrift.version} diff --git a/packaging/hoodie-hive-bundle/pom.xml b/packaging/hoodie-hive-bundle/pom.xml index 52021c309..61462360f 100644 --- a/packaging/hoodie-hive-bundle/pom.xml +++ b/packaging/hoodie-hive-bundle/pom.xml @@ -71,7 +71,12 @@ org.apache.thrift libthrift - 0.9.2 + ${thrift.version} + + + org.apache.thrift + libfb303 + 0.9.3 diff --git a/packaging/hoodie-presto-bundle/pom.xml b/packaging/hoodie-presto-bundle/pom.xml index c4a2bdb99..750fba9a8 100644 --- a/packaging/hoodie-presto-bundle/pom.xml +++ b/packaging/hoodie-presto-bundle/pom.xml @@ -51,7 +51,7 @@ org.apache.thrift libthrift - 0.9.2 + ${thrift.version} diff --git a/pom.xml b/pom.xml index b9e9bb876..d3799207d 100644 --- a/pom.xml +++ b/pom.xml @@ -138,6 +138,7 @@ 2.11.8 2.11 file://${project.basedir}/src/test/resources/log4j-surefire.properties + 0.12.0