Reworking the deltastreamer tool
- Standardize version of jackson - DFSPropertiesConfiguration replaces usage of commons PropertiesConfiguration - Remove dependency on ConstructorUtils - Throw error if ordering value is not present, during key generation - Switch to shade plugin for hoodie-utilities - Added support for consumption for Confluent avro kafka serdes - Support for Confluent schema registry - KafkaSource now deals with skews nicely, by doing round robin allocation of source limit across partitions - Added support for BULK_INSERT operations as well - Pass in the payload class config properly into HoodieWriteClient - Fix documentation based on new usage - Adding tests on deltastreamer, sources and all new util classes.
This commit is contained in:
committed by
vinoth chandar
parent
fb95dbdedb
commit
d58ddbd999
@@ -42,27 +42,43 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.4.1</version>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>src/assembly/src.xml</descriptor>
|
||||
</descriptors>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
|
||||
</configuration>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.1.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>make-assembly</id>
|
||||
<!-- bind to the packaging phase -->
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
|
||||
</dependencyReducedPomLocation>
|
||||
<minimizeJar>true</minimizeJar>
|
||||
<artifactSet>
|
||||
<includes>
|
||||
<include>com.uber.hoodie:hoodie-common</include>
|
||||
<include>com.uber.hoodie:hoodie-client</include>
|
||||
<include>com.uber.hoodie:hoodie-spark</include>
|
||||
<include>com.uber.hoodie:hoodie-hive</include>
|
||||
<include>com.uber.hoodie:hoodie-hadoop-mr</include>
|
||||
<include>com.beust:jcommander</include>
|
||||
<include>com.twitter:bijection-avro_2.11</include>
|
||||
<include>com.twitter:bijection-core_2.11</include>
|
||||
<include>org.apache.parquet:parquet-avro</include>
|
||||
<include>com.twitter:parquet-avro</include>
|
||||
<include>com.twitter.common:objectsize</include>
|
||||
<include>io.confluent:kafka-avro-serializer</include>
|
||||
<include>io.confluent:common-config</include>
|
||||
<include>io.confluent:common-utils</include>
|
||||
<include>io.confluent:kafka-schema-registry-client</include>
|
||||
<include>org.apache.spark:spark-streaming-kafka-0-8_2.11</include>
|
||||
<include>org.apache.kafka:kafka_2.11</include>
|
||||
<include>com.yammer.metrics:metrics-core</include>
|
||||
<include>com.101tec:zkclient</include>
|
||||
<include>org.apache.kafka:kafka-clients</include>
|
||||
</includes>
|
||||
</artifactSet>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
@@ -78,10 +94,18 @@
|
||||
</resources>
|
||||
</build>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>confluent</id>
|
||||
<url>http://packages.confluent.io/maven/</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
<groupId>com.fasterxml.jackson.module</groupId>
|
||||
<artifactId>jackson-module-scala_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -108,21 +132,6 @@
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<classifier>tests</classifier>
|
||||
<!-- Need these exclusions to make sure JavaSparkContext can be setup. https://issues.apache.org/jira/browse/SPARK-1693 -->
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet.jsp</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
@@ -184,6 +193,10 @@
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>com.fasterxml.jackson.*</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
@@ -219,23 +232,16 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.11</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-sql_2.11</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@@ -291,6 +297,30 @@
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-avro-serializer</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-config</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>common-utils</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.confluent</groupId>
|
||||
<artifactId>kafka-schema-registry-client</artifactId>
|
||||
<version>3.0.0</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
Reference in New Issue
Block a user