1
0

Reworking the deltastreamer tool

- Standardize version of jackson
 - DFSPropertiesConfiguration replaces usage of commons PropertiesConfiguration
 - Remove dependency on ConstructorUtils
 - Throw error if ordering value is not present, during key generation
 - Switch to shade plugin for hoodie-utilities
 - Added support for consumption for Confluent avro kafka serdes
 - Support for Confluent schema registry
 - KafkaSource now deals with skews nicely, by doing round robin allocation of source limit across partitions
 - Added support for BULK_INSERT operations as well
 - Pass in the payload class config properly into HoodieWriteClient
 - Fix documentation based on new usage
 - Adding tests on deltastreamer, sources and all new util classes.
This commit is contained in:
Vinoth Chandar
2018-08-04 03:35:30 -07:00
committed by vinoth chandar
parent fb95dbdedb
commit d58ddbd999
49 changed files with 1919 additions and 754 deletions

View File

@@ -42,27 +42,43 @@
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4.1</version>
<configuration>
<descriptors>
<descriptor>src/assembly/src.xml</descriptor>
</descriptors>
<archive>
<manifest>
<mainClass>com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer</mainClass>
</manifest>
</archive>
</configuration>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.1</version>
<executions>
<execution>
<id>make-assembly</id>
<!-- bind to the packaging phase -->
<phase>package</phase>
<goals>
<goal>single</goal>
<goal>shade</goal>
</goals>
<configuration>
<dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
<minimizeJar>true</minimizeJar>
<artifactSet>
<includes>
<include>com.uber.hoodie:hoodie-common</include>
<include>com.uber.hoodie:hoodie-client</include>
<include>com.uber.hoodie:hoodie-spark</include>
<include>com.uber.hoodie:hoodie-hive</include>
<include>com.uber.hoodie:hoodie-hadoop-mr</include>
<include>com.beust:jcommander</include>
<include>com.twitter:bijection-avro_2.11</include>
<include>com.twitter:bijection-core_2.11</include>
<include>org.apache.parquet:parquet-avro</include>
<include>com.twitter:parquet-avro</include>
<include>com.twitter.common:objectsize</include>
<include>io.confluent:kafka-avro-serializer</include>
<include>io.confluent:common-config</include>
<include>io.confluent:common-utils</include>
<include>io.confluent:kafka-schema-registry-client</include>
<include>org.apache.spark:spark-streaming-kafka-0-8_2.11</include>
<include>org.apache.kafka:kafka_2.11</include>
<include>com.yammer.metrics:metrics-core</include>
<include>com.101tec:zkclient</include>
<include>org.apache.kafka:kafka-clients</include>
</includes>
</artifactSet>
</configuration>
</execution>
</executions>
</plugin>
@@ -78,10 +94,18 @@
</resources>
</build>
<repositories>
<repository>
<id>confluent</id>
<url>http://packages.confluent.io/maven/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_2.11</artifactId>
</dependency>
<dependency>
@@ -108,21 +132,6 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<classifier>tests</classifier>
<!-- Need these exclusions to make sure JavaSparkContext can be setup. https://issues.apache.org/jira/browse/SPARK-1693 -->
<exclusions>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
@@ -184,6 +193,10 @@
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
@@ -219,23 +232,16 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
</dependency>
<dependency>
@@ -291,6 +297,30 @@
<version>0.9.2</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-avro-serializer</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>common-config</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>common-utils</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>io.confluent</groupId>
<artifactId>kafka-schema-registry-client</artifactId>
<version>3.0.0</version>
</dependency>
</dependencies>
</project>