Revamped Deltastreamer (#93)
* Add analytics to site * Fix ugly favicon * New & Improved HoodieDeltaStreamer - Can incrementally consume from HDFS or Kafka, with exactly-once semantics! - Supports Json/Avro data, Source can also do custom things - Source is totally pluggable, via reflection - Key generation is pluggable, currently added SimpleKeyGenerator - Schema provider is pluggable, currently Filebased schemas - Configurable field to break ties during preCombine - Finally, can also plugin the HoodieRecordPayload, to get other merge types than overwriting - Handles efficient avro serialization in Spark Pending : - Rewriting of HiveIncrPullSource - Hive sync via hoodie-hive - Cleanup & tests * Minor fixes from master rebase * Implementation of HiveIncrPullSource - Copies commit by commit from source to target * Adding TimestampBasedKeyGenerator - Supports unix time & date strings
This commit is contained in:
@@ -25,12 +25,21 @@
|
||||
|
||||
<artifactId>hoodie-utilities</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.jacoco</groupId>
|
||||
<artifactId>jacoco-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
@@ -41,7 +50,7 @@
|
||||
</descriptors>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>com.uber.hoodie.utilities.HoodieDeltaStreamer</mainClass>
|
||||
<mainClass>com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
|
||||
@@ -57,7 +66,6 @@
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
|
||||
<resources>
|
||||
@@ -220,6 +228,19 @@
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming_2.10</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-streaming-kafka-0-8_2.10</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Used for SQL templating -->
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
@@ -238,7 +259,27 @@
|
||||
<version>1.10.19</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro-mapred</artifactId>
|
||||
<version>1.7.6-cdh5.7.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-hadoop</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.twitter</groupId>
|
||||
<artifactId>bijection-avro_2.10</artifactId>
|
||||
<version>0.9.2</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
Reference in New Issue
Block a user