1
0

New Features in DeltaStreamer:

(1) Apply transformation when using delta-streamer to ingest data.
 (2) Add Hudi Incremental Source for Delta Streamer
 (3) Allow delta-streamer config properties to be passed as command-line arguments
 (4) Add Hive Integration to Delta-Streamer and address Review comments
 (5) Ensure MultiPartKeysValueExtractor handles hive-style partition description
 (6) Reuse same spark session on both source and transformer
 (7) Support extracting partition fields from _hoodie_partition_path for HoodieIncrSource
 (8) Reuse Binary Avro coders
 (9) Add push down filter for Incremental source
 (10) Add Hoodie DeltaStreamer metrics to track total time taken
This commit is contained in:
Balaji Varadarajan
2018-10-10 10:31:34 -07:00
committed by vinoth chandar
parent c70dbc13e9
commit 3a0044216c
65 changed files with 2752 additions and 911 deletions

View File

@@ -66,6 +66,48 @@
<artifactId>hadoop-auth</artifactId>
</dependency>
<!-- Hive artifacts declared directly on this module. Every entry takes its
     groupId from the hive.groupid property and its version from the
     hive.version property; neither property is defined in this fragment. -->
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
<!-- Keeps commons-logging from being pulled in transitively through hive-jdbc. -->
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-shims</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
@@ -182,116 +224,4 @@
<!-- Module-level properties. Sets checkstyle.skip to true; presumably read by
     the Checkstyle plugin to skip its checks for this module (TODO confirm
     against the plugin configuration in the parent POM). -->
<properties>
<checkstyle.skip>true</checkstyle.skip>
</properties>
<!-- Two mutually exclusive Hive profiles, selected by whether the hive11
     property is defined. Both declare the same seven Hive artifacts, each
     from its own groupid/version property pair. -->
<profiles>
<!-- hive12: active whenever the hive11 property is NOT defined (the leading
     "!" negates the property check). Sets hiveJarSuffix to an empty value. -->
<profile>
<id>hive12</id>
<activation>
<property>
<name>!hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix />
</properties>
<dependencies>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive12.version}</version>
<!-- Keeps commons-logging from being pulled in transitively through hive-jdbc. -->
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-shims</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive12.version}</version>
</dependency>
</dependencies>
</profile>
<!-- hive11: active when the hive11 property is defined. Sets hiveJarSuffix
     to ".hive11". The artifacts are listed in a different order than in the
     hive12 profile. -->
<profile>
<id>hive11</id>
<activation>
<property>
<name>hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix>.hive11</hiveJarSuffix>
</properties>
<dependencies>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-shims</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive11.version}</version>
<!-- Keeps commons-logging from being pulled in transitively through hive-jdbc. -->
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-serde</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive11.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@@ -44,6 +44,26 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
</dependency>
<!-- Hive artifacts declared directly on this module (service, jdbc,
     metastore, common). groupId and version come from the hive.groupid and
     hive.version properties, which are not defined in this fragment. -->
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
@@ -195,73 +215,4 @@
<!-- Module-level properties. Sets checkstyle.skip to true; presumably read by
     the Checkstyle plugin to skip its checks for this module (TODO confirm
     against the plugin configuration in the parent POM). -->
<properties>
<checkstyle.skip>true</checkstyle.skip>
</properties>
<!-- Two mutually exclusive Hive profiles, selected by whether the hive11
     property is defined. Both declare the same four Hive artifacts (service,
     jdbc, metastore, common), each from its own groupid/version property
     pair. -->
<profiles>
<!-- hive12: active whenever the hive11 property is NOT defined (the leading
     "!" negates the property check). Sets hiveJarSuffix to an empty value. -->
<profile>
<id>hive12</id>
<activation>
<property>
<name>!hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix />
</properties>
<dependencies>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive12.version}</version>
</dependency>
</dependencies>
</profile>
<!-- hive11: active when the hive11 property is defined. Sets hiveJarSuffix
     to ".hive11". The groupId is taken from the hive11.groupid property
     rather than a hard-coded literal, mirroring how the hive12 profile uses
     hive12.groupid, so the Hive 1.1 coordinates are controlled in one place.
     NOTE(review): assumes hive11.groupid is defined alongside hive11.version
     (e.g. in the parent POM); confirm before merging. -->
<profile>
<id>hive11</id>
<activation>
<property>
<name>hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix>.hive11</hiveJarSuffix>
</properties>
<dependencies>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive11.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>

View File

@@ -239,6 +239,26 @@
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
<!-- Hive artifacts declared directly on this module (service, jdbc,
     metastore, common). groupId and version come from the hive.groupid and
     hive.version properties, which are not defined in this fragment. -->
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>${hive.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-configuration2</artifactId>
@@ -269,74 +289,5 @@
<version>${project.version}</version>
</dependency>
</dependencies>
<!-- Two mutually exclusive Hive profiles, selected by whether the hive11
     property is defined. Both declare the same four Hive artifacts (service,
     jdbc, metastore, common), each from its own groupid/version property
     pair. -->
<profiles>
<!-- hive12: active whenever the hive11 property is NOT defined (the leading
     "!" negates the property check). Sets hiveJarSuffix to an empty value. -->
<profile>
<id>hive12</id>
<activation>
<property>
<name>!hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix />
</properties>
<dependencies>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive12.version}</version>
</dependency>
<dependency>
<groupId>${hive12.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive12.version}</version>
</dependency>
</dependencies>
</profile>
<!-- hive11: active when the hive11 property is defined. Sets hiveJarSuffix
     to ".hive11". -->
<profile>
<id>hive11</id>
<activation>
<property>
<name>hive11</name>
</property>
</activation>
<properties>
<hiveJarSuffix>.hive11</hiveJarSuffix>
</properties>
<dependencies>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-service</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive11.version}</version>
</dependency>
<dependency>
<groupId>${hive11.groupid}</groupId>
<artifactId>hive-common</artifactId>
<version>${hive11.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>