(1) Apply transformation when using delta-streamer to ingest data. (2) Add Hudi Incremental Source for Delta Streamer (3) Allow delta-streamer config-property to be passed as command-line (4) Add Hive Integration to Delta-Streamer and address Review comments (5) Ensure MultiPartKeysValueExtractor handle hive style partition description (6) Reuse same spark session on both source and transformer (7) Support extracting partition fields from _hoodie_partition_path for HoodieIncrSource (8) Reuse Binary Avro coders (9) Add push down filter for Incremental source (10) Add Hoodie DeltaStreamer metrics to track total time taken
219 lines
7.1 KiB
XML
219 lines
7.1 KiB
XML
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!--
|
|
~ Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
|
~
|
|
~ Licensed under the Apache License, Version 2.0 (the "License");
|
|
~ you may not use this file except in compliance with the License.
|
|
~ You may obtain a copy of the License at
|
|
~
|
|
~ http://www.apache.org/licenses/LICENSE-2.0
|
|
~
|
|
~ Unless required by applicable law or agreed to in writing, software
|
|
~ distributed under the License is distributed on an "AS IS" BASIS,
|
|
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
~ See the License for the specific language governing permissions and
|
|
~ limitations under the License.
|
|
-->
|
|
|
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
<parent>
|
|
<artifactId>hoodie</artifactId>
|
|
<groupId>com.uber.hoodie</groupId>
|
|
<version>0.4.5-SNAPSHOT</version>
|
|
<relativePath>../../pom.xml</relativePath>
|
|
</parent>
|
|
<modelVersion>4.0.0</modelVersion>
|
|
|
|
<artifactId>hoodie-hive-bundle</artifactId>
|
|
<packaging>jar</packaging>
|
|
|
|
<dependencies>
|
|
<dependency>
|
|
<groupId>org.apache.hadoop</groupId>
|
|
<artifactId>hadoop-common</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.hadoop</groupId>
|
|
<artifactId>hadoop-client</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.hadoop</groupId>
|
|
<artifactId>hadoop-hdfs</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.hadoop</groupId>
|
|
<artifactId>hadoop-auth</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>${hive.groupid}</groupId>
|
|
<artifactId>hive-service</artifactId>
|
|
<version>${hive.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>${hive.groupid}</groupId>
|
|
<artifactId>hive-jdbc</artifactId>
|
|
<version>${hive.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>${hive.groupid}</groupId>
|
|
<artifactId>hive-metastore</artifactId>
|
|
<version>${hive.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>${hive.groupid}</groupId>
|
|
<artifactId>hive-common</artifactId>
|
|
<version>${hive.version}</version>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>com.google.guava</groupId>
|
|
<artifactId>guava</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.apache.thrift</groupId>
|
|
<artifactId>libthrift</artifactId>
|
|
<version>0.9.2</version>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>joda-time</groupId>
|
|
<artifactId>joda-time</artifactId>
|
|
</dependency>
|
|
|
|
<!-- Apache commons -->
|
|
<dependency>
|
|
<groupId>commons-dbcp</groupId>
|
|
<artifactId>commons-dbcp</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>commons-io</groupId>
|
|
<artifactId>commons-io</artifactId>
|
|
</dependency>
|
|
|
|
<!-- Logging -->
|
|
<dependency>
|
|
<groupId>org.slf4j</groupId>
|
|
<artifactId>slf4j-api</artifactId>
|
|
</dependency>
|
|
<dependency>
|
|
<groupId>org.slf4j</groupId>
|
|
<artifactId>slf4j-log4j12</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>com.beust</groupId>
|
|
<artifactId>jcommander</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>org.apache.httpcomponents</groupId>
|
|
<artifactId>httpcore</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>org.apache.httpcomponents</groupId>
|
|
<artifactId>httpclient</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>com.twitter</groupId>
|
|
<artifactId>parquet-avro</artifactId>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>com.uber.hoodie</groupId>
|
|
<artifactId>hoodie-hadoop-mr-bundle</artifactId>
|
|
<version>${project.version}</version>
|
|
</dependency>
|
|
|
|
<dependency>
|
|
<groupId>com.uber.hoodie</groupId>
|
|
<artifactId>hoodie-hive</artifactId>
|
|
<version>${project.version}</version>
|
|
<exclusions>
|
|
<exclusion>
|
|
<!-- All other hoodie deps will come from hoodie-hadoop-mr-bundle -->
|
|
<groupId>com.uber.hoodie</groupId>
|
|
<artifactId>*</artifactId>
|
|
</exclusion>
|
|
</exclusions>
|
|
</dependency>
|
|
</dependencies>
|
|
|
|
<build>
|
|
<plugins>
|
|
<plugin>
|
|
<groupId>org.apache.rat</groupId>
|
|
<artifactId>apache-rat-plugin</artifactId>
|
|
</plugin>
|
|
<plugin>
|
|
<groupId>org.apache.maven.plugins</groupId>
|
|
<artifactId>maven-shade-plugin</artifactId>
|
|
<version>2.4</version>
|
|
<executions>
|
|
<execution>
|
|
<phase>package</phase>
|
|
<goals>
|
|
<goal>shade</goal>
|
|
</goals>
|
|
<configuration>
|
|
<relocations>
|
|
<relocation>
|
|
<pattern>com.beust.</pattern>
|
|
<shadedPattern>com.uber.hoodie.com.beust.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>org.joda.</pattern>
|
|
<shadedPattern>com.uber.hoodie.org.joda.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>com.google.</pattern>
|
|
<shadedPattern>com.uber.hoodie.com.google.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>org.slf4j.</pattern>
|
|
<shadedPattern>com.uber.hoodie.org.slf4j.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>org.apache.commons.</pattern>
|
|
<shadedPattern>com.uber.hoodie.org.apache.commons.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>parquet.column</pattern>
|
|
<shadedPattern>com.uber.hoodie.parquet.column</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>parquet.format.</pattern>
|
|
<shadedPattern>com.uber.hoodie.parquet.format.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>parquet.hadoop.</pattern>
|
|
<shadedPattern>com.uber.hoodie.parquet.hadoop.</shadedPattern>
|
|
</relocation>
|
|
<relocation>
|
|
<pattern>parquet.schema.</pattern>
|
|
<shadedPattern>com.uber.hoodie.parquet.schema.</shadedPattern>
|
|
</relocation>
|
|
</relocations>
|
|
<createDependencyReducedPom>false</createDependencyReducedPom>
|
|
<artifactSet>
|
|
<excludes>
|
|
<exclude>log4j:log4j</exclude>
|
|
<exclude>org.apache.hadoop:*</exclude>
|
|
<exclude>org.apache.hive:*</exclude>
|
|
<exclude>org.apache.derby:derby</exclude>
|
|
</excludes>
|
|
</artifactSet>
|
|
<finalName>${project.artifactId}-${project.version}${hiveJarSuffix}</finalName>
|
|
</configuration>
|
|
</execution>
|
|
</executions>
|
|
</plugin>
|
|
</plugins>
|
|
</build>
|
|
|
|
<properties>
|
|
<checkstyle.skip>true</checkstyle.skip>
|
|
</properties>
|
|
</project>
|