1
0

[HUDI-159] Redesigning bundles for lighter-weight integrations

- Documented principles applied for redesign at packaging/README.md
 - No longer depends on several libraries, including commons-codec, commons-io, commons-pool, commons-dbcp, commons-lang, commons-logging, avro-mapred
 - Introduced new FileIOUtils & added a checkstyle rule that flags illegal imports of the libraries above
 - Parquet and Avro dependencies moved to provided scope so that they are picked up from Hive/Spark/Presto instead
 - Pick up jackson jars for the Hive sync tool from HIVE_HOME & unbundle jackson everywhere
 - Remove hive-jdbc standalone jar from being bundled in Spark/Hive/Utilities bundles
 - Reduced the number of classes across bundles by 6.5x
This commit is contained in:
vinoth chandar
2019-09-02 16:15:55 -07:00
committed by Balaji Varadarajan
parent 0e6f078ec4
commit 7a973a6944
60 changed files with 689 additions and 1380 deletions

View File

@@ -23,100 +23,16 @@
<relativePath>../../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>hudi-presto-bundle</artifactId>
<packaging>jar</packaging>
<dependencies>
<!-- Hoodie -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr-bundle</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</dependency>
<!-- Thrift -->
<dependency>
<groupId>org.apache.thrift</groupId>
<artifactId>libthrift</artifactId>
<version>${thrift.version}</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<!-- Apache Commons -->
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
</dependency>
<!-- Httpcomponents-->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
</dependency>
<!-- Hadoop -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
</dependency>
</dependencies>
<properties>
<checkstyle.skip>true</checkstyle.skip>
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
<notice.file>HUDI_NOTICE.txt</notice.file>
</properties>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.rat</groupId>
@@ -125,7 +41,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4</version>
<version>${maven-shade-plugin.version}</version>
<executions>
<execution>
<phase>package</phase>
@@ -134,73 +50,35 @@
</goals>
<configuration>
<createSourcesJar>true</createSourcesJar>
<dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
<artifactSet>
<includes>
<include>org.apache.hudi:hudi-common</include>
<include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.parquet:parquet-avro</include>
<include>com.esotericsoftware:kryo-shaded</include>
<include>org.objenesis:objenesis</include>
<include>com.esotericsoftware:minlog</include>
</includes>
</artifactSet>
<relocations>
<relocation>
<pattern>com.beust.</pattern>
<shadedPattern>org.apache.hudi.com.beust.</shadedPattern>
</relocation>
<relocation>
<pattern>org.joda.</pattern>
<shadedPattern>org.apache.hudi.org.joda.</shadedPattern>
</relocation>
<relocation>
<pattern>com.google.</pattern>
<shadedPattern>org.apache.hudi.com.google.</shadedPattern>
</relocation>
<relocation>
<pattern>org.slf4j.</pattern>
<shadedPattern>org.apache.hudi.org.slf4j.</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.commons.</pattern>
<shadedPattern>org.apache.hudi.org.apache.commons.</shadedPattern>
</relocation>
<relocation>
<pattern>parquet.column</pattern>
<shadedPattern>org.apache.hudi.parquet.column</shadedPattern>
</relocation>
<relocation>
<pattern>parquet.format.</pattern>
<shadedPattern>org.apache.hudi.parquet.format.</shadedPattern>
</relocation>
<relocation>
<pattern>parquet.hadoop.</pattern>
<shadedPattern>org.apache.hudi.parquet.hadoop.</shadedPattern>
</relocation>
<relocation>
<pattern>parquet.schema.</pattern>
<shadedPattern>org.apache.hudi.parquet.schema.</shadedPattern>
</relocation>
<relocation>
<pattern>com.esotericsoftware.kryo.</pattern>
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
</relocation>
<relocation>
<pattern>org.objenesis.</pattern>
<shadedPattern>org.apache.hudi.org.objenesis.</shadedPattern>
</relocation>
<relocation>
<pattern>com.esotericsoftware.minlog.</pattern>
<shadedPattern>org.apache.hudi.com.esotericsoftware.minlog.</shadedPattern>
</relocation>
<relocation>
<pattern>com.esotericsoftware.kryo.</pattern>
<shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern>
</relocation>
<relocation>
<pattern>org.objenesis.</pattern>
<shadedPattern>org.apache.hudi.org.objenesis.</shadedPattern>
</relocation>
<relocation>
<pattern>com.esotericsoftware.minlog.</pattern>
<shadedPattern>org.apache.hudi.com.esotericsoftware.minlog.</shadedPattern>
</relocation>
</relocations>
<createDependencyReducedPom>false</createDependencyReducedPom>
<artifactSet>
<excludes>
<exclude>log4j:log4j</exclude>
<exclude>org.apache.hadoop:*</exclude>
<exclude>org.apache.hive:*</exclude>
<exclude>org.apache.derby:derby</exclude>
<!--Already a dependency in presto-hive connector-->
<exclude>org.apache.thrift:*</exclude>
<!--Provided by aws-java-sdk-core dependency in presto-hive connector-->
<exclude>org.apache.httpcomponents:*</exclude>
<!--Provided by hive-hadoop2-->
<exclude>com.fasterxml.jackson.core:*</exclude>
<exclude>com.fasterxml.jackson.datatype:jackson-datatype-guava</exclude>
<exclude>org.apache.parquet:*</exclude>
</excludes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
@@ -220,11 +98,27 @@
</executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
<resource>
<directory>src/test/resources</directory>
</resource>
</resources>
</build>
<properties>
<checkstyle.skip>true</checkstyle.skip>
<notice.dir>${project.basedir}/src/main/resources/META-INF</notice.dir>
<notice.file>HUDI_NOTICE.txt</notice.file>
</properties>
<dependencies>
<!-- Hoodie -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr-bundle</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>