1
0

[HUDI-584] Relocate spark-avro dependency by maven-shade-plugin (#1290)

This commit is contained in:
Bhavani Sudha Saktheeswaran
2020-03-04 11:01:49 -08:00
committed by GitHub
parent 9d46ce380a
commit 5f85c26704
3 changed files with 43 additions and 2 deletions

View File

@@ -55,6 +55,11 @@ Prerequisites for building Apache Hudi:
# Checkout code and build
git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi
mvn clean package -DskipTests -DskipITs
# Start command
spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
```
To build the Javadoc for all Java and Scala classes:
@@ -71,6 +76,22 @@ The default Scala version supported is 2.11. To build for Scala 2.12 version, bu
mvn clean package -DskipTests -DskipITs -Dscala-2.12
```
### Build without the spark-avro module
The default Hudi jar bundles the spark-avro module. To build without the spark-avro module, build using the `spark-shade-unbundle-avro` profile.
```
# Checkout code and build
git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi
mvn clean package -DskipTests -DskipITs -Pspark-shade-unbundle-avro
# Start command
spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--packages org.apache.spark:spark-avro_2.11:2.4.4 \
--jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
```
## Quickstart
Please visit [https://hudi.apache.org/docs/quick-start-guide.html](https://hudi.apache.org/docs/quick-start-guide.html) to quickly explore Hudi's capabilities using spark-shell.

View File

@@ -89,6 +89,7 @@
<include>io.dropwizard.metrics:metrics-graphite</include>
<include>com.yammer.metrics:metrics-core</include>
<include>org.apache.spark:spark-avro_${scala.binary.version}</include>
<include>org.apache.hive:hive-common</include>
<include>org.apache.hive:hive-service</include>
<include>org.apache.hive:hive-service-rpc</include>
@@ -101,6 +102,10 @@
<pattern>com.beust.jcommander.</pattern>
<shadedPattern>org.apache.hudi.com.beust.jcommander.</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.spark.sql.avro.</pattern>
<shadedPattern>${spark.bundle.spark.shade.prefix}org.apache.spark.sql.avro.</shadedPattern>
</relocation>
<relocation>
<pattern>org.apache.hive.jdbc.</pattern>
<shadedPattern>${spark.bundle.hive.shade.prefix}org.apache.hive.jdbc.</shadedPattern>
@@ -217,6 +222,13 @@
<version>${project.version}</version>
</dependency>
<!-- Spark (Packages) -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-avro_${scala.binary.version}</artifactId>
<scope>${spark.bundle.avro.scope}</scope>
</dependency>
<!-- Parquet -->
<dependency>
<groupId>org.apache.parquet</groupId>
@@ -271,6 +283,12 @@
<spark.bundle.hive.shade.prefix>org.apache.hudi.</spark.bundle.hive.shade.prefix>
</properties>
</profile>
<profile>
<id>spark-shade-unbundle-avro</id>
<properties>
<spark.bundle.avro.scope>provided</spark.bundle.avro.scope>
<spark.bundle.spark.shade.prefix></spark.bundle.spark.shade.prefix>
</properties>
</profile>
</profiles>
</project>

View File

@@ -112,6 +112,8 @@
<main.basedir>${project.basedir}</main.basedir>
<spark.bundle.hive.scope>provided</spark.bundle.hive.scope>
<spark.bundle.hive.shade.prefix></spark.bundle.hive.shade.prefix>
<spark.bundle.avro.scope>compile</spark.bundle.avro.scope>
<spark.bundle.spark.shade.prefix>org.apache.hudi.spark.</spark.bundle.spark.shade.prefix>
<utilities.bundle.hive.scope>provided</utilities.bundle.hive.scope>
<utilities.bundle.hive.shade.prefix></utilities.bundle.hive.shade.prefix>
<argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>