diff --git a/README.md b/README.md
index 6bb56597b..a4459f743 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,11 @@ Prerequisites for building Apache Hudi:
# Checkout code and build
git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi
mvn clean package -DskipTests -DskipITs
+
+# Start spark-shell with the freshly built Hudi Spark bundle
+spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
+ --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \
+ --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
```
To build the Javadoc for all Java and Scala classes:
@@ -71,6 +76,22 @@ The default Scala version supported is 2.11. To build for Scala 2.12 version, bu
mvn clean package -DskipTests -DskipITs -Dscala-2.12
```
+### Build without spark-avro module
+
+The default Hudi Spark bundle jar includes the spark-avro module. To build the bundle without it, use the `spark-shade-unbundle-avro` profile:
+
+```
+# Checkout code and build
+git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi
+mvn clean package -DskipTests -DskipITs -Pspark-shade-unbundle-avro
+
+# Start spark-shell; spark-avro is no longer bundled, so supply it via --packages
+spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
+ --packages org.apache.spark:spark-avro_2.11:2.4.4 \
+ --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \
+ --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
+```
+
## Quickstart
Please visit [https://hudi.apache.org/docs/quick-start-guide.html](https://hudi.apache.org/docs/quick-start-guide.html) to quickly explore Hudi's capabilities using spark-shell.
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml
index c0acaae6e..0222b9264 100644
--- a/packaging/hudi-spark-bundle/pom.xml
+++ b/packaging/hudi-spark-bundle/pom.xml
@@ -89,6 +89,7 @@
io.dropwizard.metrics:metrics-graphite
com.yammer.metrics:metrics-core
+ org.apache.spark:spark-avro_${scala.binary.version}
org.apache.hive:hive-common
org.apache.hive:hive-service
org.apache.hive:hive-service-rpc
@@ -101,6 +102,10 @@
com.beust.jcommander.
org.apache.hudi.com.beust.jcommander.
+
+ org.apache.spark.sql.avro.
+ ${spark.bundle.spark.shade.prefix}org.apache.spark.sql.avro.
+
org.apache.hive.jdbc.
${spark.bundle.hive.shade.prefix}org.apache.hive.jdbc.
@@ -217,6 +222,13 @@
${project.version}
+
+
+ org.apache.spark
+ spark-avro_${scala.binary.version}
+ ${spark.bundle.avro.scope}
+
+
org.apache.parquet
@@ -271,6 +283,12 @@
org.apache.hudi.
+
+ spark-shade-unbundle-avro
+
+ provided
+
+
+
-
-
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 69f566ac8..71979f199 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,6 +112,8 @@
${project.basedir}
provided
+ compile
+ org.apache.hudi.spark.
provided
-Xmx1024m -XX:MaxPermSize=256m