diff --git a/README.md b/README.md index 6bb56597b..a4459f743 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,11 @@ Prerequisites for building Apache Hudi: # Checkout code and build git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi mvn clean package -DskipTests -DskipITs + +# Start command +spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ + --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \ + --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' ``` To build the Javadoc for all Java and Scala classes: @@ -71,6 +76,22 @@ The default Scala version supported is 2.11. To build for Scala 2.12 version, bu mvn clean package -DskipTests -DskipITs -Dscala-2.12 ``` +### Build without spark-avro module + +The default hudi-jar bundles the spark-avro module. To build without the spark-avro module, build using the `spark-shade-unbundle-avro` profile. + +``` +# Checkout code and build +git clone https://github.com/apache/incubator-hudi.git && cd incubator-hudi +mvn clean package -DskipTests -DskipITs -Pspark-shade-unbundle-avro + +# Start command +spark-2.4.4-bin-hadoop2.7/bin/spark-shell \ + --packages org.apache.spark:spark-avro_2.11:2.4.4 \ + --jars `ls packaging/hudi-spark-bundle/target/hudi-spark-bundle_2.11-*.*.*-SNAPSHOT.jar` \ + --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' +``` + ## Quickstart Please visit [https://hudi.apache.org/docs/quick-start-guide.html](https://hudi.apache.org/docs/quick-start-guide.html) to quickly explore Hudi's capabilities using spark-shell. 
diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index c0acaae6e..0222b9264 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -89,6 +89,7 @@ io.dropwizard.metrics:metrics-graphite com.yammer.metrics:metrics-core + org.apache.spark:spark-avro_${scala.binary.version} org.apache.hive:hive-common org.apache.hive:hive-service org.apache.hive:hive-service-rpc @@ -101,6 +102,10 @@ com.beust.jcommander. org.apache.hudi.com.beust.jcommander. + + org.apache.spark.sql.avro. + ${spark.bundle.spark.shade.prefix}org.apache.spark.sql.avro. + org.apache.hive.jdbc. ${spark.bundle.hive.shade.prefix}org.apache.hive.jdbc. @@ -217,6 +222,13 @@ ${project.version} + + + org.apache.spark + spark-avro_${scala.binary.version} + ${spark.bundle.avro.scope} + + org.apache.parquet @@ -271,6 +283,12 @@ org.apache.hudi. + + spark-shade-unbundle-avro + + provided + + + - - + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 69f566ac8..71979f199 100644 --- a/pom.xml +++ b/pom.xml @@ -112,6 +112,8 @@ ${project.basedir} provided + compile + org.apache.hudi.spark. provided -Xmx1024m -XX:MaxPermSize=256m