1
0

[HUDI-3859] Fix spark profiles and utilities-slim dep (#5297)

This commit is contained in:
Raymond Xu
2022-04-12 15:33:08 -07:00
committed by GitHub
parent 2d46d5287e
commit 2e6e302efe
4 changed files with 28 additions and 60 deletions

View File

@@ -14,51 +14,26 @@ jobs:
build: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
max-parallel: 8
matrix: matrix:
include: include:
# Spark 2.4.4, scala 2.11
- scalaProfile: "scala-2.11" - scalaProfile: "scala-2.11"
sparkProfile: "spark2.4" sparkProfile: "spark2.4"
sparkVersion: "2.4.4"
flinkProfile: "flink1.13" flinkProfile: "flink1.13"
# Spark 2.4.4, scala 2.12 - scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
flinkProfile: "flink1.14"
- scalaProfile: "scala-2.12" - scalaProfile: "scala-2.12"
sparkProfile: "spark2.4" sparkProfile: "spark2.4"
sparkVersion: "2.4.4"
flinkProfile: "flink1.14"
# Spark 3.1.x
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.0"
flinkProfile: "flink1.13" flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12" - scalaProfile: "scala-2.12"
sparkProfile: "spark3.1" sparkProfile: "spark3.1"
sparkVersion: "3.1.1"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.2"
flinkProfile: "flink1.14" flinkProfile: "flink1.14"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.3"
flinkProfile: "flink1.14"
# Spark 3.2.x
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.2"
sparkVersion: "3.2.0"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12" - scalaProfile: "scala-2.12"
sparkProfile: "spark3.2" sparkProfile: "spark3.2"
sparkVersion: "3.2.1"
flinkProfile: "flink1.14" flinkProfile: "flink1.14"
steps: steps:
@@ -73,16 +48,14 @@ jobs:
env: env:
SCALA_PROFILE: ${{ matrix.scalaProfile }} SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }} FLINK_PROFILE: ${{ matrix.flinkProfile }}
run: run:
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V
- name: Quickstart Test - name: Quickstart Test
env: env:
SCALA_PROFILE: ${{ matrix.scalaProfile }} SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }} SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }} FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x
run: run:
mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark

View File

@@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
``` ```
To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`.
To build the Javadoc for all Java and Scala classes: To build the Javadoc for all Java and Scala classes:
``` ```
# Javadoc generated under target/site/apidocs # Javadoc generated under target/site/apidocs
@@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs
### Build with different Spark versions ### Build with different Spark versions
The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions.
corresponding profile
| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes | | Maven build options | Expected Spark bundle jar name | Notes |
|--|--|--|--| |:--------------------------|:---------------------------------------------|:-------------------------------------------------|
| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default | | (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) |
| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 | | `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) |
| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x | | `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 |
| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x | | `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 |
| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` | | `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 |
| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 | | `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 |
| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 | | `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 |
For example, For example,
``` ```
# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle) # Build against Spark 3.2.x
mvn clean package -DskipTests -Pspark3.2 mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12
# Build against Spark 3.1.x # Build against Spark 3.1.x
mvn clean package -DskipTests -Pspark3.1 mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12
# Build against Spark 2.4.4 and Scala 2.12 # Build against Spark 2.4.4 and Scala 2.12
mvn clean package -DskipTests -Pspark2.4,scala-2.12 mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12
``` ```
### What about "spark-avro" module? #### What about "spark-avro" module?
Starting from version 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages` Starting from version 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`

View File

@@ -112,7 +112,6 @@
<include>org.apache.httpcomponents:httpcore</include> <include>org.apache.httpcomponents:httpcore</include>
<include>org.apache.httpcomponents:fluent-hc</include> <include>org.apache.httpcomponents:fluent-hc</include>
<include>org.antlr:stringtemplate</include> <include>org.antlr:stringtemplate</include>
<include>org.apache.parquet:parquet-avro</include>
<include>com.github.davidmoten:guava-mini</include> <include>com.github.davidmoten:guava-mini</include>
<include>com.github.davidmoten:hilbert-curve</include> <include>com.github.davidmoten:hilbert-curve</include>
@@ -394,13 +393,6 @@
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<!-- Parquet -->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<scope>compile</scope>
</dependency>
<!-- Hive --> <!-- Hive -->
<dependency> <dependency>
<groupId>${hive.groupid}</groupId> <groupId>${hive.groupid}</groupId>

View File

@@ -1570,8 +1570,6 @@
<properties> <properties>
<scala.version>${scala12.version}</scala.version> <scala.version>${scala12.version}</scala.version>
<scala.binary.version>2.12</scala.binary.version> <scala.binary.version>2.12</scala.binary.version>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
<skipITs>true</skipITs>
</properties> </properties>
<activation> <activation>
<property> <property>
@@ -1613,6 +1611,9 @@
<module>hudi-spark-datasource/hudi-spark2</module> <module>hudi-spark-datasource/hudi-spark2</module>
<module>hudi-spark-datasource/hudi-spark2-common</module> <module>hudi-spark-datasource/hudi-spark2-common</module>
</modules> </modules>
<properties>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
</properties>
<activation> <activation>
<activeByDefault>true</activeByDefault> <activeByDefault>true</activeByDefault>
<property> <property>
@@ -1631,6 +1632,7 @@
</modules> </modules>
<properties> <properties>
<sparkbundle.version>2.4</sparkbundle.version> <sparkbundle.version>2.4</sparkbundle.version>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
</properties> </properties>
<activation> <activation>
<property> <property>