1
0

[HUDI-3859] Fix spark profiles and utilities-slim dep (#5297)

This commit is contained in:
Raymond Xu
2022-04-12 15:33:08 -07:00
committed by GitHub
parent 2d46d5287e
commit 2e6e302efe
4 changed files with 28 additions and 60 deletions

View File

@@ -14,51 +14,26 @@ jobs:
build:
runs-on: ubuntu-latest
strategy:
max-parallel: 8
matrix:
include:
# Spark 2.4.4, scala 2.11
- scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
sparkVersion: "2.4.4"
flinkProfile: "flink1.13"
# Spark 2.4.4, scala 2.12
- scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
flinkProfile: "flink1.14"
- scalaProfile: "scala-2.12"
sparkProfile: "spark2.4"
sparkVersion: "2.4.4"
flinkProfile: "flink1.14"
# Spark 3.1.x
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.0"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.1"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.2"
flinkProfile: "flink1.14"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
sparkVersion: "3.1.3"
flinkProfile: "flink1.14"
# Spark 3.2.x
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.2"
sparkVersion: "3.2.0"
flinkProfile: "flink1.13"
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.2"
sparkVersion: "3.2.1"
flinkProfile: "flink1.14"
steps:
@@ -73,16 +48,14 @@ jobs:
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
run:
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V
mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V
- name: Quickstart Test
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_VERSION: ${{ matrix.sparkVersion }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x
run:
mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark

View File

@@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
```
To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`.
To build the Javadoc for all Java and Scala classes:
```
# Javadoc generated under target/site/apidocs
@@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs
### Build with different Spark versions
The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the
corresponding profile:
The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions.
| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes |
|--|--|--|--|
| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default |
| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4 (the default Spark version) built with Scala 2.12 |
| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x |
| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x |
| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` |
| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 |
| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 |
| Maven build options | Expected Spark bundle jar name | Notes |
|:--------------------------|:---------------------------------------------|:-------------------------------------------------|
| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) |
| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) |
| `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 |
| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 |
| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 |
| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 |
| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 |
For example,
```
# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle)
mvn clean package -DskipTests -Pspark3.2
# Build against Spark 3.2.x
mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12
# Build against Spark 3.1.x
mvn clean package -DskipTests -Pspark3.1
mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12
# Build against Spark 2.4.4 and Scala 2.12
mvn clean package -DskipTests -Pspark2.4,scala-2.12
mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12
```
### What about "spark-avro" module?
#### What about "spark-avro" module?
Starting from version 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`

View File

@@ -112,7 +112,6 @@
<include>org.apache.httpcomponents:httpcore</include>
<include>org.apache.httpcomponents:fluent-hc</include>
<include>org.antlr:stringtemplate</include>
<include>org.apache.parquet:parquet-avro</include>
<include>com.github.davidmoten:guava-mini</include>
<include>com.github.davidmoten:hilbert-curve</include>
@@ -394,13 +393,6 @@
<version>${project.version}</version>
</dependency>
<!-- Parquet -->
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId>
<scope>compile</scope>
</dependency>
<!-- Hive -->
<dependency>
<groupId>${hive.groupid}</groupId>

View File

@@ -1570,8 +1570,6 @@
<properties>
<scala.version>${scala12.version}</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
<skipITs>true</skipITs>
</properties>
<activation>
<property>
@@ -1613,6 +1611,9 @@
<module>hudi-spark-datasource/hudi-spark2</module>
<module>hudi-spark-datasource/hudi-spark2-common</module>
</modules>
<properties>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
</properties>
<activation>
<activeByDefault>true</activeByDefault>
<property>
@@ -1631,6 +1632,7 @@
</modules>
<properties>
<sparkbundle.version>2.4</sparkbundle.version>
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
</properties>
<activation>
<property>