[HUDI-3859] Fix spark profiles and utilities-slim dep (#5297)
This commit is contained in:
41
.github/workflows/bot.yml
vendored
41
.github/workflows/bot.yml
vendored
@@ -14,51 +14,26 @@ jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
max-parallel: 8
|
||||
matrix:
|
||||
include:
|
||||
# Spark 2.4.4, scala 2.11
|
||||
- scalaProfile: "scala-2.11"
|
||||
sparkProfile: "spark2.4"
|
||||
sparkVersion: "2.4.4"
|
||||
flinkProfile: "flink1.13"
|
||||
|
||||
# Spark 2.4.4, scala 2.12
|
||||
- scalaProfile: "scala-2.11"
|
||||
sparkProfile: "spark2.4"
|
||||
flinkProfile: "flink1.14"
|
||||
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark2.4"
|
||||
sparkVersion: "2.4.4"
|
||||
flinkProfile: "flink1.14"
|
||||
|
||||
# Spark 3.1.x
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.1"
|
||||
sparkVersion: "3.1.0"
|
||||
flinkProfile: "flink1.13"
|
||||
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.1"
|
||||
sparkVersion: "3.1.1"
|
||||
flinkProfile: "flink1.13"
|
||||
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.1"
|
||||
sparkVersion: "3.1.2"
|
||||
flinkProfile: "flink1.14"
|
||||
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.1"
|
||||
sparkVersion: "3.1.3"
|
||||
flinkProfile: "flink1.14"
|
||||
|
||||
# Spark 3.2.x
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.2"
|
||||
sparkVersion: "3.2.0"
|
||||
flinkProfile: "flink1.13"
|
||||
|
||||
- scalaProfile: "scala-2.12"
|
||||
sparkProfile: "spark3.2"
|
||||
sparkVersion: "3.2.1"
|
||||
flinkProfile: "flink1.14"
|
||||
|
||||
steps:
|
||||
@@ -73,16 +48,14 @@ jobs:
|
||||
env:
|
||||
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
||||
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
||||
SPARK_VERSION: ${{ matrix.sparkVersion }}
|
||||
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
||||
run:
|
||||
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V
|
||||
mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V
|
||||
- name: Quickstart Test
|
||||
env:
|
||||
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
||||
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
||||
SPARK_VERSION: ${{ matrix.sparkVersion }}
|
||||
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
||||
if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x
|
||||
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x
|
||||
run:
|
||||
mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
|
||||
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
|
||||
|
||||
33
README.md
33
README.md
@@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
|
||||
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
|
||||
```
|
||||
|
||||
To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`.
|
||||
|
||||
To build the Javadoc for all Java and Scala classes:
|
||||
```
|
||||
# Javadoc generated under target/site/apidocs
|
||||
@@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs
|
||||
|
||||
### Build with different Spark versions
|
||||
|
||||
The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the
|
||||
corresponding profile
|
||||
The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions.
|
||||
|
||||
| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes |
|
||||
|--|--|--|--|
|
||||
| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default |
|
||||
| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 |
|
||||
| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x |
|
||||
| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x |
|
||||
| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` |
|
||||
| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 |
|
||||
| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 |
|
||||
| Maven build options | Expected Spark bundle jar name | Notes |
|
||||
|:--------------------------|:---------------------------------------------|:-------------------------------------------------|
|
||||
| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) |
|
||||
| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) |
|
||||
| `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 |
|
||||
| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 |
|
||||
| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 |
|
||||
| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 |
|
||||
| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 |
|
||||
|
||||
For example,
|
||||
```
|
||||
# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle)
|
||||
mvn clean package -DskipTests -Pspark3.2
|
||||
# Build against Spark 3.2.x
|
||||
mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12
|
||||
|
||||
# Build against Spark 3.1.x
|
||||
mvn clean package -DskipTests -Pspark3.1
|
||||
mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12
|
||||
|
||||
# Build against Spark 2.4.4 and Scala 2.12
|
||||
mvn clean package -DskipTests -Pspark2.4,scala-2.12
|
||||
mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12
|
||||
```
|
||||
|
||||
### What about "spark-avro" module?
|
||||
#### What about "spark-avro" module?
|
||||
|
||||
Starting from version 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`
|
||||
|
||||
|
||||
@@ -112,7 +112,6 @@
|
||||
<include>org.apache.httpcomponents:httpcore</include>
|
||||
<include>org.apache.httpcomponents:fluent-hc</include>
|
||||
<include>org.antlr:stringtemplate</include>
|
||||
<include>org.apache.parquet:parquet-avro</include>
|
||||
|
||||
<include>com.github.davidmoten:guava-mini</include>
|
||||
<include>com.github.davidmoten:hilbert-curve</include>
|
||||
@@ -394,13 +393,6 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Parquet -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Hive -->
|
||||
<dependency>
|
||||
<groupId>${hive.groupid}</groupId>
|
||||
|
||||
6
pom.xml
6
pom.xml
@@ -1570,8 +1570,6 @@
|
||||
<properties>
|
||||
<scala.version>${scala12.version}</scala.version>
|
||||
<scala.binary.version>2.12</scala.binary.version>
|
||||
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
||||
<skipITs>true</skipITs>
|
||||
</properties>
|
||||
<activation>
|
||||
<property>
|
||||
@@ -1613,6 +1611,9 @@
|
||||
<module>hudi-spark-datasource/hudi-spark2</module>
|
||||
<module>hudi-spark-datasource/hudi-spark2-common</module>
|
||||
</modules>
|
||||
<properties>
|
||||
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
||||
</properties>
|
||||
<activation>
|
||||
<activeByDefault>true</activeByDefault>
|
||||
<property>
|
||||
@@ -1631,6 +1632,7 @@
|
||||
</modules>
|
||||
<properties>
|
||||
<sparkbundle.version>2.4</sparkbundle.version>
|
||||
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
||||
</properties>
|
||||
<activation>
|
||||
<property>
|
||||
|
||||
Reference in New Issue
Block a user