[HUDI-3859] Fix spark profiles and utilities-slim dep (#5297)
This commit is contained in:
41
.github/workflows/bot.yml
vendored
41
.github/workflows/bot.yml
vendored
@@ -14,51 +14,26 @@ jobs:
|
|||||||
build:
|
build:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
max-parallel: 8
|
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
# Spark 2.4.4, scala 2.11
|
|
||||||
- scalaProfile: "scala-2.11"
|
- scalaProfile: "scala-2.11"
|
||||||
sparkProfile: "spark2.4"
|
sparkProfile: "spark2.4"
|
||||||
sparkVersion: "2.4.4"
|
|
||||||
flinkProfile: "flink1.13"
|
flinkProfile: "flink1.13"
|
||||||
|
|
||||||
# Spark 2.4.4, scala 2.12
|
- scalaProfile: "scala-2.11"
|
||||||
|
sparkProfile: "spark2.4"
|
||||||
|
flinkProfile: "flink1.14"
|
||||||
|
|
||||||
- scalaProfile: "scala-2.12"
|
- scalaProfile: "scala-2.12"
|
||||||
sparkProfile: "spark2.4"
|
sparkProfile: "spark2.4"
|
||||||
sparkVersion: "2.4.4"
|
|
||||||
flinkProfile: "flink1.14"
|
|
||||||
|
|
||||||
# Spark 3.1.x
|
|
||||||
- scalaProfile: "scala-2.12"
|
|
||||||
sparkProfile: "spark3.1"
|
|
||||||
sparkVersion: "3.1.0"
|
|
||||||
flinkProfile: "flink1.13"
|
flinkProfile: "flink1.13"
|
||||||
|
|
||||||
- scalaProfile: "scala-2.12"
|
- scalaProfile: "scala-2.12"
|
||||||
sparkProfile: "spark3.1"
|
sparkProfile: "spark3.1"
|
||||||
sparkVersion: "3.1.1"
|
|
||||||
flinkProfile: "flink1.13"
|
|
||||||
|
|
||||||
- scalaProfile: "scala-2.12"
|
|
||||||
sparkProfile: "spark3.1"
|
|
||||||
sparkVersion: "3.1.2"
|
|
||||||
flinkProfile: "flink1.14"
|
flinkProfile: "flink1.14"
|
||||||
|
|
||||||
- scalaProfile: "scala-2.12"
|
|
||||||
sparkProfile: "spark3.1"
|
|
||||||
sparkVersion: "3.1.3"
|
|
||||||
flinkProfile: "flink1.14"
|
|
||||||
|
|
||||||
# Spark 3.2.x
|
|
||||||
- scalaProfile: "scala-2.12"
|
|
||||||
sparkProfile: "spark3.2"
|
|
||||||
sparkVersion: "3.2.0"
|
|
||||||
flinkProfile: "flink1.13"
|
|
||||||
|
|
||||||
- scalaProfile: "scala-2.12"
|
- scalaProfile: "scala-2.12"
|
||||||
sparkProfile: "spark3.2"
|
sparkProfile: "spark3.2"
|
||||||
sparkVersion: "3.2.1"
|
|
||||||
flinkProfile: "flink1.14"
|
flinkProfile: "flink1.14"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -73,16 +48,14 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
||||||
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
||||||
SPARK_VERSION: ${{ matrix.sparkVersion }}
|
|
||||||
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
||||||
run:
|
run:
|
||||||
mvn clean install -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -Pintegration-tests -DskipTests=true -B -V
|
mvn clean install -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipTests=true -B -V
|
||||||
- name: Quickstart Test
|
- name: Quickstart Test
|
||||||
env:
|
env:
|
||||||
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
SCALA_PROFILE: ${{ matrix.scalaProfile }}
|
||||||
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
SPARK_PROFILE: ${{ matrix.sparkProfile }}
|
||||||
SPARK_VERSION: ${{ matrix.sparkVersion }}
|
|
||||||
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
FLINK_PROFILE: ${{ matrix.flinkProfile }}
|
||||||
if: ${{ !startsWith(env.SPARK_VERSION, '3.2.') }} # skip test spark 3.2 before hadoop upgrade to 3.x
|
if: ${{ !endsWith(env.SPARK_PROFILE, '3.2') }} # skip test spark 3.2 before hadoop upgrade to 3.x
|
||||||
run:
|
run:
|
||||||
mvn test -P "unit-tests" -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -Dspark.version="$SPARK_VERSION" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
|
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DfailIfNoTests=false -pl hudi-examples/hudi-examples-flink,hudi-examples/hudi-examples-java,hudi-examples/hudi-examples-spark
|
||||||
|
|||||||
33
README.md
33
README.md
@@ -64,6 +64,8 @@ spark-2.4.4-bin-hadoop2.7/bin/spark-shell \
|
|||||||
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
|
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To build for integration tests that include `hudi-integ-test-bundle`, use `-Dintegration-tests`.
|
||||||
|
|
||||||
To build the Javadoc for all Java and Scala classes:
|
To build the Javadoc for all Java and Scala classes:
|
||||||
```
|
```
|
||||||
# Javadoc generated under target/site/apidocs
|
# Javadoc generated under target/site/apidocs
|
||||||
@@ -72,32 +74,31 @@ mvn clean javadoc:aggregate -Pjavadocs
|
|||||||
|
|
||||||
### Build with different Spark versions
|
### Build with different Spark versions
|
||||||
|
|
||||||
The default Spark version supported is 2.4.4. To build for different Spark versions and Scala 2.12, use the
|
The default Spark version supported is 2.4.4. Refer to the table below for building with different Spark and Scala versions.
|
||||||
corresponding profile
|
|
||||||
|
|
||||||
| Label | Artifact Name for Spark Bundle | Maven Profile Option | Notes |
|
| Maven build options | Expected Spark bundle jar name | Notes |
|
||||||
|--|--|--|--|
|
|:--------------------------|:---------------------------------------------|:-------------------------------------------------|
|
||||||
| Spark 2.4, Scala 2.11 | hudi-spark2.4-bundle_2.11 | `-Pspark2.4` | For Spark 2.4.4, which is the same as the default |
|
| (empty) | hudi-spark-bundle_2.11 (legacy bundle name) | For Spark 2.4.4 and Scala 2.11 (default options) |
|
||||||
| Spark 2.4, Scala 2.12 | hudi-spark2.4-bundle_2.12 | `-Pspark2.4,scala-2.12` | For Spark 2.4.4, which is the same as the default and Scala 2.12 |
|
| `-Dspark2.4` | hudi-spark2.4-bundle_2.11 | For Spark 2.4.4 and Scala 2.11 (same as default) |
|
||||||
| Spark 3.1, Scala 2.12 | hudi-spark3.1-bundle_2.12 | `-Pspark3.1` | For Spark 3.1.x |
|
| `-Dspark2.4 -Dscala-2.12` | hudi-spark2.4-bundle_2.12 | For Spark 2.4.4 and Scala 2.12 |
|
||||||
| Spark 3.2, Scala 2.12 | hudi-spark3.2-bundle_2.12 | `-Pspark3.2` | For Spark 3.2.x |
|
| `-Dspark3.1 -Dscala-2.12` | hudi-spark3.1-bundle_2.12 | For Spark 3.1.x and Scala 2.12 |
|
||||||
| Spark 3, Scala 2.12 | hudi-spark3-bundle_2.12 | `-Pspark3` | This is the same as `Spark 3.2, Scala 2.12` |
|
| `-Dspark3.2 -Dscala-2.12` | hudi-spark3.2-bundle_2.12 | For Spark 3.2.x and Scala 2.12 |
|
||||||
| Spark, Scala 2.11 | hudi-spark-bundle_2.11 | Default | The default profile, supporting Spark 2.4.4 |
|
| `-Dspark3` | hudi-spark3-bundle_2.12 (legacy bundle name) | For Spark 3.2.x and Scala 2.12 |
|
||||||
| Spark, Scala 2.12 | hudi-spark-bundle_2.12 | `-Pscala-2.12` | The default profile (for Spark 2.4.4) with Scala 2.12 |
|
| `-Dscala-2.12` | hudi-spark-bundle_2.12 (legacy bundle name) | For Spark 2.4.4 and Scala 2.12 |
|
||||||
|
|
||||||
For example,
|
For example,
|
||||||
```
|
```
|
||||||
# Build against Spark 3.2.x (the default build shipped with the public Spark 3 bundle)
|
# Build against Spark 3.2.x
|
||||||
mvn clean package -DskipTests -Pspark3.2
|
mvn clean package -DskipTests -Dspark3.2 -Dscala-2.12
|
||||||
|
|
||||||
# Build against Spark 3.1.x
|
# Build against Spark 3.1.x
|
||||||
mvn clean package -DskipTests -Pspark3.1
|
mvn clean package -DskipTests -Dspark3.1 -Dscala-2.12
|
||||||
|
|
||||||
# Build against Spark 2.4.4 and Scala 2.12
|
# Build against Spark 2.4.4 and Scala 2.12
|
||||||
mvn clean package -DskipTests -Pspark2.4,scala-2.12
|
mvn clean package -DskipTests -Dspark2.4 -Dscala-2.12
|
||||||
```
|
```
|
||||||
|
|
||||||
### What about "spark-avro" module?
|
#### What about "spark-avro" module?
|
||||||
|
|
||||||
Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`
|
Starting from versions 0.11, Hudi no longer requires `spark-avro` to be specified using `--packages`
|
||||||
|
|
||||||
|
|||||||
@@ -112,7 +112,6 @@
|
|||||||
<include>org.apache.httpcomponents:httpcore</include>
|
<include>org.apache.httpcomponents:httpcore</include>
|
||||||
<include>org.apache.httpcomponents:fluent-hc</include>
|
<include>org.apache.httpcomponents:fluent-hc</include>
|
||||||
<include>org.antlr:stringtemplate</include>
|
<include>org.antlr:stringtemplate</include>
|
||||||
<include>org.apache.parquet:parquet-avro</include>
|
|
||||||
|
|
||||||
<include>com.github.davidmoten:guava-mini</include>
|
<include>com.github.davidmoten:guava-mini</include>
|
||||||
<include>com.github.davidmoten:hilbert-curve</include>
|
<include>com.github.davidmoten:hilbert-curve</include>
|
||||||
@@ -394,13 +393,6 @@
|
|||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Parquet -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.parquet</groupId>
|
|
||||||
<artifactId>parquet-avro</artifactId>
|
|
||||||
<scope>compile</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<!-- Hive -->
|
<!-- Hive -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
|||||||
6
pom.xml
6
pom.xml
@@ -1570,8 +1570,6 @@
|
|||||||
<properties>
|
<properties>
|
||||||
<scala.version>${scala12.version}</scala.version>
|
<scala.version>${scala12.version}</scala.version>
|
||||||
<scala.binary.version>2.12</scala.binary.version>
|
<scala.binary.version>2.12</scala.binary.version>
|
||||||
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
|
||||||
<skipITs>true</skipITs>
|
|
||||||
</properties>
|
</properties>
|
||||||
<activation>
|
<activation>
|
||||||
<property>
|
<property>
|
||||||
@@ -1613,6 +1611,9 @@
|
|||||||
<module>hudi-spark-datasource/hudi-spark2</module>
|
<module>hudi-spark-datasource/hudi-spark2</module>
|
||||||
<module>hudi-spark-datasource/hudi-spark2-common</module>
|
<module>hudi-spark-datasource/hudi-spark2-common</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
<properties>
|
||||||
|
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
||||||
|
</properties>
|
||||||
<activation>
|
<activation>
|
||||||
<activeByDefault>true</activeByDefault>
|
<activeByDefault>true</activeByDefault>
|
||||||
<property>
|
<property>
|
||||||
@@ -1631,6 +1632,7 @@
|
|||||||
</modules>
|
</modules>
|
||||||
<properties>
|
<properties>
|
||||||
<sparkbundle.version>2.4</sparkbundle.version>
|
<sparkbundle.version>2.4</sparkbundle.version>
|
||||||
|
<skip.hudi-spark3.unit.tests>true</skip.hudi-spark3.unit.tests>
|
||||||
</properties>
|
</properties>
|
||||||
<activation>
|
<activation>
|
||||||
<property>
|
<property>
|
||||||
|
|||||||
Reference in New Issue
Block a user