1
0

[HUDI-3423] upgrade spark to 3.2.1 (#4815)

This commit is contained in:
Yann Byron
2022-02-22 08:52:21 +08:00
committed by GitHub
parent 801fdab55c
commit 0c950181aa
8 changed files with 17 additions and 18 deletions

View File

@@ -83,7 +83,7 @@ mvn clean package -DskipTests -Dscala-2.12
The default Spark version supported is 2.4.4. To build for different Spark 3 versions, use the corresponding profile
```
# Build against Spark 3.2.0 (the default build shipped with the public Spark 3 bundle)
# Build against Spark 3.2.1 (the default build shipped with the public Spark 3 bundle)
mvn clean package -DskipTests -Dspark3
# Build against Spark 3.1.2

View File

@@ -57,13 +57,7 @@ object HoodieSparkUtils extends SparkAdapterSupport {
// True when running on any Spark 3.2.x release (prefix match on the version string).
def isSpark3_2: Boolean = SPARK_VERSION.startsWith("3.2")
/**
 * Returns true when the active Spark runtime predates the 3.2 line,
 * i.e. Spark 2.x, 3.0.x or 3.1.x.
 *
 * The disjunction is the result — wrapping it in
 * `if (cond) true else false` is redundant.
 */
def beforeSpark3_2(): Boolean = isSpark2 || isSpark3_0 || isSpark3_1
// "gteq" = greater than or equal: must use >= so a bare "3.2" version string
// also qualifies (lexicographically, "3.2" > "3.2" is false, so the original
// strict `>` wrongly excluded exactly-3.2 builds while its callers use this
// as "running on Spark 3.2 or later").
// NOTE(review): lexicographic compare breaks for two-digit minors
// (e.g. "3.10" < "3.2") — acceptable while versions stay below 3.10.
def gteqSpark3_2: Boolean = SPARK_VERSION >= "3.2"
def getMetaSchema: StructType = {
StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => {

View File

@@ -50,7 +50,7 @@ object HoodieAnalysis {
) ++ extraPostHocResolutionRules()
def extraResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] = {
if (!HoodieSparkUtils.beforeSpark3_2()) {
if (HoodieSparkUtils.gteqSpark3_2) {
val spark3AnalysisClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3Analysis"
val spark3Analysis: SparkSession => Rule[LogicalPlan] =
session => ReflectionUtils.loadClass(spark3AnalysisClass, session).asInstanceOf[Rule[LogicalPlan]]
@@ -66,7 +66,7 @@ object HoodieAnalysis {
}
def extraPostHocResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] =
if (!HoodieSparkUtils.beforeSpark3_2()) {
if (HoodieSparkUtils.gteqSpark3_2) {
val spark3PostHocResolutionClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3PostAnalysisRule"
val spark3PostHocResolution: SparkSession => Rule[LogicalPlan] =
session => ReflectionUtils.loadClass(spark3PostHocResolutionClass, session).asInstanceOf[Rule[LogicalPlan]]

View File

@@ -95,7 +95,7 @@ class TestHoodieSparkSqlWriter {
*/
def initSparkContext(): Unit = {
val sparkConf = new SparkConf()
if (!HoodieSparkUtils.beforeSpark3_2()) {
if (HoodieSparkUtils.gteqSpark3_2) {
sparkConf.set("spark.sql.catalog.spark_catalog",
"org.apache.spark.sql.hudi.catalog.HoodieCatalog")
}

View File

@@ -191,7 +191,7 @@ class TestHoodieSparkUtils {
val genRecRDD3 = HoodieSparkUtils.createRdd(df1, "test_struct_name", "test_namespace", true,
org.apache.hudi.common.util.Option.of(schema2))
assert(genRecRDD3.collect()(0).getSchema.equals(schema2))
genRecRDD3.foreach(entry => assertNull(entry.get("nonNullableInnerStruct2")))
genRecRDD3.foreach(entry => assertNull(entry.get("nullableInnerStruct2")))
val innerStruct3 = new StructType().add("innerKey","string",false).add("innerValue", "long", true)
.add("new_nested_col","string",true)

View File

@@ -30,8 +30,7 @@ import org.apache.hudi.index.HoodieIndex.IndexType
import org.apache.hudi.keygen.NonpartitionedKeyGenerator
import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config
import org.apache.hudi.testutils.{DataSourceTestUtils, HoodieClientTestBase}
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieSparkUtils}
import org.apache.log4j.LogManager
import org.apache.spark.sql._
@@ -557,7 +556,12 @@ class TestMORDataSource extends HoodieClientTestBase {
assertEquals(sampleRow.getLong(1), sampleRow.get(1))
assertEquals(sampleRow.getString(2), sampleRow.get(2))
assertEquals(sampleRow.getSeq(3), sampleRow.get(3))
assertEquals(sampleRow.getStruct(4), sampleRow.get(4))
if (HoodieSparkUtils.gteqSpark3_2) {
// Since Spark3.2, the `nation` column is parsed as String, not Struct.
assertEquals(sampleRow.getString(4), sampleRow.get(4))
} else {
assertEquals(sampleRow.getStruct(4), sampleRow.get(4))
}
}
def verifyShow(df: DataFrame): Unit = {

View File

@@ -58,7 +58,7 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
def sparkConf(): SparkConf = {
val sparkConf = new SparkConf()
if (!HoodieSparkUtils.beforeSpark3_2()) {
if (HoodieSparkUtils.gteqSpark3_2) {
sparkConf.set("spark.sql.catalog.spark_catalog",
"org.apache.spark.sql.hudi.catalog.HoodieCatalog")
}

View File

@@ -119,7 +119,7 @@
<sparkbundle.version></sparkbundle.version>
<flink.version>1.14.3</flink.version>
<spark2.version>2.4.4</spark2.version>
<spark3.version>3.2.0</spark3.version>
<spark3.version>3.2.1</spark3.version>
<hudi.spark.module>hudi-spark2</hudi.spark.module>
<hudi.spark.common.module>hudi-spark2-common</hudi.spark.common.module>
<avro.version>1.8.2</avro.version>
@@ -1592,8 +1592,9 @@
<hudi.spark.common.module>hudi-spark3-common</hudi.spark.common.module>
<scalatest.version>3.1.0</scalatest.version>
<kafka.version>2.4.1</kafka.version>
<parquet.version>1.12.1</parquet.version>
<parquet.version>1.12.2</parquet.version>
<avro.version>1.10.2</avro.version>
<orc.version>1.6.12</orc.version>
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
<fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
<fasterxml.jackson.module.scala.version>${fasterxml.spark3.version}</fasterxml.jackson.module.scala.version>