[HUDI-3423] upgrade spark to 3.2.1 (#4815)
@@ -83,7 +83,7 @@ mvn clean package -DskipTests -Dscala-2.12
 The default Spark version supported is 2.4.4. To build for different Spark 3 versions, use the corresponding profile
 
 ```
-# Build against Spark 3.2.0 (the default build shipped with the public Spark 3 bundle)
+# Build against Spark 3.2.1 (the default build shipped with the public Spark 3 bundle)
 mvn clean package -DskipTests -Dspark3
 
 # Build against Spark 3.1.2
@@ -57,13 +57,7 @@ object HoodieSparkUtils extends SparkAdapterSupport {
 
   def isSpark3_2: Boolean = SPARK_VERSION.startsWith("3.2")
 
-  def beforeSpark3_2(): Boolean = {
-    if (isSpark2 || isSpark3_0 || isSpark3_1) {
-      true
-    } else {
-      false
-    }
-  }
+  def gteqSpark3_2: Boolean = SPARK_VERSION > "3.2"
 
   def getMetaSchema: StructType = {
     StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => {
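The new helper leans on plain lexicographic string comparison: any `SPARK_VERSION` of the form `3.2.x` sorts after the literal `"3.2"`, while `2.4.4`, `3.0.x`, and `3.1.x` sort before it, so a single `>` replaces the old enumeration of pre-3.2 versions. A minimal standalone sketch of that check (the version strings are illustrative, not read from a live session):

```scala
object VersionCheckSketch {
  // Lexicographic comparison: "3.2.0" > "3.2" and "3.1.2" < "3.2".
  // Known caveat: a hypothetical "3.10.x" would sort before "3.2".
  def gteqSpark3_2(sparkVersion: String): Boolean = sparkVersion > "3.2"

  def main(args: Array[String]): Unit = {
    assert(gteqSpark3_2("3.2.0"))
    assert(gteqSpark3_2("3.2.1"))
    assert(!gteqSpark3_2("3.1.2"))
    assert(!gteqSpark3_2("2.4.4"))
    println("version gate behaves as expected")
  }
}
```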
@@ -50,7 +50,7 @@ object HoodieAnalysis {
     ) ++ extraPostHocResolutionRules()
 
   def extraResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] = {
-    if (!HoodieSparkUtils.beforeSpark3_2()) {
+    if (HoodieSparkUtils.gteqSpark3_2) {
       val spark3AnalysisClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3Analysis"
       val spark3Analysis: SparkSession => Rule[LogicalPlan] =
         session => ReflectionUtils.loadClass(spark3AnalysisClass, session).asInstanceOf[Rule[LogicalPlan]]
@@ -66,7 +66,7 @@ object HoodieAnalysis {
   }
 
   def extraPostHocResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] =
-    if (!HoodieSparkUtils.beforeSpark3_2()) {
+    if (HoodieSparkUtils.gteqSpark3_2) {
      val spark3PostHocResolutionClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3PostAnalysisRule"
       val spark3PostHocResolution: SparkSession => Rule[LogicalPlan] =
         session => ReflectionUtils.loadClass(spark3PostHocResolutionClass, session).asInstanceOf[Rule[LogicalPlan]]
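Both rule providers share the same pattern: the Spark-3.2-only rule classes are referenced by fully qualified name and instantiated reflectively, so the common module never links against them on older Spark versions. A minimal sketch of that pattern, with a hypothetical `MyRule` standing in for the real analysis rules:

```scala
// `MyRule` is a hypothetical stand-in for HoodieSpark3Analysis: a class
// that takes the session in its constructor and rewrites a plan.
class MyRule(session: Object) {
  def apply(plan: String): String = plan // identity rewrite, for the sketch
}

object RuleLoaderSketch {
  // Load a rule class by name, in the spirit of ReflectionUtils.loadClass,
  // so Spark-3-only classes are never touched on a Spark 2 classpath.
  def loadRule(className: String, session: Object): MyRule =
    Class.forName(className)
      .getConstructor(classOf[Object])
      .newInstance(session)
      .asInstanceOf[MyRule]

  def main(args: Array[String]): Unit = {
    val rule = loadRule("MyRule", new Object)
    println(rule.apply("LogicalPlan placeholder"))
  }
}
```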
@@ -95,7 +95,7 @@ class TestHoodieSparkSqlWriter {
    */
   def initSparkContext(): Unit = {
     val sparkConf = new SparkConf()
-    if (!HoodieSparkUtils.beforeSpark3_2()) {
+    if (HoodieSparkUtils.gteqSpark3_2) {
       sparkConf.set("spark.sql.catalog.spark_catalog",
         "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
     }
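On Spark 3.2+ the tests swap the built-in session catalog for Hudi's `HoodieCatalog` through Spark's standard `spark.sql.catalog.spark_catalog` setting. A minimal sketch of wiring that conf into a local session (the builder calls are standard Spark API; only their use here is illustrative):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object CatalogConfSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[1]")
      .setAppName("catalog-conf-sketch")
      // Replace the V2 session catalog; Spark resolves this class lazily,
      // the first time the catalog is actually used.
      .set("spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.hudi.catalog.HoodieCatalog")

    val spark = SparkSession.builder().config(conf).getOrCreate()
    println(spark.conf.get("spark.sql.catalog.spark_catalog"))
    spark.stop()
  }
}
```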
@@ -191,7 +191,7 @@ class TestHoodieSparkUtils {
     val genRecRDD3 = HoodieSparkUtils.createRdd(df1, "test_struct_name", "test_namespace", true,
       org.apache.hudi.common.util.Option.of(schema2))
     assert(genRecRDD3.collect()(0).getSchema.equals(schema2))
-    genRecRDD3.foreach(entry => assertNull(entry.get("nonNullableInnerStruct2")))
+    genRecRDD3.foreach(entry => assertNull(entry.get("nullableInnerStruct2")))
 
     val innerStruct3 = new StructType().add("innerKey","string",false).add("innerValue", "long", true)
       .add("new_nested_col","string",true)
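These assertions depend on Avro's `GenericRecord.get(fieldName)` returning null for a field that exists in the evolved schema but was never populated. A small standalone illustration of that behavior (the record and field names here are made up for the sketch):

```scala
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData

object AvroUnsetFieldSketch {
  def main(args: Array[String]): Unit = {
    // One populated field plus one field that is never set.
    val schema = SchemaBuilder.record("rec").fields()
      .requiredString("innerKey")
      .optionalString("newField")
      .endRecord()

    val record = new GenericData.Record(schema)
    record.put("innerKey", "k1")

    assert(record.get("innerKey") == "k1")
    assert(record.get("newField") == null) // unset field reads back as null
    println("unset Avro field is null")
  }
}
```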
@@ -30,8 +30,7 @@ import org.apache.hudi.index.HoodieIndex.IndexType
 import org.apache.hudi.keygen.NonpartitionedKeyGenerator
 import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config
 import org.apache.hudi.testutils.{DataSourceTestUtils, HoodieClientTestBase}
-import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
-
+import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieSparkUtils}
 import org.apache.log4j.LogManager
 
 import org.apache.spark.sql._
@@ -557,7 +556,12 @@ class TestMORDataSource extends HoodieClientTestBase {
     assertEquals(sampleRow.getLong(1), sampleRow.get(1))
     assertEquals(sampleRow.getString(2), sampleRow.get(2))
     assertEquals(sampleRow.getSeq(3), sampleRow.get(3))
-    assertEquals(sampleRow.getStruct(4), sampleRow.get(4))
+    if (HoodieSparkUtils.gteqSpark3_2) {
+      // Since Spark3.2, the `nation` column is parsed as String, not Struct.
+      assertEquals(sampleRow.getString(4), sampleRow.get(4))
+    } else {
+      assertEquals(sampleRow.getStruct(4), sampleRow.get(4))
+    }
   }
 
   def verifyShow(df: DataFrame): Unit = {
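The branch keys off the contract that `Row`'s typed accessors are thin casts over the generic `get(i)`: whichever type column 4 was parsed as, the matching typed getter must agree with `get(4)`. A tiny self-contained illustration with a hand-built `Row` instead of Hudi data:

```scala
import org.apache.spark.sql.Row

object RowAccessorSketch {
  def main(args: Array[String]): Unit = {
    // Column 0 holds a String, column 1 a nested Row (struct-like value).
    val row = Row("France", Row("Paris", 75000))

    // Typed accessors are just casts over the generic get(i).
    assert(row.getString(0) == row.get(0))
    assert(row.getStruct(1) == row.get(1))
    println("typed accessors agree with get(i)")
  }
}
```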
@@ -58,7 +58,7 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
 
   def sparkConf(): SparkConf = {
     val sparkConf = new SparkConf()
-    if (!HoodieSparkUtils.beforeSpark3_2()) {
+    if (HoodieSparkUtils.gteqSpark3_2) {
       sparkConf.set("spark.sql.catalog.spark_catalog",
         "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
     }
pom.xml
@@ -119,7 +119,7 @@
     <sparkbundle.version></sparkbundle.version>
     <flink.version>1.14.3</flink.version>
     <spark2.version>2.4.4</spark2.version>
-    <spark3.version>3.2.0</spark3.version>
+    <spark3.version>3.2.1</spark3.version>
     <hudi.spark.module>hudi-spark2</hudi.spark.module>
     <hudi.spark.common.module>hudi-spark2-common</hudi.spark.common.module>
     <avro.version>1.8.2</avro.version>
@@ -1592,8 +1592,9 @@
     <hudi.spark.common.module>hudi-spark3-common</hudi.spark.common.module>
     <scalatest.version>3.1.0</scalatest.version>
     <kafka.version>2.4.1</kafka.version>
-    <parquet.version>1.12.1</parquet.version>
+    <parquet.version>1.12.2</parquet.version>
     <avro.version>1.10.2</avro.version>
+    <orc.version>1.6.12</orc.version>
     <fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
     <fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
     <fasterxml.jackson.module.scala.version>${fasterxml.spark3.version}</fasterxml.jackson.module.scala.version>