1
0

[HUDI-2811] Support Spark 3.2 (#4270)

This commit is contained in:
Yann Byron
2021-12-28 16:12:44 +08:00
committed by GitHub
parent 32505d5adb
commit 05942e018c
36 changed files with 596 additions and 167 deletions

View File

@@ -16,18 +16,26 @@
*/
package org.apache.spark.sql.execution.datasources
import java.util.TimeZone
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.execution.datasources.PartitioningUtils.PartitionValues
import org.apache.spark.sql.types.DataType
import org.apache.spark.sql.types._
import org.apache.spark.sql.catalyst.InternalRow
// NOTE(review): the lines below are a unified-diff hunk rendered without +/- markers,
// so the OLD (removed) and NEW (added) versions of parsePartition appear back-to-back.
// This is not compilable Scala as shown; it documents the change made by HUDI-2811.
class Spark2ParsePartitionUtil extends SparkParsePartitionUtil {
// --- OLD version (removed by this commit) ---
// Returned Spark's internal Option[PartitionValues] directly (first element of the
// tuple produced by PartitioningUtils.parsePartition).
// Its closing brace is not visible here: it was an unchanged context line shared
// with the new version in the original diff — TODO confirm against the full commit.
override def parsePartition(path: Path, typeInference: Boolean,
basePaths: Set[Path],
userSpecifiedDataTypes: Map[String, DataType],
timeZone: TimeZone): Option[PartitionValues] = {
PartitioningUtils.parsePartition(path, typeInference,
basePaths, userSpecifiedDataTypes, timeZone)._1
// --- NEW version (added by this commit) ---
// Return type changed to InternalRow, presumably to give a common signature across
// the Spark 2 / Spark 3.x adapter implementations introduced for Spark 3.2 support
// — verify against the other SparkParsePartitionUtil implementations in the commit.
override def parsePartition(
path: Path,
typeInference: Boolean,
basePaths: Set[Path],
userSpecifiedDataTypes: Map[String, DataType],
timeZone: TimeZone): InternalRow = {
// Destructure the (Option[PartitionValues], Option[Path]) tuple; only the
// partition values are needed, the second element is discarded.
val (partitionValues, _) = PartitioningUtils.parsePartition(path, typeInference,
basePaths, userSpecifiedDataTypes, timeZone)
// Unwrap each partition column's Literal to its raw value and pack the sequence
// into an InternalRow; an unparseable/partition-less path yields InternalRow.empty
// instead of None, so callers no longer handle an Option.
partitionValues.map(_.literals.map(_.value)).map(InternalRow.fromSeq)
.getOrElse(InternalRow.empty)
}
}