[HUDI-3254] Introduce HoodieCatalog to manage tables for Spark Datasource V2 (#4611)
This commit is contained in:
@@ -53,8 +53,18 @@ object HoodieSparkUtils extends SparkAdapterSupport {
|
||||
|
||||
// True when the running Spark runtime is a 3.0.x release (prefix match on SPARK_VERSION).
def isSpark3_0: Boolean = SPARK_VERSION.startsWith("3.0")
// True when the running Spark runtime is a 3.1.x release (prefix match on SPARK_VERSION).
def isSpark3_1: Boolean = SPARK_VERSION.startsWith("3.1")
// True when the running Spark runtime is a 3.2.x release (prefix match on SPARK_VERSION).
def isSpark3_2: Boolean = SPARK_VERSION.startsWith("3.2")
/**
 * Returns true when the active Spark runtime predates the 3.2 line,
 * i.e. it is Spark 2.x, 3.0.x or 3.1.x.
 *
 * (The parentheses on the name are kept for source compatibility with
 * existing callers, even though the method is side-effect free.)
 */
def beforeSpark3_2(): Boolean = {
  // `if (cond) true else false` is redundant — the condition IS the result.
  isSpark2 || isSpark3_0 || isSpark3_1
}
|
||||
|
||||
def getMetaSchema: StructType = {
|
||||
StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => {
|
||||
StructField(col, StringType, nullable = true)
|
||||
|
||||
@@ -18,19 +18,22 @@
|
||||
|
||||
package org.apache.spark.sql.hudi
|
||||
|
||||
import org.apache.hudi.HoodieSparkUtils.sparkAdapter
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
|
||||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
||||
import org.apache.spark.sql.catalyst.catalog.CatalogTable
|
||||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||
import org.apache.spark.sql.catalyst.parser.ParserInterface
|
||||
import org.apache.spark.sql.catalyst.plans.JoinType
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan}
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, SubqueryAlias}
|
||||
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, LogicalRelation, PartitionedFile, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.{Row, SparkSession}
|
||||
|
||||
import java.util.Locale
|
||||
|
||||
/**
|
||||
 * An interface to adapt the differences between Spark 2 and Spark 3
 * in some Spark-related classes.
|
||||
@@ -99,4 +102,35 @@ trait SparkAdapter extends Serializable {
|
||||
*/
|
||||
// Groups the given files into FilePartitions — NOTE(review): presumably each
// partition is capped near maxSplitBytes, but the packing strategy is
// version-specific; confirm against the Spark 2 / Spark 3 adapter implementations.
def getFilePartitions(sparkSession: SparkSession, partitionedFiles: Seq[PartitionedFile],
    maxSplitBytes: Long): Seq[FilePartition]
||||
/**
 * Determines whether the given logical plan refers to a Hudi table,
 * after peeling off any subquery aliases wrapped around it.
 *
 * Resolved relations are checked via their CatalogTable metadata; unresolved
 * relations are looked up in the session catalog by identifier.
 */
def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = {
  val unwrapped = tripAlias(table)
  unwrapped match {
    case LogicalRelation(_, _, Some(catalogTable), _) =>
      isHoodieTable(catalogTable)
    case unresolved: UnresolvedRelation =>
      isHoodieTable(toTableIdentifier(unresolved), spark)
    case _ =>
      false
  }
}
|
||||
|
||||
/**
 * Returns true if the given table properties identify a Hudi table.
 *
 * The comparison is case-insensitive, for consistency with the
 * CatalogTable overload (which lower-cases the provider before comparing).
 * Calling equalsIgnoreCase on the literal also avoids an NPE when the
 * map contains an explicit null for "provider".
 */
def isHoodieTable(map: java.util.Map[String, String]): Boolean = {
  "hudi".equalsIgnoreCase(map.getOrDefault("provider", ""))
}
|
||||
|
||||
/**
 * Returns true if the catalog table's provider is Hudi (case-insensitive
 * via Locale.ROOT lower-casing). A table with no provider yields false.
 */
def isHoodieTable(table: CatalogTable): Boolean = {
  // Option.contains replaces the `.orNull == "hudi"` comparison: identical
  // semantics (None => false) without materializing a null reference.
  table.provider.map(_.toLowerCase(Locale.ROOT)).contains("hudi")
}
|
||||
|
||||
/**
 * Returns true if the table registered under the given identifier in the
 * session catalog is a Hudi table. Delegates to the CatalogTable overload.
 */
def isHoodieTable(tableId: TableIdentifier, spark: SparkSession): Boolean = {
  isHoodieTable(spark.sessionState.catalog.getTableMetadata(tableId))
}
|
||||
|
||||
/**
 * Strips any (possibly nested) SubqueryAlias wrappers from the plan and
 * returns the underlying relation; plans without aliases pass through as-is.
 */
def tripAlias(plan: LogicalPlan): LogicalPlan = plan match {
  case SubqueryAlias(_, aliased: LogicalPlan) => tripAlias(aliased)
  case unaliased => unaliased
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user