[HUDI-3254] Introduce HoodieCatalog to manage tables for Spark Datasource V2 (#4611)
This commit is contained in:
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql
|
||||
|
||||
import org.apache.hudi.exception.HoodieException
|
||||
import org.apache.spark.sql.catalyst.catalog.BucketSpec
|
||||
import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform}
|
||||
|
||||
import scala.collection.mutable
|
||||
|
||||
/**
 * Utilities for translating Spark DataSource V2 partition [[Transform]]
 * expressions into the catalog-level representation used by Hudi.
 */
object HoodieSpark3SqlUtils {

  /**
   * Converts V2 transform expressions into identity partition columns plus an
   * optional bucketing spec.
   *
   * @param partitions transforms declared on the table (identity and/or bucket)
   * @return a pair of (identity partition column names, bucket spec if a
   *         bucket transform was present)
   * @throws HoodieException if any transform is not a plain single-column
   *                         identity or bucket transform
   */
  def convertTransforms(partitions: Seq[Transform]): (Seq[String], Option[BucketSpec]) = {
    val identityCols = new mutable.ArrayBuffer[String]
    var bucketSpec = Option.empty[BucketSpec]

    // Side-effecting traversal: foreach, not map (the original built and
    // discarded a result collection).
    partitions.foreach {
      case IdentityTransform(FieldReference(Seq(col))) =>
        identityCols += col

      case BucketTransform(numBuckets, FieldReference(Seq(col))) =>
        // Sort columns are intentionally empty — only the bucket column and
        // count are carried over.
        bucketSpec = Some(BucketSpec(numBuckets, col :: Nil, Nil))

      case _ =>
        // No interpolation needed, so no `s` prefix.
        throw new HoodieException("Partitioning by expressions is not supported.")
    }

    // Hand back an immutable Seq rather than leaking the mutable buffer.
    (identityCols.toSeq, bucketSpec)
  }
}
|
||||
@@ -20,7 +20,6 @@ package org.apache.spark.sql.adapter
|
||||
import org.apache.hudi.Spark3RowSerDe
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.hudi.spark3.internal.ReflectUtil
|
||||
|
||||
import org.apache.spark.sql.{Row, SparkSession}
|
||||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
||||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||
@@ -30,11 +29,14 @@ import org.apache.spark.sql.catalyst.plans.JoinType
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, JoinHint, LogicalPlan}
|
||||
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
||||
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
|
||||
import org.apache.spark.sql.execution.datasources.{Spark3ParsePartitionUtil, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile}
|
||||
import org.apache.spark.sql.connector.catalog.Table
|
||||
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, LogicalRelation, PartitionedFile, Spark3ParsePartitionUtil, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.hudi.SparkAdapter
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
|
||||
import scala.collection.JavaConverters.mapAsScalaMapConverter
|
||||
|
||||
/**
|
||||
* The adapter for spark3.
|
||||
*/
|
||||
@@ -104,4 +106,14 @@ class Spark3Adapter extends SparkAdapter {
|
||||
maxSplitBytes: Long): Seq[FilePartition] = {
|
||||
FilePartition.getFilePartitions(sparkSession, partitionedFiles, maxSplitBytes)
|
||||
}
|
||||
|
||||
override def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = {
|
||||
tripAlias(table) match {
|
||||
case LogicalRelation(_, _, Some(tbl), _) => isHoodieTable(tbl)
|
||||
case relation: UnresolvedRelation =>
|
||||
isHoodieTable(toTableIdentifier(relation), spark)
|
||||
case DataSourceV2Relation(table: Table, _, _, _, _) => isHoodieTable(table.properties())
|
||||
case _=> false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user