[HUDI-1296] Support Metadata Table in Spark Datasource (#4789)
* Bootstrapping initial support for Metadata Table in Spark Datasource:
  - Consolidated Avro/Row conversion utilities around Spark's AvroDeserializer; removed duplication
  - Bootstrapped HoodieBaseRelation
  - Updated HoodieMergeOnReadRDD to handle the Metadata Table
  - Modified MOR relations to read different base file formats (Parquet, HFile)
This commit is contained in:
@@ -17,10 +17,11 @@
|
||||
|
||||
package org.apache.spark.sql.adapter
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.hudi.Spark3RowSerDe
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.hudi.spark3.internal.ReflectUtil
|
||||
import org.apache.spark.sql.{Row, SparkSession}
|
||||
import org.apache.spark.sql.avro.{HoodieAvroDeserializerTrait, HoodieAvroSerializerTrait, Spark3HoodieAvroDeserializer, HoodieAvroSerializer}
|
||||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
||||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||
import org.apache.spark.sql.catalyst.expressions.{Expression, Like}
|
||||
@@ -30,18 +31,24 @@ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, J
|
||||
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
||||
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
|
||||
import org.apache.spark.sql.connector.catalog.Table
|
||||
import org.apache.spark.sql.execution.datasources._
|
||||
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, LogicalRelation, PartitionedFile, Spark3ParsePartitionUtil, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.hudi.SparkAdapter
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
|
||||
import scala.collection.JavaConverters.mapAsScalaMapConverter
|
||||
import org.apache.spark.sql.types.DataType
|
||||
import org.apache.spark.sql.{Row, SparkSession}
|
||||
|
||||
/**
|
||||
* The adapter for spark3.
|
||||
*/
|
||||
class Spark3Adapter extends SparkAdapter {
|
||||
|
||||
/**
 * Creates a serializer converting Catalyst values of `rootCatalystType` into Avro
 * payloads conforming to `rootAvroType`.
 *
 * @param rootCatalystType Catalyst type of the values being serialized
 * @param rootAvroType     target Avro schema
 * @param nullable         whether the top-level value may be null
 */
def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializerTrait = {
  new HoodieAvroSerializer(rootCatalystType, rootAvroType, nullable)
}
|
||||
|
||||
/**
 * Creates a deserializer converting Avro payloads of `rootAvroType` into Catalyst
 * values of `rootCatalystType`, using the Spark 3 specific implementation.
 *
 * @param rootAvroType     Avro schema of the incoming payloads
 * @param rootCatalystType Catalyst type the payloads are deserialized into
 */
def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializerTrait = {
  new Spark3HoodieAvroDeserializer(rootAvroType, rootCatalystType)
}
|
||||
|
||||
/**
 * Creates a Spark 3 specific [[SparkRowSerDe]] backed by the given expression encoder.
 *
 * @param encoder encoder bound to the schema of the rows being (de)serialized
 */
override def createSparkRowSerDe(encoder: ExpressionEncoder[Row]): SparkRowSerDe =
  new Spark3RowSerDe(encoder)
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.avro
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.hudi.HoodieSparkUtils
|
||||
import org.apache.spark.sql.types.DataType
|
||||
|
||||
/**
 * Spark 3 implementation of [[HoodieAvroDeserializerTrait]], converting Avro payloads
 * of `rootAvroType` into Catalyst values of `rootCatalystType`.
 *
 * NOTE: the underlying [[AvroDeserializer]] is instantiated reflectively because its
 * constructor signature differs across Spark 3 minor versions (SPARK-34404: as of
 * Spark 3.2 the (Schema, DataType) constructor no longer exists).
 *
 * @param rootAvroType     Avro schema of the incoming payloads
 * @param rootCatalystType Catalyst type the payloads are deserialized into
 */
class Spark3HoodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType)
  extends HoodieAvroDeserializerTrait {

  // Spark 3.2 requires a third String argument ("EXCEPTION" — presumably the
  // datetime rebase mode; confirm against Spark's AvroDeserializer), while earlier
  // Spark 3.x versions expose only the two-argument constructor.
  private val avroDeserializer: AvroDeserializer =
    if (HoodieSparkUtils.isSpark3_2) {
      classOf[AvroDeserializer]
        .getConstructor(classOf[Schema], classOf[DataType], classOf[String])
        .newInstance(rootAvroType, rootCatalystType, "EXCEPTION")
    } else {
      classOf[AvroDeserializer]
        .getConstructor(classOf[Schema], classOf[DataType])
        .newInstance(rootAvroType, rootCatalystType)
    }

  /** Delegates to the reflectively constructed [[AvroDeserializer]]. */
  def doDeserialize(data: Any): Any = avroDeserializer.deserialize(data)
}
|
||||
Reference in New Issue
Block a user