1
0

[HUDI-4217] improve repeat init object in ExpressionPayload (#5825)

This commit is contained in:
KnightChess
2022-06-15 20:21:28 +08:00
committed by GitHub
parent c291b05699
commit 2bf0a1906d
2 changed files with 65 additions and 21 deletions

View File

@@ -17,27 +17,59 @@
package org.apache.spark.sql.hudi.command.payload
import com.google.common.cache.CacheBuilder
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
import org.apache.hudi.{AvroConversionUtils, SparkAdapterSupport}
import org.apache.hudi.HoodieSparkUtils.sparkAdapter
import org.apache.hudi.AvroConversionUtils
import org.apache.spark.sql.avro.HoodieAvroDeserializer
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.hudi.command.payload.SqlTypedRecord.{getAvroDeserializer, getSqlType}
import org.apache.spark.sql.types.StructType
import java.util.concurrent.Callable
/**
* A sql typed record which will convert the avro field to sql typed value.
*/
class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord with SparkAdapterSupport {
class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord {
private lazy val sqlType = AvroConversionUtils.convertAvroSchemaToStructType(getSchema)
private lazy val avroDeserializer = sparkAdapter.createAvroDeserializer(record.getSchema, sqlType)
private lazy val sqlRow = avroDeserializer.deserialize(record).get.asInstanceOf[InternalRow]
private lazy val sqlRow = getAvroDeserializer(getSchema).deserialize(record).get.asInstanceOf[InternalRow]
override def put(i: Int, v: Any): Unit = {
record.put(i, v)
}
override def get(i: Int): AnyRef = {
sqlRow.get(i, sqlType(i).dataType)
sqlRow.get(i, getSqlType(getSchema)(i).dataType)
}
override def getSchema: Schema = record.getSchema
}
object SqlTypedRecord {
private val sqlTypeCache = CacheBuilder.newBuilder().build[Schema, StructType]()
private val avroDeserializerCache = CacheBuilder.newBuilder().build[Schema, HoodieAvroDeserializer]()
def getSqlType(schema: Schema): StructType = {
sqlTypeCache.get(schema, new Callable[StructType] {
override def call(): StructType = {
val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
sqlTypeCache.put(schema, structType)
structType
}
})
}
def getAvroDeserializer(schema: Schema): HoodieAvroDeserializer= {
avroDeserializerCache.get(schema, new Callable[HoodieAvroDeserializer] {
override def call(): HoodieAvroDeserializer = {
val deserializer = sparkAdapter.createAvroDeserializer(schema, getSqlType(schema))
avroDeserializerCache.put(schema, deserializer)
deserializer
}
})
}
}