1
0

[HUDI-3719] High performance costs of AvroSerializer in DataSource wr… (#5137)

* [HUDI-3719] High performance costs of AvroSerializer in DataSource writing

* add benchmark framework, adapted from Spark's
add avroSerDerBenchmark
This commit is contained in:
xiarixiaoyao
2022-03-28 02:01:43 +08:00
committed by GitHub
parent 85c4a6cfc1
commit 9da2dd416e
5 changed files with 574 additions and 3 deletions

View File

@@ -62,10 +62,12 @@ object AvroConversionUtils {
* @param rootCatalystType Catalyst [[StructType]] to be transformed into
* @return converter accepting Avro payload and transforming it into a Catalyst one (in the form of [[InternalRow]])
*/
def createAvroToInternalRowConverter(rootAvroType: Schema, rootCatalystType: StructType): GenericRecord => Option[InternalRow] =
record => sparkAdapter.createAvroDeserializer(rootAvroType, rootCatalystType)
def createAvroToInternalRowConverter(rootAvroType: Schema, rootCatalystType: StructType): GenericRecord => Option[InternalRow] = {
val deserializer = sparkAdapter.createAvroDeserializer(rootAvroType, rootCatalystType)
record => deserializer
.deserialize(record)
.map(_.asInstanceOf[InternalRow])
}
/**
* Creates converter to transform Catalyst payload into Avro one
@@ -76,7 +78,8 @@ object AvroConversionUtils {
* @return converter accepting Catalyst payload (in the form of [[InternalRow]]) and transforming it into an Avro one
*/
def createInternalRowToAvroConverter(rootCatalystType: StructType, rootAvroType: Schema, nullable: Boolean): InternalRow => GenericRecord = {
row => sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable)
val serializer = sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable)
row => serializer
.serialize(row)
.asInstanceOf[GenericRecord]
}