[HUDI-4250][HUDI-4202] Optimize performance of Column Stats Index reading in Data Skipping (#5746)
We provide an alternative way of fetching the Column Stats Index within the reading process itself, avoiding the penalty of a heavier-weight execution scheduled through the Spark engine.
This commit is contained in:
@@ -22,9 +22,13 @@ import org.apache.avro.Schema
|
||||
import org.apache.hudi.Spark2RowSerDe
|
||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||
import org.apache.spark.sql.avro._
|
||||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
||||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||
import org.apache.spark.sql.catalyst.expressions.{Expression, InterpretedPredicate}
|
||||
import org.apache.spark.sql.catalyst.parser.ParserInterface
|
||||
import org.apache.spark.sql.catalyst.plans.JoinType
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, Join, LogicalPlan}
|
||||
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
||||
import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat, Spark24HoodieParquetFileFormat}
|
||||
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, Spark2ParsePartitionUtil, SparkParsePartitionUtil}
|
||||
import org.apache.spark.sql.hudi.SparkAdapter
|
||||
@@ -32,6 +36,8 @@ import org.apache.spark.sql.hudi.parser.HoodieSpark2ExtendedSqlParser
|
||||
import org.apache.spark.sql.internal.SQLConf
|
||||
import org.apache.spark.sql.types.DataType
|
||||
import org.apache.spark.sql.{HoodieCatalystExpressionUtils, HoodieCatalystPlansUtils, HoodieSpark2CatalystExpressionUtils, HoodieSpark2CatalystPlanUtils, Row, SparkSession}
|
||||
import org.apache.spark.storage.StorageLevel
|
||||
import org.apache.spark.storage.StorageLevel._
|
||||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
@@ -115,4 +121,20 @@ class Spark2Adapter extends SparkAdapter {
|
||||
/**
 * Builds an [[InterpretedPredicate]] for the supplied Catalyst expression by
 * delegating to `InterpretedPredicate.create`.
 *
 * @param e expression to wrap into a predicate
 * @return the predicate produced by `InterpretedPredicate.create(e)`
 */
override def createInterpretedPredicate(e: Expression): InterpretedPredicate =
  InterpretedPredicate.create(e)
|
||||
|
||||
/**
 * Translates a [[StorageLevel]] into the name of the predefined storage-level
 * constant it is equal to.
 *
 * @param level storage level to translate
 * @return name of the first predefined constant equal to `level`
 * @throws IllegalArgumentException if `level` equals none of the constants listed below
 */
override def convertStorageLevelToString(level: StorageLevel): String = {
  // Candidates are checked top to bottom; the first one equal to `level` wins.
  val namedLevels: Seq[(StorageLevel, String)] = Seq(
    NONE -> "NONE",
    DISK_ONLY -> "DISK_ONLY",
    DISK_ONLY_2 -> "DISK_ONLY_2",
    MEMORY_ONLY -> "MEMORY_ONLY",
    MEMORY_ONLY_2 -> "MEMORY_ONLY_2",
    MEMORY_ONLY_SER -> "MEMORY_ONLY_SER",
    MEMORY_ONLY_SER_2 -> "MEMORY_ONLY_SER_2",
    MEMORY_AND_DISK -> "MEMORY_AND_DISK",
    MEMORY_AND_DISK_2 -> "MEMORY_AND_DISK_2",
    MEMORY_AND_DISK_SER -> "MEMORY_AND_DISK_SER",
    MEMORY_AND_DISK_SER_2 -> "MEMORY_AND_DISK_SER_2",
    OFF_HEAP -> "OFF_HEAP"
  )

  namedLevels
    .collectFirst { case (known, label) if known == level => label }
    .getOrElse(throw new IllegalArgumentException(s"Invalid StorageLevel: $level"))
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user