[HUDI-3204] Fixing partition-values being derived from partition-path instead of source columns (#5364)
- Scaffolded `Spark24HoodieParquetFileFormat`, extending `ParquetFileFormat` and overriding the behavior of adding partition columns to every row
- Amended `SparkAdapter`'s `createHoodieParquetFileFormat` API so callers can configure whether to append partition values or not
- Fall back to appending partition values in cases when the source columns are not persisted in the data file
- Fixed `HoodieBaseRelation` incorrectly handling mandatory columns
This commit is contained in:
@@ -18,12 +18,30 @@
|
||||
package org.apache.spark.sql
|
||||
|
||||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction}
|
||||
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, SubqueryExpression}
|
||||
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
|
||||
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, SubqueryExpression, UnsafeProjection}
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan}
|
||||
import org.apache.spark.sql.types.StructType
|
||||
|
||||
trait HoodieCatalystExpressionUtils {
|
||||
|
||||
/**
|
||||
* Generates instance of [[UnsafeProjection]] projecting row of one [[StructType]] into another [[StructType]]
|
||||
*
|
||||
* NOTE: No safety checks are executed to validate that this projection is actually feasible,
|
||||
* it's up to the caller to make sure that such projection is possible.
|
||||
*
|
||||
* NOTE: Projection of the row from [[StructType]] A to [[StructType]] B is only possible, if
|
||||
* B is a subset of A
|
||||
*/
|
||||
def generateUnsafeProjection(from: StructType, to: StructType): UnsafeProjection = {
|
||||
val attrs = from.toAttributes
|
||||
val attrsMap = attrs.map(attr => (attr.name, attr)).toMap
|
||||
val targetExprs = to.fields.map(f => attrsMap(f.name))
|
||||
|
||||
GenerateUnsafeProjection.generate(targetExprs, attrs)
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses and resolves expression against the attributes of the given table schema.
|
||||
*
|
||||
|
||||
@@ -177,7 +177,7 @@ trait SparkAdapter extends Serializable {
|
||||
  /**
   * Creates a Catalyst [[Rule]] for the given [[SparkSession]]; per the method name this
   * presumably resolves Hudi's ALTER TABLE commands — actual behavior is defined by the
   * Spark-version-specific implementing adapters (not visible here)
   */
  def createResolveHudiAlterTableCommand(sparkSession: SparkSession): Rule[LogicalPlan]
|
||||
|
||||
  /**
   * Creates an instance of Hudi's [[ParquetFileFormat]]
   */
  def createHoodieParquetFileFormat(): Option[ParquetFileFormat]

  /**
   * Creates an instance of Hudi's [[ParquetFileFormat]]
   *
   * @param appendPartitionValues whether the returned file-format should append partition
   *                              values to every row it reads (as opposed to reading them
   *                              from the source columns persisted in the data file) —
   *                              NOTE(review): inferred from the commit description; confirm
   *                              against the implementing adapters
   */
  def createHoodieParquetFileFormat(appendPartitionValues: Boolean): Option[ParquetFileFormat]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user