[HUDI-3254] Introduce HoodieCatalog to manage tables for Spark Datasource V2 (#4611)
This commit is contained in:
@@ -195,7 +195,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
||||
|
||||
if (doFullTableScan) {
|
||||
val hudiDF = sqlContext.read
|
||||
.format("hudi")
|
||||
.format("hudi_v1")
|
||||
.schema(usedSchema)
|
||||
.load(basePath)
|
||||
.filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, //Notice the > in place of >= because we are working with optParam instead of first commit > optParam
|
||||
@@ -208,7 +208,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
||||
} else {
|
||||
if (metaBootstrapFileIdToFullPath.nonEmpty) {
|
||||
df = sqlContext.sparkSession.read
|
||||
.format("hudi")
|
||||
.format("hudi_v1")
|
||||
.schema(usedSchema)
|
||||
.option(DataSourceReadOptions.READ_PATHS.key, filteredMetaBootstrapFullPaths.mkString(","))
|
||||
.load()
|
||||
|
||||
@@ -32,11 +32,11 @@ import org.apache.spark.api.java.JavaSparkContext
|
||||
import org.apache.spark.sql.catalyst.TableIdentifier
|
||||
import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedRelation}
|
||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
|
||||
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression}
|
||||
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal}
|
||||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
|
||||
import org.apache.spark.sql.execution.datasources.LogicalRelation
|
||||
import org.apache.spark.sql.internal.StaticSQLConf
|
||||
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
||||
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
|
||||
import org.apache.spark.sql.types.{DataType, NullType, StringType, StructField, StructType}
|
||||
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
|
||||
|
||||
import java.net.URI
|
||||
@@ -54,24 +54,6 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport {
|
||||
override def get() = new SimpleDateFormat("yyyy-MM-dd")
|
||||
})
|
||||
|
||||
/**
 * Tells whether a catalog table is a Hudi table.
 *
 * @param table catalog metadata of the table to inspect
 * @return true when the table declares a provider whose lower-cased
 *         (Locale.ROOT) name equals "hudi"; false when no provider is set
 *         or it names something else
 */
def isHoodieTable(table: CatalogTable): Boolean =
  table.provider.exists(providerName => providerName.toLowerCase(Locale.ROOT) == "hudi")
|
||||
|
||||
/**
 * Tells whether the table registered under the given identifier is a Hudi table.
 *
 * The table metadata is fetched from the session catalog and handed to the
 * CatalogTable overload of this method.
 *
 * @param tableId identifier of the table to look up
 * @param spark   session whose catalog is queried
 * @return the result of the CatalogTable-based check on the looked-up metadata
 */
def isHoodieTable(tableId: TableIdentifier, spark: SparkSession): Boolean =
  isHoodieTable(spark.sessionState.catalog.getTableMetadata(tableId))
|
||||
|
||||
/**
 * Tells whether a logical plan node refers to a Hudi table.
 *
 * The plan is first passed through `tripAlias`, then:
 *  - an unresolved relation is converted to a table identifier and checked
 *    through the session catalog;
 *  - a logical relation carrying catalog metadata is checked against that metadata;
 *  - anything else is reported as not being a Hudi table.
 *
 * @param table logical plan node to inspect
 * @param spark session used when a catalog lookup is required
 * @return true only when one of the checks above identifies a Hudi table
 */
def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = {
  val unaliased = tripAlias(table)
  unaliased match {
    case unresolved: UnresolvedRelation =>
      val tableId = sparkAdapter.toTableIdentifier(unresolved)
      isHoodieTable(tableId, spark)
    case LogicalRelation(_, _, Some(catalogTable), _) =>
      isHoodieTable(catalogTable)
    case _ =>
      false
  }
}
|
||||
|
||||
def getTableIdentifier(table: LogicalPlan): TableIdentifier = {
|
||||
table match {
|
||||
case SubqueryAlias(name, _) => sparkAdapter.toTableIdentifier(name)
|
||||
@@ -200,14 +182,29 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport {
|
||||
getTableLocation(table, spark)
|
||||
}
|
||||
|
||||
/**
 * Resolves the storage location of a table from its property map.
 *
 * A missing or empty "location" property means the table is treated as managed
 * and the session catalog's default table path for `identifier` is used.
 * Otherwise the property value is parsed as a Hadoop `Path` and converted to a URI.
 * The resulting URI is then resolved by the `Option[URI]` overload.
 *
 * @param properties   table properties, optionally containing a "location" entry
 * @param identifier   identifier of the table, used for the default path and error reporting
 * @param sparkSession session providing the catalog and Hadoop configuration
 * @return the table location as produced by the URI-based overload
 */
def getTableLocation(properties: Map[String, String], identifier: TableIdentifier, sparkSession: SparkSession): String = {
  // An absent key and an empty value are both treated as "no explicit location".
  val explicitLocation = properties.get("location").filter(_.nonEmpty)
  val uri = explicitLocation match {
    case Some(location) => new Path(location).toUri
    case None => sparkSession.sessionState.catalog.defaultTablePath(identifier)
  }
  getTableLocation(Some(uri), identifier, sparkSession)
}
|
||||
|
||||
/**
 * Resolves the storage location of a catalog table.
 *
 * Uses the location URI recorded in the table's storage descriptor when present;
 * otherwise falls back to the session catalog's default table path for the
 * table's identifier. The URI is then resolved by the `Option[URI]` overload.
 *
 * @param table        catalog metadata of the table
 * @param sparkSession session providing the catalog and Hadoop configuration
 * @return the table location as produced by the URI-based overload
 */
def getTableLocation(table: CatalogTable, sparkSession: SparkSession): String = {
  val tableId = table.identifier
  val resolvedUri = table.storage.locationUri
    .getOrElse(sparkSession.sessionState.catalog.defaultTablePath(tableId))
  getTableLocation(Some(resolvedUri), tableId, sparkSession)
}
|
||||
|
||||
/**
 * Turns an optional location URI into the table location string.
 *
 * The URI is passed through `makePathQualified` with a fresh Hadoop configuration
 * taken from the session state, and the result is passed through `removePlaceHolder`.
 *
 * @param uri          location of the table, if known
 * @param identifier   identifier of the table, used only in the error message
 * @param sparkSession session from which the Hadoop configuration is created
 * @return the processed location string
 * @throws IllegalArgumentException when `uri` is empty
 */
def getTableLocation(uri: Option[URI], identifier: TableIdentifier, sparkSession: SparkSession): String = {
  val conf = sparkSession.sessionState.newHadoopConf()
  uri.map(makePathQualified(_, conf))
    .map(removePlaceHolder)
    // Report the identifier parameter; this overload has no `table` in scope.
    .getOrElse(throw new IllegalArgumentException(s"Missing location for ${identifier}"))
}
|
||||
|
||||
private def removePlaceHolder(path: String): String = {
|
||||
@@ -316,4 +313,12 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport {
|
||||
/**
 * Compares two struct fields by name and data type.
 *
 * @param field    first field
 * @param other    second field
 * @param resolver function deciding whether two column names match
 * @return true when the resolver accepts the two names and both data types are equal
 */
def columnEqual(field: StructField, other: StructField, resolver: Resolver): Boolean = {
  val namesMatch = resolver(field.name, other.name)
  // Data types are compared only when the names already match.
  namesMatch && field.dataType == other.dataType
}
|
||||
|
||||
/**
 * Produces an expression of the requested data type from `child`.
 *
 *  - A literal of `NullType` is rebuilt as a literal of `dataType` with the same value.
 *  - An expression already of `dataType` is returned unchanged.
 *  - Any other expression is wrapped in a `Cast` to `dataType`, using the
 *    session-local time zone from `conf`.
 *
 * @param child    expression to adapt
 * @param dataType target data type
 * @param conf     SQL configuration supplying the session-local time zone
 * @return an expression whose data type is `dataType`
 */
def castIfNeeded(child: Expression, dataType: DataType, conf: SQLConf): Expression =
  child match {
    case Literal(value, NullType) =>
      Literal(value, dataType)
    case sameTyped if sameTyped.dataType == dataType =>
      sameTyped
    case other =>
      Cast(other, dataType, Option(conf.sessionLocalTimeZone))
  }
|
||||
}
|
||||
|
||||
@@ -57,7 +57,8 @@ case class AlterHoodieTableAddColumnsCommand(
|
||||
s" table columns is: [${hoodieCatalogTable.tableSchemaWithoutMetaFields.fieldNames.mkString(",")}]")
|
||||
}
|
||||
// Get the new schema
|
||||
val newSqlSchema = StructType(tableSchema.fields ++ colsToAdd)
|
||||
val rearrangedSchema = hoodieCatalogTable.dataSchema ++ colsToAdd ++ hoodieCatalogTable.partitionSchema
|
||||
val newSqlSchema = StructType(rearrangedSchema)
|
||||
val (structName, nameSpace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tableId.table)
|
||||
val newSchema = AvroConversionUtils.convertStructTypeToAvroSchema(newSqlSchema, structName, nameSpace)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user