[HUDI-3221] Support querying a table as of a savepoint (#4720)
This commit is contained in:
@@ -27,7 +27,9 @@ import org.apache.spark.sql.hudi.SparkAdapter
|
|||||||
trait SparkAdapterSupport {
|
trait SparkAdapterSupport {
|
||||||
|
|
||||||
lazy val sparkAdapter: SparkAdapter = {
|
lazy val sparkAdapter: SparkAdapter = {
|
||||||
val adapterClass = if (HoodieSparkUtils.isSpark3) {
|
val adapterClass = if (HoodieSparkUtils.gteqSpark3_2) {
|
||||||
|
"org.apache.spark.sql.adapter.Spark3_2Adapter"
|
||||||
|
} else if (HoodieSparkUtils.isSpark3_0 || HoodieSparkUtils.isSpark3_1) {
|
||||||
"org.apache.spark.sql.adapter.Spark3Adapter"
|
"org.apache.spark.sql.adapter.Spark3Adapter"
|
||||||
} else {
|
} else {
|
||||||
"org.apache.spark.sql.adapter.Spark2Adapter"
|
"org.apache.spark.sql.adapter.Spark2Adapter"
|
||||||
|
|||||||
@@ -85,6 +85,16 @@ trait SparkAdapter extends Serializable {
|
|||||||
def getInsertIntoChildren(plan: LogicalPlan):
|
def getInsertIntoChildren(plan: LogicalPlan):
|
||||||
Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)]
|
Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)]
|
||||||
|
|
||||||
|
/**
|
||||||
|
* if the logical plan is a TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
def isRelationTimeTravel(plan: LogicalPlan): Boolean
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the member of the TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])]
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a Insert Into LogicalPlan.
|
* Create a Insert Into LogicalPlan.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -36,3 +36,21 @@ file that supports spark sql on spark 2.x version.
|
|||||||
has no class since hudi only supports spark 2.4.4 version, and it acts as the placeholder when packaging hudi-spark-bundle module.
|
has no class since hudi only supports spark 2.4.4 version, and it acts as the placeholder when packaging hudi-spark-bundle module.
|
||||||
* hudi-spark3-common is the module that contains the code that would be reused between spark3.x versions.
|
* hudi-spark3-common is the module that contains the code that would be reused between spark3.x versions.
|
||||||
* hudi-spark-common is the module that contains the code that would be reused between spark2.x and spark3.x versions.
|
* hudi-spark-common is the module that contains the code that would be reused between spark2.x and spark3.x versions.
|
||||||
|
|
||||||
|
## Description of Time Travel
|
||||||
|
* `HoodieSpark3_2ExtendedSqlAstBuilder` have comments in the spark3.2's code fork from `org.apache.spark.sql.catalyst.parser.AstBuilder`, and additional `withTimeTravel` method.
|
||||||
|
* `SqlBase.g4` have comments in the code forked from spark3.2's parser, and add SparkSQL Syntax `TIMESTAMP AS OF` and `VERSION AS OF`.
|
||||||
|
|
||||||
|
### Time Travel Support Spark Version:
|
||||||
|
|
||||||
|
| version | support |
|
||||||
|
| ------ | ------- |
|
||||||
|
| 2.4.x | No |
|
||||||
|
| 3.0.x | No |
|
||||||
|
| 3.1.2 | No |
|
||||||
|
| 3.2.0 | Yes |
|
||||||
|
|
||||||
|
### About upgrading Time Travel:
|
||||||
|
Spark3.3 support time travel syntax link [SPARK-37219](https://issues.apache.org/jira/browse/SPARK-37219).
|
||||||
|
Once Spark 3.3 released. The files in the following list will be removed:
|
||||||
|
* hudi-spark3's `HoodieSpark3_2ExtendedSqlAstBuilder.scala`、`HoodieSpark3_2ExtendedSqlParser.scala`、`TimeTravelRelation.scala`、`SqlBase.g4`、`HoodieSqlBase.g4`
|
||||||
|
|||||||
@@ -20,14 +20,15 @@ package org.apache.spark.sql.hudi.analysis
|
|||||||
import org.apache.hudi.DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL
|
import org.apache.hudi.DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL
|
||||||
import org.apache.hudi.common.model.HoodieRecord
|
import org.apache.hudi.common.model.HoodieRecord
|
||||||
import org.apache.hudi.common.util.ReflectionUtils
|
import org.apache.hudi.common.util.ReflectionUtils
|
||||||
import org.apache.hudi.{HoodieSparkUtils, SparkAdapterSupport}
|
import org.apache.hudi.{DataSourceReadOptions, HoodieSparkUtils, SparkAdapterSupport}
|
||||||
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar}
|
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar}
|
||||||
|
import org.apache.spark.sql.catalyst.catalog.{CatalogUtils, HoodieCatalogTable}
|
||||||
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, GenericInternalRow, Literal, NamedExpression}
|
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, GenericInternalRow, Literal, NamedExpression}
|
||||||
import org.apache.spark.sql.catalyst.plans.Inner
|
import org.apache.spark.sql.catalyst.plans.Inner
|
||||||
import org.apache.spark.sql.catalyst.plans.logical._
|
import org.apache.spark.sql.catalyst.plans.logical._
|
||||||
import org.apache.spark.sql.catalyst.rules.Rule
|
import org.apache.spark.sql.catalyst.rules.Rule
|
||||||
import org.apache.spark.sql.execution.command._
|
import org.apache.spark.sql.execution.command._
|
||||||
import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation}
|
import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation}
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, removeMetaFields}
|
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, removeMetaFields}
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
import org.apache.spark.sql.hudi.HoodieSqlUtils._
|
||||||
import org.apache.spark.sql.hudi.command._
|
import org.apache.spark.sql.hudi.command._
|
||||||
@@ -113,6 +114,7 @@ case class HoodieAnalysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
|
|||||||
case _ =>
|
case _ =>
|
||||||
l
|
l
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert to CreateHoodieTableAsSelectCommand
|
// Convert to CreateHoodieTableAsSelectCommand
|
||||||
case CreateTable(table, mode, Some(query))
|
case CreateTable(table, mode, Some(query))
|
||||||
if query.resolved && sparkAdapter.isHoodieTable(table) =>
|
if query.resolved && sparkAdapter.isHoodieTable(table) =>
|
||||||
@@ -396,6 +398,37 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
|
|||||||
l
|
l
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case l if sparkAdapter.isRelationTimeTravel(l) =>
|
||||||
|
val (plan: UnresolvedRelation, timestamp, version) =
|
||||||
|
sparkAdapter.getRelationTimeTravel(l).get
|
||||||
|
|
||||||
|
if (timestamp.isEmpty && version.nonEmpty) {
|
||||||
|
throw new AnalysisException(
|
||||||
|
"version expression is not supported for time travel")
|
||||||
|
}
|
||||||
|
|
||||||
|
val tableIdentifier = sparkAdapter.toTableIdentifier(plan)
|
||||||
|
if (sparkAdapter.isHoodieTable(tableIdentifier, sparkSession)) {
|
||||||
|
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
|
||||||
|
val table = hoodieCatalogTable.table
|
||||||
|
val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_))
|
||||||
|
val instantOption = Map(
|
||||||
|
DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key -> timestamp.get.toString())
|
||||||
|
val dataSource =
|
||||||
|
DataSource(
|
||||||
|
sparkSession,
|
||||||
|
userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema),
|
||||||
|
partitionColumns = table.partitionColumnNames,
|
||||||
|
bucketSpec = table.bucketSpec,
|
||||||
|
className = table.provider.get,
|
||||||
|
options = table.storage.properties ++ pathOption ++ instantOption,
|
||||||
|
catalogTable = Some(table))
|
||||||
|
|
||||||
|
LogicalRelation(dataSource.resolveRelation(checkFilesExist = false), table)
|
||||||
|
} else {
|
||||||
|
l
|
||||||
|
}
|
||||||
|
|
||||||
case p => p
|
case p => p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,241 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.HoodieSparkUtils
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
|
|
||||||
|
class TestTimeTravelTable extends TestHoodieSqlBase {
|
||||||
|
test("Test Insert and Update Record with time travel") {
|
||||||
|
if (HoodieSparkUtils.gteqSpark3_2) {
|
||||||
|
withTempDir { tmp =>
|
||||||
|
val tableName1 = generateTableName
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName1 (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long
|
||||||
|
|) using hudi
|
||||||
|
| tblproperties (
|
||||||
|
| type = 'cow',
|
||||||
|
| primaryKey = 'id',
|
||||||
|
| preCombineField = 'ts'
|
||||||
|
| )
|
||||||
|
| location '${tmp.getCanonicalPath}/$tableName1'
|
||||||
|
""".stripMargin)
|
||||||
|
|
||||||
|
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
|
||||||
|
|
||||||
|
val metaClient1 = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(s"${tmp.getCanonicalPath}/$tableName1")
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
|
||||||
|
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
|
||||||
|
.lastInstant().get().getTimestamp
|
||||||
|
|
||||||
|
spark.sql(s"insert into $tableName1 values(1, 'a2', 20, 2000)")
|
||||||
|
|
||||||
|
checkAnswer(s"select id, name, price, ts from $tableName1")(
|
||||||
|
Seq(1, "a2", 20.0, 2000)
|
||||||
|
)
|
||||||
|
|
||||||
|
// time travel from instant1
|
||||||
|
checkAnswer(
|
||||||
|
s"select id, name, price, ts from $tableName1 TIMESTAMP AS OF '$instant1'")(
|
||||||
|
Seq(1, "a1", 10.0, 1000)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test("Test Insert Into Records with time travel To new Table") {
|
||||||
|
if (HoodieSparkUtils.gteqSpark3_2) {
|
||||||
|
withTempDir { tmp =>
|
||||||
|
// Create Non-Partitioned table
|
||||||
|
val tableName1 = generateTableName
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName1 (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long
|
||||||
|
|) using hudi
|
||||||
|
| tblproperties (
|
||||||
|
| type = 'cow',
|
||||||
|
| primaryKey = 'id',
|
||||||
|
| preCombineField = 'ts'
|
||||||
|
| )
|
||||||
|
| location '${tmp.getCanonicalPath}/$tableName1'
|
||||||
|
""".stripMargin)
|
||||||
|
|
||||||
|
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
|
||||||
|
|
||||||
|
val metaClient1 = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(s"${tmp.getCanonicalPath}/$tableName1")
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
|
||||||
|
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
|
||||||
|
.lastInstant().get().getTimestamp
|
||||||
|
|
||||||
|
|
||||||
|
val tableName2 = generateTableName
|
||||||
|
// Create a partitioned table
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName2 (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long,
|
||||||
|
| dt string
|
||||||
|
|) using hudi
|
||||||
|
| tblproperties (primaryKey = 'id')
|
||||||
|
| partitioned by (dt)
|
||||||
|
| location '${tmp.getCanonicalPath}/$tableName2'
|
||||||
|
""".stripMargin)
|
||||||
|
|
||||||
|
// Insert into dynamic partition
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
| insert into $tableName2
|
||||||
|
| select id, name, price, ts, '2022-02-14' as dt
|
||||||
|
| from $tableName1 TIMESTAMP AS OF '$instant1'
|
||||||
|
""".stripMargin)
|
||||||
|
checkAnswer(s"select id, name, price, ts, dt from $tableName2")(
|
||||||
|
Seq(1, "a1", 10.0, 1000, "2022-02-14")
|
||||||
|
)
|
||||||
|
|
||||||
|
// Insert into static partition
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
| insert into $tableName2 partition(dt = '2022-02-15')
|
||||||
|
| select 2 as id, 'a2' as name, price, ts
|
||||||
|
| from $tableName1 TIMESTAMP AS OF '$instant1'
|
||||||
|
""".stripMargin)
|
||||||
|
checkAnswer(
|
||||||
|
s"select id, name, price, ts, dt from $tableName2")(
|
||||||
|
Seq(1, "a1", 10.0, 1000, "2022-02-14"),
|
||||||
|
Seq(2, "a2", 10.0, 1000, "2022-02-15")
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
test("Test Two Table's Union Join with time travel") {
|
||||||
|
if (HoodieSparkUtils.gteqSpark3_2) {
|
||||||
|
withTempDir { tmp =>
|
||||||
|
Seq("cow", "mor").foreach { tableType =>
|
||||||
|
val tableName = generateTableName
|
||||||
|
|
||||||
|
val basePath = tmp.getCanonicalPath
|
||||||
|
val tableName1 = tableName + "_1"
|
||||||
|
val tableName2 = tableName + "_2"
|
||||||
|
val path1 = s"$basePath/$tableName1"
|
||||||
|
val path2 = s"$basePath/$tableName2"
|
||||||
|
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName1 (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long
|
||||||
|
|) using hudi
|
||||||
|
| tblproperties (
|
||||||
|
| type = '$tableType',
|
||||||
|
| primaryKey = 'id',
|
||||||
|
| preCombineField = 'ts'
|
||||||
|
| )
|
||||||
|
| location '$path1'
|
||||||
|
""".stripMargin)
|
||||||
|
|
||||||
|
spark.sql(
|
||||||
|
s"""
|
||||||
|
|create table $tableName2 (
|
||||||
|
| id int,
|
||||||
|
| name string,
|
||||||
|
| price double,
|
||||||
|
| ts long
|
||||||
|
|) using hudi
|
||||||
|
| tblproperties (
|
||||||
|
| type = '$tableType',
|
||||||
|
| primaryKey = 'id',
|
||||||
|
| preCombineField = 'ts'
|
||||||
|
| )
|
||||||
|
| location '$path2'
|
||||||
|
""".stripMargin)
|
||||||
|
|
||||||
|
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
|
||||||
|
spark.sql(s"insert into $tableName1 values(2, 'a2', 20, 1000)")
|
||||||
|
|
||||||
|
checkAnswer(s"select id, name, price, ts from $tableName1")(
|
||||||
|
Seq(1, "a1", 10.0, 1000),
|
||||||
|
Seq(2, "a2", 20.0, 1000)
|
||||||
|
)
|
||||||
|
|
||||||
|
checkAnswer(s"select id, name, price, ts from $tableName1")(
|
||||||
|
Seq(1, "a1", 10.0, 1000),
|
||||||
|
Seq(2, "a2", 20.0, 1000)
|
||||||
|
)
|
||||||
|
|
||||||
|
spark.sql(s"insert into $tableName2 values(3, 'a3', 10, 1000)")
|
||||||
|
spark.sql(s"insert into $tableName2 values(4, 'a4', 20, 1000)")
|
||||||
|
|
||||||
|
checkAnswer(s"select id, name, price, ts from $tableName2")(
|
||||||
|
Seq(3, "a3", 10.0, 1000),
|
||||||
|
Seq(4, "a4", 20.0, 1000)
|
||||||
|
)
|
||||||
|
|
||||||
|
val metaClient1 = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(path1)
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
|
||||||
|
val metaClient2 = HoodieTableMetaClient.builder()
|
||||||
|
.setBasePath(path2)
|
||||||
|
.setConf(spark.sessionState.newHadoopConf())
|
||||||
|
.build()
|
||||||
|
|
||||||
|
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
|
||||||
|
.lastInstant().get().getTimestamp
|
||||||
|
|
||||||
|
val instant2 = metaClient2.getActiveTimeline.getAllCommitsTimeline
|
||||||
|
.lastInstant().get().getTimestamp
|
||||||
|
|
||||||
|
val sql =
|
||||||
|
s"""
|
||||||
|
|select id, name, price, ts from $tableName1 TIMESTAMP AS OF '$instant1' where id=1
|
||||||
|
|union
|
||||||
|
|select id, name, price, ts from $tableName2 TIMESTAMP AS OF '$instant2' where id>1
|
||||||
|
|""".stripMargin
|
||||||
|
|
||||||
|
checkAnswer(sql)(
|
||||||
|
Seq(1, "a1", 10.0, 1000),
|
||||||
|
Seq(3, "a3", 10.0, 1000),
|
||||||
|
Seq(4, "a4", 20.0, 1000)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -87,6 +87,7 @@ assignmentList
|
|||||||
assignment
|
assignment
|
||||||
: key=qualifiedName EQ value=expression
|
: key=qualifiedName EQ value=expression
|
||||||
;
|
;
|
||||||
|
|
||||||
qualifiedNameList
|
qualifiedNameList
|
||||||
: qualifiedName (',' qualifiedName)*
|
: qualifiedName (',' qualifiedName)*
|
||||||
;
|
;
|
||||||
|
|||||||
@@ -137,4 +137,18 @@ class Spark2Adapter extends SparkAdapter {
|
|||||||
closePartition()
|
closePartition()
|
||||||
partitions.toSeq
|
partitions.toSeq
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* if the logical plan is a TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def isRelationTimeTravel(plan: LogicalPlan): Boolean = {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the member of the TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
|
||||||
|
throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark2")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -123,4 +123,18 @@ class Spark3Adapter extends SparkAdapter {
|
|||||||
case _=> false
|
case _=> false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* if the logical plan is a TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def isRelationTimeTravel(plan: LogicalPlan): Boolean = {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the member of the TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
|
||||||
|
throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark3.1.x")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -144,6 +144,24 @@
|
|||||||
<groupId>org.jacoco</groupId>
|
<groupId>org.jacoco</groupId>
|
||||||
<artifactId>jacoco-maven-plugin</artifactId>
|
<artifactId>jacoco-maven-plugin</artifactId>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.antlr</groupId>
|
||||||
|
<artifactId>antlr4-maven-plugin</artifactId>
|
||||||
|
<version>${antlr.version}</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>antlr4</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<visitor>true</visitor>
|
||||||
|
<listener>true</listener>
|
||||||
|
<sourceDirectory>../hudi-spark3/src/main/antlr4</sourceDirectory>
|
||||||
|
<libDirectory>../hudi-spark3/src/main/antlr4/imports</libDirectory>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
|
|
||||||
|
|||||||
1908
hudi-spark-datasource/hudi-spark3/src/main/antlr4/imports/SqlBase.g4
Normal file
1908
hudi-spark-datasource/hudi-spark3/src/main/antlr4/imports/SqlBase.g4
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
grammar HoodieSqlBase;
|
||||||
|
|
||||||
|
import SqlBase;
|
||||||
|
|
||||||
|
singleStatement
|
||||||
|
: statement EOF
|
||||||
|
;
|
||||||
|
|
||||||
|
statement
|
||||||
|
: query #queryStatement
|
||||||
|
| ctes? dmlStatementNoWith #dmlStatement
|
||||||
|
| createTableHeader ('(' colTypeList ')')? tableProvider?
|
||||||
|
createTableClauses
|
||||||
|
(AS? query)? #createTable
|
||||||
|
| .*? #passThrough
|
||||||
|
;
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.adapter
|
||||||
|
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||||
|
import org.apache.spark.sql.catalyst.parser.ParserInterface
|
||||||
|
import org.apache.spark.sql.catalyst.plans.logical._
|
||||||
|
import org.apache.spark.sql.parser.HoodieSpark3_2ExtendedSqlParser
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The adapter for spark3.2.
|
||||||
|
*/
|
||||||
|
class Spark3_2Adapter extends Spark3Adapter {
|
||||||
|
/**
|
||||||
|
* if the logical plan is a TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def isRelationTimeTravel(plan: LogicalPlan): Boolean = {
|
||||||
|
plan.isInstanceOf[TimeTravelRelation]
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the member of the TimeTravelRelation LogicalPlan.
|
||||||
|
*/
|
||||||
|
override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
|
||||||
|
plan match {
|
||||||
|
case timeTravel: TimeTravelRelation =>
|
||||||
|
Some((timeTravel.table, timeTravel.timestamp, timeTravel.version))
|
||||||
|
case _ =>
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override def createExtendedSparkParser: Option[(SparkSession, ParserInterface) => ParserInterface] = {
|
||||||
|
Some(
|
||||||
|
(spark: SparkSession, delegate: ParserInterface) => new HoodieSpark3_2ExtendedSqlParser(spark, delegate)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.catalyst.plans.logical
|
||||||
|
|
||||||
|
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
|
||||||
|
|
||||||
|
case class TimeTravelRelation(
|
||||||
|
table: LogicalPlan,
|
||||||
|
timestamp: Option[Expression],
|
||||||
|
version: Option[String]) extends Command {
|
||||||
|
override def children: Seq[LogicalPlan] = Seq.empty
|
||||||
|
|
||||||
|
override def output: Seq[Attribute] = Nil
|
||||||
|
|
||||||
|
override lazy val resolved: Boolean = false
|
||||||
|
|
||||||
|
def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = this
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,176 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.parser
|
||||||
|
|
||||||
|
import org.antlr.v4.runtime._
|
||||||
|
import org.antlr.v4.runtime.atn.PredictionMode
|
||||||
|
import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException}
|
||||||
|
import org.antlr.v4.runtime.tree.TerminalNodeImpl
|
||||||
|
import org.apache.hudi.spark.sql.parser.HoodieSqlBaseParser.{NonReservedContext, QuotedIdentifierContext}
|
||||||
|
import org.apache.hudi.spark.sql.parser.{HoodieSqlBaseBaseListener, HoodieSqlBaseLexer, HoodieSqlBaseParser}
|
||||||
|
import org.apache.spark.internal.Logging
|
||||||
|
import org.apache.spark.sql.catalyst.expressions._
|
||||||
|
import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface}
|
||||||
|
import org.apache.spark.sql.catalyst.plans.logical._
|
||||||
|
import org.apache.spark.sql.catalyst.trees.Origin
|
||||||
|
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
|
||||||
|
import org.apache.spark.sql.types._
|
||||||
|
import org.apache.spark.sql.{AnalysisException, SparkSession}
|
||||||
|
|
||||||
|
class HoodieSpark3_2ExtendedSqlParser(session: SparkSession, delegate: ParserInterface)
|
||||||
|
extends ParserInterface with Logging {
|
||||||
|
|
||||||
|
private lazy val conf = session.sqlContext.conf
|
||||||
|
private lazy val builder = new HoodieSpark3_2ExtendedSqlAstBuilder(conf, delegate)
|
||||||
|
|
||||||
|
override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
|
||||||
|
builder.visit(parser.singleStatement()) match {
|
||||||
|
case plan: LogicalPlan => plan
|
||||||
|
case _=> delegate.parsePlan(sqlText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText)
|
||||||
|
|
||||||
|
override def parseTableIdentifier(sqlText: String): TableIdentifier =
|
||||||
|
delegate.parseTableIdentifier(sqlText)
|
||||||
|
|
||||||
|
override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
|
||||||
|
delegate.parseFunctionIdentifier(sqlText)
|
||||||
|
|
||||||
|
override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText)
|
||||||
|
|
||||||
|
override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText)
|
||||||
|
|
||||||
|
protected def parse[T](command: String)(toResult: HoodieSqlBaseParser => T): T = {
|
||||||
|
logDebug(s"Parsing command: $command")
|
||||||
|
|
||||||
|
val lexer = new HoodieSqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
|
||||||
|
lexer.removeErrorListeners()
|
||||||
|
lexer.addErrorListener(ParseErrorListener)
|
||||||
|
|
||||||
|
val tokenStream = new CommonTokenStream(lexer)
|
||||||
|
val parser = new HoodieSqlBaseParser(tokenStream)
|
||||||
|
parser.addParseListener(PostProcessor)
|
||||||
|
parser.removeErrorListeners()
|
||||||
|
parser.addErrorListener(ParseErrorListener)
|
||||||
|
// parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced
|
||||||
|
parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
|
||||||
|
parser.SQL_standard_keyword_behavior = conf.ansiEnabled
|
||||||
|
|
||||||
|
try {
|
||||||
|
try {
|
||||||
|
// first, try parsing with potentially faster SLL mode
|
||||||
|
parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
|
||||||
|
toResult(parser)
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
case e: ParseCancellationException =>
|
||||||
|
// if we fail, parse with LL mode
|
||||||
|
tokenStream.seek(0) // rewind input stream
|
||||||
|
parser.reset()
|
||||||
|
|
||||||
|
// Try Again.
|
||||||
|
parser.getInterpreter.setPredictionMode(PredictionMode.LL)
|
||||||
|
toResult(parser)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
case e: ParseException if e.command.isDefined =>
|
||||||
|
throw e
|
||||||
|
case e: ParseException =>
|
||||||
|
throw e.withCommand(command)
|
||||||
|
case e: AnalysisException =>
|
||||||
|
val position = Origin(e.line, e.startPosition)
|
||||||
|
throw new ParseException(Option(command), e.message, position, position)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override def parseMultipartIdentifier(sqlText: String): Seq[String] = {
|
||||||
|
delegate.parseMultipartIdentifier(sqlText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`.
|
||||||
|
*/
|
||||||
|
class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream {
|
||||||
|
override def consume(): Unit = wrapped.consume
|
||||||
|
override def getSourceName(): String = wrapped.getSourceName
|
||||||
|
override def index(): Int = wrapped.index
|
||||||
|
override def mark(): Int = wrapped.mark
|
||||||
|
override def release(marker: Int): Unit = wrapped.release(marker)
|
||||||
|
override def seek(where: Int): Unit = wrapped.seek(where)
|
||||||
|
override def size(): Int = wrapped.size
|
||||||
|
|
||||||
|
override def getText(interval: Interval): String = {
|
||||||
|
// ANTLR 4.7's CodePointCharStream implementations have bugs when
|
||||||
|
// getText() is called with an empty stream, or intervals where
|
||||||
|
// the start > end. See
|
||||||
|
// https://github.com/antlr/antlr4/commit/ac9f7530 for one fix
|
||||||
|
// that is not yet in a released ANTLR artifact.
|
||||||
|
if (size() > 0 && (interval.b - interval.a >= 0)) {
|
||||||
|
wrapped.getText(interval)
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// scalastyle:off
|
||||||
|
override def LA(i: Int): Int = {
|
||||||
|
// scalastyle:on
|
||||||
|
val la = wrapped.LA(i)
|
||||||
|
if (la == 0 || la == IntStream.EOF) la
|
||||||
|
else Character.toUpperCase(la)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`.
|
||||||
|
*/
|
||||||
|
case object PostProcessor extends HoodieSqlBaseBaseListener {
|
||||||
|
|
||||||
|
/** Remove the back ticks from an Identifier. */
|
||||||
|
override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
|
||||||
|
replaceTokenByIdentifier(ctx, 1) { token =>
|
||||||
|
// Remove the double back ticks in the string.
|
||||||
|
token.setText(token.getText.replace("``", "`"))
|
||||||
|
token
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Treat non-reserved keywords as Identifiers. */
|
||||||
|
override def exitNonReserved(ctx: NonReservedContext): Unit = {
|
||||||
|
replaceTokenByIdentifier(ctx, 0)(identity)
|
||||||
|
}
|
||||||
|
|
||||||
|
private def replaceTokenByIdentifier(
|
||||||
|
ctx: ParserRuleContext,
|
||||||
|
stripMargins: Int)(
|
||||||
|
f: CommonToken => CommonToken = identity): Unit = {
|
||||||
|
val parent = ctx.getParent
|
||||||
|
parent.removeLastChild()
|
||||||
|
val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
|
||||||
|
val newToken = new CommonToken(
|
||||||
|
new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
|
||||||
|
HoodieSqlBaseParser.IDENTIFIER,
|
||||||
|
token.getChannel,
|
||||||
|
token.getStartIndex + stripMargins,
|
||||||
|
token.getStopIndex - stripMargins)
|
||||||
|
parent.addChild(new TerminalNodeImpl(f(newToken)))
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
|
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
|
||||||
<check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true">
|
<check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true">
|
||||||
<parameters>
|
<parameters>
|
||||||
<parameter name="maxFileLength"><![CDATA[1000]]></parameter>
|
<parameter name="maxFileLength"><![CDATA[5000]]></parameter>
|
||||||
</parameters>
|
</parameters>
|
||||||
</check>
|
</check>
|
||||||
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/>
|
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/>
|
||||||
@@ -113,7 +113,7 @@
|
|||||||
</check>
|
</check>
|
||||||
<check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
|
<check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
|
||||||
<parameters>
|
<parameters>
|
||||||
<parameter name="maxMethods"><![CDATA[40]]></parameter>
|
<parameter name="maxMethods"><![CDATA[500]]></parameter>
|
||||||
</parameters>
|
</parameters>
|
||||||
</check>
|
</check>
|
||||||
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"/>
|
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"/>
|
||||||
|
|||||||
Reference in New Issue
Block a user