1
0

[HUDI-3221] Support querying a table as of a savepoint (#4720)

This commit is contained in:
ForwardXu
2022-03-09 02:02:34 +08:00
committed by GitHub
parent 575bc63468
commit 08fd80c913
16 changed files with 5903 additions and 6 deletions

View File

@@ -27,7 +27,9 @@ import org.apache.spark.sql.hudi.SparkAdapter
trait SparkAdapterSupport { trait SparkAdapterSupport {
lazy val sparkAdapter: SparkAdapter = { lazy val sparkAdapter: SparkAdapter = {
val adapterClass = if (HoodieSparkUtils.isSpark3) { val adapterClass = if (HoodieSparkUtils.gteqSpark3_2) {
"org.apache.spark.sql.adapter.Spark3_2Adapter"
} else if (HoodieSparkUtils.isSpark3_0 || HoodieSparkUtils.isSpark3_1) {
"org.apache.spark.sql.adapter.Spark3Adapter" "org.apache.spark.sql.adapter.Spark3Adapter"
} else { } else {
"org.apache.spark.sql.adapter.Spark2Adapter" "org.apache.spark.sql.adapter.Spark2Adapter"

View File

@@ -85,6 +85,16 @@ trait SparkAdapter extends Serializable {
def getInsertIntoChildren(plan: LogicalPlan): def getInsertIntoChildren(plan: LogicalPlan):
Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)] Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)]
/**
* if the logical plan is a TimeTravelRelation LogicalPlan.
*/
def isRelationTimeTravel(plan: LogicalPlan): Boolean
/**
* Get the member of the TimeTravelRelation LogicalPlan.
*/
def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])]
/** /**
* Create a Insert Into LogicalPlan. * Create a Insert Into LogicalPlan.
*/ */

View File

@@ -36,3 +36,21 @@ file that supports spark sql on spark 2.x version.
has no class since hudi only supports spark 2.4.4 version, and it acts as the placeholder when packaging hudi-spark-bundle module. has no class since hudi only supports spark 2.4.4 version, and it acts as the placeholder when packaging hudi-spark-bundle module.
* hudi-spark3-common is the module that contains the code that would be reused between spark3.x versions. * hudi-spark3-common is the module that contains the code that would be reused between spark3.x versions.
* hudi-spark-common is the module that contains the code that would be reused between spark2.x and spark3.x versions. * hudi-spark-common is the module that contains the code that would be reused between spark2.x and spark3.x versions.
## Description of Time Travel
* `HoodieSpark3_2ExtendedSqlAstBuilder` has comments in the code forked from Spark 3.2's `org.apache.spark.sql.catalyst.parser.AstBuilder`, plus an additional `withTimeTravel` method.
* `SqlBase.g4` has comments in the code forked from Spark 3.2's parser, and adds the Spark SQL syntax `TIMESTAMP AS OF` and `VERSION AS OF`.
### Time Travel Support Spark Version:
| version | support |
| ------ | ------- |
| 2.4.x | No |
| 3.0.x | No |
| 3.1.2 | No |
| 3.2.0 | Yes |
### About upgrading Time Travel:
Spark 3.3 supports time travel syntax natively; see [SPARK-37219](https://issues.apache.org/jira/browse/SPARK-37219).
Once Spark 3.3 is released, the following files will be removed:
* hudi-spark3's `HoodieSpark3_2ExtendedSqlAstBuilder.scala`, `HoodieSpark3_2ExtendedSqlParser.scala`, `TimeTravelRelation.scala`, `SqlBase.g4`, `HoodieSqlBase.g4`

View File

@@ -20,14 +20,15 @@ package org.apache.spark.sql.hudi.analysis
import org.apache.hudi.DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL import org.apache.hudi.DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL
import org.apache.hudi.common.model.HoodieRecord import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.util.ReflectionUtils import org.apache.hudi.common.util.ReflectionUtils
import org.apache.hudi.{HoodieSparkUtils, SparkAdapterSupport} import org.apache.hudi.{DataSourceReadOptions, HoodieSparkUtils, SparkAdapterSupport}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar}
import org.apache.spark.sql.catalyst.catalog.{CatalogUtils, HoodieCatalogTable}
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, GenericInternalRow, Literal, NamedExpression} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, GenericInternalRow, Literal, NamedExpression}
import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.Inner
import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation} import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, LogicalRelation}
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, removeMetaFields} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, removeMetaFields}
import org.apache.spark.sql.hudi.HoodieSqlUtils._ import org.apache.spark.sql.hudi.HoodieSqlUtils._
import org.apache.spark.sql.hudi.command._ import org.apache.spark.sql.hudi.command._
@@ -113,6 +114,7 @@ case class HoodieAnalysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
case _ => case _ =>
l l
} }
// Convert to CreateHoodieTableAsSelectCommand // Convert to CreateHoodieTableAsSelectCommand
case CreateTable(table, mode, Some(query)) case CreateTable(table, mode, Some(query))
if query.resolved && sparkAdapter.isHoodieTable(table) => if query.resolved && sparkAdapter.isHoodieTable(table) =>
@@ -396,6 +398,37 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
l l
} }
case l if sparkAdapter.isRelationTimeTravel(l) =>
val (plan: UnresolvedRelation, timestamp, version) =
sparkAdapter.getRelationTimeTravel(l).get
if (timestamp.isEmpty && version.nonEmpty) {
throw new AnalysisException(
"version expression is not supported for time travel")
}
val tableIdentifier = sparkAdapter.toTableIdentifier(plan)
if (sparkAdapter.isHoodieTable(tableIdentifier, sparkSession)) {
val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableIdentifier)
val table = hoodieCatalogTable.table
val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_))
val instantOption = Map(
DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key -> timestamp.get.toString())
val dataSource =
DataSource(
sparkSession,
userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema),
partitionColumns = table.partitionColumnNames,
bucketSpec = table.bucketSpec,
className = table.provider.get,
options = table.storage.properties ++ pathOption ++ instantOption,
catalogTable = Some(table))
LogicalRelation(dataSource.resolveRelation(checkFilesExist = false), table)
} else {
l
}
case p => p case p => p
} }

View File

@@ -0,0 +1,241 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.hudi
import org.apache.hudi.HoodieSparkUtils
import org.apache.hudi.common.table.HoodieTableMetaClient
/**
 * SQL tests for the `TIMESTAMP AS OF` time travel syntax on Hudi tables.
 *
 * NOTE(review): time travel is only parsed by the Hudi extended parser on
 * Spark 3.2+, hence every test is guarded by `HoodieSparkUtils.gteqSpark3_2`
 * and silently becomes a no-op on older Spark versions.
 */
class TestTimeTravelTable extends TestHoodieSqlBase {
// Basic round trip: write, capture the commit instant, overwrite, then
// verify that querying `TIMESTAMP AS OF` the first instant sees the old row.
test("Test Insert and Update Record with time travel") {
if (HoodieSparkUtils.gteqSpark3_2) {
withTempDir { tmp =>
val tableName1 = generateTableName
spark.sql(
s"""
|create table $tableName1 (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| tblproperties (
| type = 'cow',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
| location '${tmp.getCanonicalPath}/$tableName1'
""".stripMargin)
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
// Read the timeline directly to get the commit timestamp of the first insert.
val metaClient1 = HoodieTableMetaClient.builder()
.setBasePath(s"${tmp.getCanonicalPath}/$tableName1")
.setConf(spark.sessionState.newHadoopConf())
.build()
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
.lastInstant().get().getTimestamp
// Upsert the same key (id=1), so the latest snapshot only shows 'a2'.
spark.sql(s"insert into $tableName1 values(1, 'a2', 20, 2000)")
checkAnswer(s"select id, name, price, ts from $tableName1")(
Seq(1, "a2", 20.0, 2000)
)
// time travel from instant1
checkAnswer(
s"select id, name, price, ts from $tableName1 TIMESTAMP AS OF '$instant1'")(
Seq(1, "a1", 10.0, 1000)
)
}
}
}
// Verifies a time-travel SELECT can feed an INSERT INTO another table,
// for both dynamic and static partition inserts.
test("Test Insert Into Records with time travel To new Table") {
if (HoodieSparkUtils.gteqSpark3_2) {
withTempDir { tmp =>
// Create Non-Partitioned table
val tableName1 = generateTableName
spark.sql(
s"""
|create table $tableName1 (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| tblproperties (
| type = 'cow',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
| location '${tmp.getCanonicalPath}/$tableName1'
""".stripMargin)
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
// Capture the instant of the only commit on the source table.
val metaClient1 = HoodieTableMetaClient.builder()
.setBasePath(s"${tmp.getCanonicalPath}/$tableName1")
.setConf(spark.sessionState.newHadoopConf())
.build()
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
.lastInstant().get().getTimestamp
val tableName2 = generateTableName
// Create a partitioned table
spark.sql(
s"""
|create table $tableName2 (
| id int,
| name string,
| price double,
| ts long,
| dt string
|) using hudi
| tblproperties (primaryKey = 'id')
| partitioned by (dt)
| location '${tmp.getCanonicalPath}/$tableName2'
""".stripMargin)
// Insert into dynamic partition
spark.sql(
s"""
| insert into $tableName2
| select id, name, price, ts, '2022-02-14' as dt
| from $tableName1 TIMESTAMP AS OF '$instant1'
""".stripMargin)
checkAnswer(s"select id, name, price, ts, dt from $tableName2")(
Seq(1, "a1", 10.0, 1000, "2022-02-14")
)
// Insert into static partition
spark.sql(
s"""
| insert into $tableName2 partition(dt = '2022-02-15')
| select 2 as id, 'a2' as name, price, ts
| from $tableName1 TIMESTAMP AS OF '$instant1'
""".stripMargin)
checkAnswer(
s"select id, name, price, ts, dt from $tableName2")(
Seq(1, "a1", 10.0, 1000, "2022-02-14"),
Seq(2, "a2", 10.0, 1000, "2022-02-15")
)
}
}
}
// Verifies two time-travel relations can be combined in a single UNION
// query, for both COW and MOR table types.
test("Test Two Table's Union Join with time travel") {
if (HoodieSparkUtils.gteqSpark3_2) {
withTempDir { tmp =>
Seq("cow", "mor").foreach { tableType =>
val tableName = generateTableName
val basePath = tmp.getCanonicalPath
val tableName1 = tableName + "_1"
val tableName2 = tableName + "_2"
val path1 = s"$basePath/$tableName1"
val path2 = s"$basePath/$tableName2"
spark.sql(
s"""
|create table $tableName1 (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| tblproperties (
| type = '$tableType',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
| location '$path1'
""".stripMargin)
spark.sql(
s"""
|create table $tableName2 (
| id int,
| name string,
| price double,
| ts long
|) using hudi
| tblproperties (
| type = '$tableType',
| primaryKey = 'id',
| preCombineField = 'ts'
| )
| location '$path2'
""".stripMargin)
spark.sql(s"insert into $tableName1 values(1, 'a1', 10, 1000)")
spark.sql(s"insert into $tableName1 values(2, 'a2', 20, 1000)")
checkAnswer(s"select id, name, price, ts from $tableName1")(
Seq(1, "a1", 10.0, 1000),
Seq(2, "a2", 20.0, 1000)
)
checkAnswer(s"select id, name, price, ts from $tableName1")(
Seq(1, "a1", 10.0, 1000),
Seq(2, "a2", 20.0, 1000)
)
spark.sql(s"insert into $tableName2 values(3, 'a3', 10, 1000)")
spark.sql(s"insert into $tableName2 values(4, 'a4', 20, 1000)")
checkAnswer(s"select id, name, price, ts from $tableName2")(
Seq(3, "a3", 10.0, 1000),
Seq(4, "a4", 20.0, 1000)
)
// Capture the latest commit instant of each table independently.
val metaClient1 = HoodieTableMetaClient.builder()
.setBasePath(path1)
.setConf(spark.sessionState.newHadoopConf())
.build()
val metaClient2 = HoodieTableMetaClient.builder()
.setBasePath(path2)
.setConf(spark.sessionState.newHadoopConf())
.build()
val instant1 = metaClient1.getActiveTimeline.getAllCommitsTimeline
.lastInstant().get().getTimestamp
val instant2 = metaClient2.getActiveTimeline.getAllCommitsTimeline
.lastInstant().get().getTimestamp
// Each branch of the union time-travels its own table to its own instant.
val sql =
s"""
|select id, name, price, ts from $tableName1 TIMESTAMP AS OF '$instant1' where id=1
|union
|select id, name, price, ts from $tableName2 TIMESTAMP AS OF '$instant2' where id>1
|""".stripMargin
checkAnswer(sql)(
Seq(1, "a1", 10.0, 1000),
Seq(3, "a3", 10.0, 1000),
Seq(4, "a4", 20.0, 1000)
)
}
}
}
}
}

View File

@@ -87,6 +87,7 @@ assignmentList
assignment assignment
: key=qualifiedName EQ value=expression : key=qualifiedName EQ value=expression
; ;
qualifiedNameList qualifiedNameList
: qualifiedName (',' qualifiedName)* : qualifiedName (',' qualifiedName)*
; ;

View File

@@ -137,4 +137,18 @@ class Spark2Adapter extends SparkAdapter {
closePartition() closePartition()
partitions.toSeq partitions.toSeq
} }
/**
* if the logical plan is a TimeTravelRelation LogicalPlan.
*/
override def isRelationTimeTravel(plan: LogicalPlan): Boolean = {
false
}
/**
* Get the member of the TimeTravelRelation LogicalPlan.
*/
override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark2")
}
} }

View File

@@ -123,4 +123,18 @@ class Spark3Adapter extends SparkAdapter {
case _=> false case _=> false
} }
} }
/**
* if the logical plan is a TimeTravelRelation LogicalPlan.
*/
override def isRelationTimeTravel(plan: LogicalPlan): Boolean = {
false
}
/**
* Get the member of the TimeTravelRelation LogicalPlan.
*/
override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
throw new IllegalStateException(s"Should not call getRelationTimeTravel for spark3.1.x")
}
} }

View File

@@ -144,6 +144,24 @@
<groupId>org.jacoco</groupId> <groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId> <artifactId>jacoco-maven-plugin</artifactId>
</plugin> </plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>${antlr.version}</version>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
<configuration>
<visitor>true</visitor>
<listener>true</listener>
<sourceDirectory>../hudi-spark3/src/main/antlr4</sourceDirectory>
<libDirectory>../hudi-spark3/src/main/antlr4/imports</libDirectory>
</configuration>
</plugin>
</plugins> </plugins>
</build> </build>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Hudi's extended SQL grammar. It imports the forked Spark SqlBase grammar
// (which carries the TIMESTAMP AS OF / VERSION AS OF additions) and only
// overrides the statement entry points it cares about.
grammar HoodieSqlBase;
// Reuse every rule from the forked Spark 3.2 SqlBase grammar.
import SqlBase;
// Entry point: a single SQL statement followed by end-of-input.
singleStatement
: statement EOF
;
// Recognized statement shapes; anything else falls through to #passThrough,
// which the extended parser delegates back to Spark's own parser.
statement
: query #queryStatement
| ctes? dmlStatementNoWith #dmlStatement
| createTableHeader ('(' colTypeList ')')? tableProvider?
createTableClauses
(AS? query)? #createTable
| .*? #passThrough
;

View File

@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.adapter
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.parser.HoodieSpark3_2ExtendedSqlParser
/**
 * Spark 3.2 specific adapter. On top of the shared Spark3 adapter it adds
 * support for the time-travel logical plan and wires in the Hudi extended
 * SQL parser that understands `TIMESTAMP AS OF` / `VERSION AS OF`.
 */
class Spark3_2Adapter extends Spark3Adapter {

  /** Returns true iff the given plan is a [[TimeTravelRelation]]. */
  override def isRelationTimeTravel(plan: LogicalPlan): Boolean = plan match {
    case _: TimeTravelRelation => true
    case _ => false
  }

  /**
   * Destructures a [[TimeTravelRelation]] into (table, timestamp, version);
   * returns None for any other plan node.
   */
  override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] = {
    plan match {
      case TimeTravelRelation(table, timestamp, version) =>
        Some((table, timestamp, version))
      case _ =>
        None
    }
  }

  /** Supplies the factory for Hudi's extended parser on Spark 3.2. */
  override def createExtendedSparkParser: Option[(SparkSession, ParserInterface) => ParserInterface] = {
    Some { (spark: SparkSession, delegate: ParserInterface) =>
      new HoodieSpark3_2ExtendedSqlParser(spark, delegate)
    }
  }
}

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
// Unresolved logical plan node produced by the extended parser for a
// `table TIMESTAMP AS OF ts` / `VERSION AS OF v` clause. It carries the
// raw relation plus the optional timestamp expression / version string;
// the Hudi analysis rule later rewrites it into a concrete relation.
// NOTE(review): `resolved` is forced to false so the analyzer rule always
// gets a chance to rewrite this node.
case class TimeTravelRelation(
table: LogicalPlan,
timestamp: Option[Expression],
version: Option[String]) extends Command {
// Leaf-like: the wrapped `table` is intentionally not exposed as a child.
override def children: Seq[LogicalPlan] = Seq.empty
override def output: Seq[Attribute] = Nil
override lazy val resolved: Boolean = false
// No children, so rewrites with new children are identity.
def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = this
}

View File

@@ -0,0 +1,176 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.parser
import org.antlr.v4.runtime._
import org.antlr.v4.runtime.atn.PredictionMode
import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException}
import org.antlr.v4.runtime.tree.TerminalNodeImpl
import org.apache.hudi.spark.sql.parser.HoodieSqlBaseParser.{NonReservedContext, QuotedIdentifierContext}
import org.apache.hudi.spark.sql.parser.{HoodieSqlBaseBaseListener, HoodieSqlBaseLexer, HoodieSqlBaseParser}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{AnalysisException, SparkSession}
/**
 * Hudi's extended SQL parser for Spark 3.2. It parses statements with the
 * generated HoodieSqlBase ANTLR parser (which adds TIMESTAMP AS OF /
 * VERSION AS OF), and delegates everything it does not recognize to the
 * underlying Spark parser.
 *
 * @param session  the active SparkSession, used to read SQL configuration
 * @param delegate Spark's own parser, used as the fallback for all other
 *                 parse entry points
 */
class HoodieSpark3_2ExtendedSqlParser(session: SparkSession, delegate: ParserInterface)
extends ParserInterface with Logging {
// SQL conf drives ANTLR flags below (ANSI keywords, exponent literals).
private lazy val conf = session.sqlContext.conf
private lazy val builder = new HoodieSpark3_2ExtendedSqlAstBuilder(conf, delegate)
// Parse with the extended grammar first; if the AST builder does not
// produce a LogicalPlan (e.g. a #passThrough rule matched), re-parse with
// the delegate Spark parser.
override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
builder.visit(parser.singleStatement()) match {
case plan: LogicalPlan => plan
case _=> delegate.parsePlan(sqlText)
}
}
// All non-plan entry points are delegated untouched to Spark's parser.
override def parseExpression(sqlText: String): Expression = delegate.parseExpression(sqlText)
override def parseTableIdentifier(sqlText: String): TableIdentifier =
delegate.parseTableIdentifier(sqlText)
override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
delegate.parseFunctionIdentifier(sqlText)
override def parseTableSchema(sqlText: String): StructType = delegate.parseTableSchema(sqlText)
override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText)
/**
 * Runs `toResult` against an ANTLR parser built over `command`, using the
 * standard Spark two-phase strategy: try the faster SLL prediction mode
 * first and fall back to full LL mode if that fails.
 */
protected def parse[T](command: String)(toResult: HoodieSqlBaseParser => T): T = {
logDebug(s"Parsing command: $command")
// Wrap the input so the lexer sees upper-cased lookahead (case-insensitive SQL).
val lexer = new HoodieSqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
lexer.removeErrorListeners()
lexer.addErrorListener(ParseErrorListener)
val tokenStream = new CommonTokenStream(lexer)
val parser = new HoodieSqlBaseParser(tokenStream)
parser.addParseListener(PostProcessor)
parser.removeErrorListeners()
parser.addErrorListener(ParseErrorListener)
// parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced
parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
parser.SQL_standard_keyword_behavior = conf.ansiEnabled
try {
try {
// first, try parsing with potentially faster SLL mode
parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
toResult(parser)
}
catch {
case e: ParseCancellationException =>
// if we fail, parse with LL mode
tokenStream.seek(0) // rewind input stream
parser.reset()
// Try Again.
parser.getInterpreter.setPredictionMode(PredictionMode.LL)
toResult(parser)
}
}
catch {
// Attach the original SQL text to parse errors that lack it, and convert
// AnalysisExceptions raised during parsing into positioned ParseExceptions.
case e: ParseException if e.command.isDefined =>
throw e
case e: ParseException =>
throw e.withCommand(command)
case e: AnalysisException =>
val position = Origin(e.line, e.startPosition)
throw new ParseException(Option(command), e.message, position, position)
}
}
override def parseMultipartIdentifier(sqlText: String): Seq[String] = {
delegate.parseMultipartIdentifier(sqlText)
}
}
/**
 * Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`.
 *
 * A CharStream decorator that upper-cases lookahead characters (LA) so the
 * lexer matches SQL keywords case-insensitively, while `getText` still
 * returns the original-cased input for identifiers and literals.
 */
class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream {
override def consume(): Unit = wrapped.consume
override def getSourceName(): String = wrapped.getSourceName
override def index(): Int = wrapped.index
override def mark(): Int = wrapped.mark
override def release(marker: Int): Unit = wrapped.release(marker)
override def seek(where: Int): Unit = wrapped.seek(where)
override def size(): Int = wrapped.size
override def getText(interval: Interval): String = {
// ANTLR 4.7's CodePointCharStream implementations have bugs when
// getText() is called with an empty stream, or intervals where
// the start > end. See
// https://github.com/antlr/antlr4/commit/ac9f7530 for one fix
// that is not yet in a released ANTLR artifact.
if (size() > 0 && (interval.b - interval.a >= 0)) {
wrapped.getText(interval)
} else {
""
}
}
// scalastyle:off
override def LA(i: Int): Int = {
// scalastyle:on
// Upper-case lookahead only; EOF (and 0) pass through unchanged.
val la = wrapped.LA(i)
if (la == 0 || la == IntStream.EOF) la
else Character.toUpperCase(la)
}
}
/**
 * Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`.
 *
 * Parse-tree listener that rewrites certain tokens into IDENTIFIER tokens
 * after the parser matches them: strips back-ticks from quoted identifiers
 * and lets non-reserved keywords act as plain identifiers.
 */
case object PostProcessor extends HoodieSqlBaseBaseListener {
/** Remove the back ticks from an Identifier. */
override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
replaceTokenByIdentifier(ctx, 1) { token =>
// Remove the double back ticks in the string.
token.setText(token.getText.replace("``", "`"))
token
}
}
/** Treat non-reserved keywords as Identifiers. */
override def exitNonReserved(ctx: NonReservedContext): Unit = {
replaceTokenByIdentifier(ctx, 0)(identity)
}
// Replaces the last child of ctx's parent with a new IDENTIFIER terminal
// built from ctx's first token, trimming `stripMargins` characters off each
// end (used to drop the surrounding back-ticks), then applies `f`.
private def replaceTokenByIdentifier(
ctx: ParserRuleContext,
stripMargins: Int)(
f: CommonToken => CommonToken = identity): Unit = {
val parent = ctx.getParent
parent.removeLastChild()
val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
val newToken = new CommonToken(
new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
HoodieSqlBaseParser.IDENTIFIER,
token.getChannel,
token.getStartIndex + stripMargins,
token.getStopIndex - stripMargins)
parent.addChild(new TerminalNodeImpl(f(newToken)))
}
}

View File

@@ -27,7 +27,7 @@
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/> <check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"/>
<check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true"> <check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="true">
<parameters> <parameters>
<parameter name="maxFileLength"><![CDATA[1000]]></parameter> <parameter name="maxFileLength"><![CDATA[5000]]></parameter>
</parameters> </parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/> <check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"/>
@@ -113,7 +113,7 @@
</check> </check>
<check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true"> <check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
<parameters> <parameters>
<parameter name="maxMethods"><![CDATA[40]]></parameter> <parameter name="maxMethods"><![CDATA[500]]></parameter>
</parameters> </parameters>
</check> </check>
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"/> <check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="false"/>