[HUDI-3221] Support querying a table as of a savepoint (#4720)
This commit is contained in:
1908
hudi-spark-datasource/hudi-spark3/src/main/antlr4/imports/SqlBase.g4
Normal file
1908
hudi-spark-datasource/hudi-spark3/src/main/antlr4/imports/SqlBase.g4
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
grammar HoodieSqlBase;
|
||||
|
||||
import SqlBase;
|
||||
|
||||
// Entry rule: a single statement that must be followed by end of input.
singleStatement : statement EOF ;
|
||||
|
||||
// Statement alternatives, listed in the same order as before. The final alternative
// matches any token sequence non-greedily and is labelled passThrough.
statement
    : query                                                                       #queryStatement
    | ctes? dmlStatementNoWith                                                    #dmlStatement
    // CREATE TABLE with optional column list, provider and trailing AS query.
    | createTableHeader ('(' colTypeList ')')? tableProvider? createTableClauses (AS? query)?
                                                                                  #createTable
    | .*?                                                                         #passThrough
    ;
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.adapter
|
||||
|
||||
import org.apache.spark.sql.SparkSession
|
||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||
import org.apache.spark.sql.catalyst.parser.ParserInterface
|
||||
import org.apache.spark.sql.catalyst.plans.logical._
|
||||
import org.apache.spark.sql.parser.HoodieSpark3_2ExtendedSqlParser
|
||||
|
||||
/**
 * The adapter for spark3.2.
 *
 * Recognises [[TimeTravelRelation]] plans and supplies the factory for
 * [[HoodieSpark3_2ExtendedSqlParser]].
 */
class Spark3_2Adapter extends Spark3Adapter {

  /**
   * Tells whether the given logical plan is a [[TimeTravelRelation]].
   *
   * @param plan logical plan to inspect
   * @return `true` only when `plan` is a [[TimeTravelRelation]]
   */
  override def isRelationTimeTravel(plan: LogicalPlan): Boolean = plan match {
    case _: TimeTravelRelation => true
    case _ => false
  }

  /**
   * Extracts the members of a [[TimeTravelRelation]] plan.
   *
   * @param plan logical plan to inspect
   * @return `Some((table, timestamp, version))` when `plan` is a [[TimeTravelRelation]],
   *         `None` for any other plan
   */
  override def getRelationTimeTravel(plan: LogicalPlan): Option[(LogicalPlan, Option[Expression], Option[String])] =
    plan match {
      case TimeTravelRelation(table, timestamp, version) =>
        Some((table, timestamp, version))
      case _ =>
        None
    }

  /**
   * Provides the factory that wraps a delegate parser into a
   * [[HoodieSpark3_2ExtendedSqlParser]].
   */
  override def createExtendedSparkParser: Option[(SparkSession, ParserInterface) => ParserInterface] = {
    val parserFactory: (SparkSession, ParserInterface) => ParserInterface =
      (spark, delegate) => new HoodieSpark3_2ExtendedSqlParser(spark, delegate)
    Some(parserFactory)
  }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.catalyst.plans.logical
|
||||
|
||||
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
|
||||
|
||||
/**
 * Logical plan node carrying a table plan together with an optional timestamp expression
 * and an optional version string.
 *
 * The node reports no children and no output attributes, and it is never considered
 * resolved.
 *
 * @param table     plan of the table being referenced (not exposed through `children`)
 * @param timestamp optional timestamp expression
 * @param version   optional version string
 */
case class TimeTravelRelation(
    table: LogicalPlan,
    timestamp: Option[Expression],
    version: Option[String]) extends Command {

  /** This node exposes no child plans. */
  override def children: Seq[LogicalPlan] = Nil

  /** This node produces no output attributes. */
  override def output: Seq[Attribute] = Seq.empty

  /** Always `false`. */
  override lazy val resolved: Boolean = false

  // NOTE(review): declared without `override`, presumably so the class also compiles
  // against Spark versions that do not define this method; confirm before changing.
  /** Ignores `newChildren` and returns this node unchanged. */
  def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = this
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.spark.sql.parser
|
||||
|
||||
import org.antlr.v4.runtime._
|
||||
import org.antlr.v4.runtime.atn.PredictionMode
|
||||
import org.antlr.v4.runtime.misc.{Interval, ParseCancellationException}
|
||||
import org.antlr.v4.runtime.tree.TerminalNodeImpl
|
||||
import org.apache.hudi.spark.sql.parser.HoodieSqlBaseParser.{NonReservedContext, QuotedIdentifierContext}
|
||||
import org.apache.hudi.spark.sql.parser.{HoodieSqlBaseBaseListener, HoodieSqlBaseLexer, HoodieSqlBaseParser}
|
||||
import org.apache.spark.internal.Logging
|
||||
import org.apache.spark.sql.catalyst.expressions._
|
||||
import org.apache.spark.sql.catalyst.parser.{ParseErrorListener, ParseException, ParserInterface}
|
||||
import org.apache.spark.sql.catalyst.plans.logical._
|
||||
import org.apache.spark.sql.catalyst.trees.Origin
|
||||
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
|
||||
import org.apache.spark.sql.types._
|
||||
import org.apache.spark.sql.{AnalysisException, SparkSession}
|
||||
|
||||
/**
 * SQL parser for Spark 3.2 that runs plan parsing through the Hudi grammar
 * ([[HoodieSqlBaseParser]]) and forwards every other entry point to `delegate`.
 *
 * @param session  Spark session whose SQL configuration sets the parser flags
 * @param delegate parser used for the non-plan entry points and as the fallback of `parsePlan`
 */
class HoodieSpark3_2ExtendedSqlParser(session: SparkSession, delegate: ParserInterface)
  extends ParserInterface with Logging {

  private lazy val conf = session.sqlContext.conf
  private lazy val builder = new HoodieSpark3_2ExtendedSqlAstBuilder(conf, delegate)

  /**
   * Parses `sqlText` into a logical plan.
   *
   * The statement is visited by the Hudi AST builder; when the visit does not yield a
   * [[LogicalPlan]], the text is parsed by `delegate` instead.
   */
  override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
    val visited = builder.visit(parser.singleStatement())
    visited match {
      case plan: LogicalPlan => plan
      case _ => delegate.parsePlan(sqlText)
    }
  }

  // The remaining entry points are forwarded to the delegate parser unchanged.

  override def parseExpression(sqlText: String): Expression =
    delegate.parseExpression(sqlText)

  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)

  override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
    delegate.parseFunctionIdentifier(sqlText)

  override def parseMultipartIdentifier(sqlText: String): Seq[String] =
    delegate.parseMultipartIdentifier(sqlText)

  override def parseTableSchema(sqlText: String): StructType =
    delegate.parseTableSchema(sqlText)

  override def parseDataType(sqlText: String): DataType =
    delegate.parseDataType(sqlText)

  /**
   * Builds a lexer/parser pair for `command` and applies `toResult` to the parser.
   *
   * Parsing is attempted in SLL prediction mode first; on a
   * [[ParseCancellationException]] the token stream is rewound, the parser is reset and
   * `toResult` is run again in LL mode.
   *
   * @param command  SQL text to parse
   * @param toResult function turning the configured parser into the result
   * @throws ParseException carrying `command`; an [[AnalysisException]] raised while
   *                        parsing is converted into a [[ParseException]] as well
   */
  protected def parse[T](command: String)(toResult: HoodieSqlBaseParser => T): T = {
    logDebug(s"Parsing command: $command")

    val sqlLexer = new HoodieSqlBaseLexer(new UpperCaseCharStream(CharStreams.fromString(command)))
    sqlLexer.removeErrorListeners()
    sqlLexer.addErrorListener(ParseErrorListener)

    val tokens = new CommonTokenStream(sqlLexer)
    val sqlParser = new HoodieSqlBaseParser(tokens)
    sqlParser.addParseListener(PostProcessor)
    sqlParser.removeErrorListeners()
    sqlParser.addErrorListener(ParseErrorListener)
    // parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced
    sqlParser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled
    sqlParser.SQL_standard_keyword_behavior = conf.ansiEnabled

    try {
      try {
        // First attempt: the potentially faster SLL mode.
        sqlParser.getInterpreter.setPredictionMode(PredictionMode.SLL)
        toResult(sqlParser)
      } catch {
        case _: ParseCancellationException =>
          // SLL gave up: rewind the input and retry in LL mode.
          tokens.seek(0)
          sqlParser.reset()
          sqlParser.getInterpreter.setPredictionMode(PredictionMode.LL)
          toResult(sqlParser)
      }
    } catch {
      case e: ParseException =>
        // Attach the command text only when the exception does not carry one yet.
        throw (if (e.command.isDefined) e else e.withCommand(command))
      case e: AnalysisException =>
        val origin = Origin(e.line, e.startPosition)
        throw new ParseException(Option(command), e.message, origin, origin)
    }
  }
}
|
||||
|
||||
/**
 * Fork from `org.apache.spark.sql.catalyst.parser.UpperCaseCharStream`.
 *
 * Wraps a [[CodePointCharStream]]: look-ahead via `LA` reports upper-cased characters,
 * while all other operations are forwarded to the wrapped stream.
 */
class UpperCaseCharStream(wrapped: CodePointCharStream) extends CharStream {

  override def consume(): Unit = wrapped.consume()

  override def getSourceName(): String = wrapped.getSourceName()

  override def index(): Int = wrapped.index()

  override def mark(): Int = wrapped.mark()

  override def release(marker: Int): Unit = wrapped.release(marker)

  override def seek(where: Int): Unit = wrapped.seek(where)

  override def size(): Int = wrapped.size()

  /**
   * Returns the wrapped stream's text for `interval`, or the empty string when the
   * stream is empty or the interval ends before it starts.
   */
  override def getText(interval: Interval): String = {
    // ANTLR 4.7's CodePointCharStream implementations have bugs when
    // getText() is called with an empty stream, or intervals where
    // the start > end. See
    // https://github.com/antlr/antlr4/commit/ac9f7530 for one fix
    // that is not yet in a released ANTLR artifact.
    if (size() <= 0 || interval.b - interval.a < 0) {
      ""
    } else {
      wrapped.getText(interval)
    }
  }

  /**
   * Looks ahead `i` symbols in the wrapped stream; `0` and `IntStream.EOF` are returned
   * as-is, any other value is upper-cased.
   */
  // scalastyle:off
  override def LA(i: Int): Int = {
    // scalastyle:on
    wrapped.LA(i) match {
      case marker if marker == 0 || marker == IntStream.EOF => marker
      case codePoint => Character.toUpperCase(codePoint)
    }
  }
}
|
||||
|
||||
/**
 * Fork from `org.apache.spark.sql.catalyst.parser.PostProcessor`.
 *
 * Parse listener that rewrites quoted identifiers and non-reserved keywords into plain
 * `IDENTIFIER` tokens in the parse tree.
 */
case object PostProcessor extends HoodieSqlBaseBaseListener {

  /** Remove the back ticks from an Identifier. */
  override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
    replaceTokenByIdentifier(ctx, 1) { identifierToken =>
      // Collapse each doubled back tick inside the identifier into a single one.
      val unescaped = identifierToken.getText.replace("``", "`")
      identifierToken.setText(unescaped)
      identifierToken
    }
  }

  /** Treat non-reserved keywords as Identifiers. */
  override def exitNonReserved(ctx: NonReservedContext): Unit =
    replaceTokenByIdentifier(ctx, 0)(identity)

  /**
   * Replaces the last child of `ctx`'s parent with a terminal node holding an
   * `IDENTIFIER` token derived from the first child of `ctx`.
   *
   * @param ctx          rule context whose first child supplies the original token
   * @param stripMargins number of characters trimmed from each end of the token span
   * @param f            transformation applied to the new token before it is attached
   */
  private def replaceTokenByIdentifier(
      ctx: ParserRuleContext,
      stripMargins: Int)(
      f: CommonToken => CommonToken = identity): Unit = {
    val parentCtx = ctx.getParent
    parentCtx.removeLastChild()
    val sourceToken = ctx.getChild(0).getPayload.asInstanceOf[Token]
    val identifierToken = new CommonToken(
      new org.antlr.v4.runtime.misc.Pair(sourceToken.getTokenSource, sourceToken.getInputStream),
      HoodieSqlBaseParser.IDENTIFIER,
      sourceToken.getChannel,
      // Shrink the span on both sides by `stripMargins` characters.
      sourceToken.getStartIndex + stripMargins,
      sourceToken.getStopIndex - stripMargins)
    val replacement = new TerminalNodeImpl(f(identifierToken))
    parentCtx.addChild(replacement)
  }
}
|
||||
Reference in New Issue
Block a user