[HUDI-1869] Upgrading Spark3 To 3.1 (#3844)
Co-authored-by: pengzhiwei <pengzhiwei2015@icloud.com>
This commit is contained in:
@@ -28,12 +28,15 @@ import org.apache.avro.Schema.Type._
|
|||||||
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
||||||
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
|
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
|
||||||
import org.apache.avro.{LogicalTypes, Schema}
|
import org.apache.avro.{LogicalTypes, Schema}
|
||||||
|
|
||||||
import org.apache.spark.sql.Row
|
import org.apache.spark.sql.Row
|
||||||
import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
||||||
import org.apache.spark.sql.catalyst.util.DateTimeUtils
|
import org.apache.spark.sql.catalyst.util.DateTimeUtils
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
|
|
||||||
import org.apache.hudi.AvroConversionUtils._
|
import org.apache.hudi.AvroConversionUtils._
|
||||||
|
import org.apache.hudi.exception.HoodieIncompatibleSchemaException
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
@@ -131,7 +134,7 @@ object AvroConversionHelper {
|
|||||||
case null =>
|
case null =>
|
||||||
new Timestamp(item.asInstanceOf[Long])
|
new Timestamp(item.asInstanceOf[Long])
|
||||||
case other =>
|
case other =>
|
||||||
throw new IncompatibleSchemaException(
|
throw new HoodieIncompatibleSchemaException(
|
||||||
s"Cannot convert Avro logical type $other to Catalyst Timestamp type.")
|
s"Cannot convert Avro logical type $other to Catalyst Timestamp type.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -149,7 +152,7 @@ object AvroConversionHelper {
|
|||||||
converters(i) = converter
|
converters(i) = converter
|
||||||
avroFieldIndexes(i) = avroField.pos()
|
avroFieldIndexes(i) = avroField.pos()
|
||||||
} else if (!sqlField.nullable) {
|
} else if (!sqlField.nullable) {
|
||||||
throw new IncompatibleSchemaException(
|
throw new HoodieIncompatibleSchemaException(
|
||||||
s"Cannot find non-nullable field ${sqlField.name} at path ${path.mkString(".")} " +
|
s"Cannot find non-nullable field ${sqlField.name} at path ${path.mkString(".")} " +
|
||||||
"in Avro schema\n" +
|
"in Avro schema\n" +
|
||||||
s"Source Avro schema: $sourceAvroSchema.\n" +
|
s"Source Avro schema: $sourceAvroSchema.\n" +
|
||||||
@@ -254,7 +257,7 @@ object AvroConversionHelper {
|
|||||||
converted(i) = fieldConverters(i)(item)
|
converted(i) = fieldConverters(i)(item)
|
||||||
new GenericRow(converted)
|
new GenericRow(converted)
|
||||||
}
|
}
|
||||||
case _ => throw new IncompatibleSchemaException(
|
case _ => throw new HoodieIncompatibleSchemaException(
|
||||||
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
||||||
s"${path.mkString(".")} is not compatible " +
|
s"${path.mkString(".")} is not compatible " +
|
||||||
s"(avroType = $other, sqlType = $sqlType). \n" +
|
s"(avroType = $other, sqlType = $sqlType). \n" +
|
||||||
@@ -263,7 +266,7 @@ object AvroConversionHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case (left, right) =>
|
case (left, right) =>
|
||||||
throw new IncompatibleSchemaException(
|
throw new HoodieIncompatibleSchemaException(
|
||||||
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
s"Cannot convert Avro schema to catalyst type because schema at path " +
|
||||||
s"${path.mkString(".")} is not compatible (avroType = $left, sqlType = $right). \n" +
|
s"${path.mkString(".")} is not compatible (avroType = $left, sqlType = $right). \n" +
|
||||||
s"Source Avro schema: $sourceAvroSchema.\n" +
|
s"Source Avro schema: $sourceAvroSchema.\n" +
|
||||||
|
|||||||
@@ -87,4 +87,9 @@ trait SparkAdapter extends Serializable {
|
|||||||
* Create Like expression.
|
* Create Like expression.
|
||||||
*/
|
*/
|
||||||
def createLike(left: Expression, right: Expression): Expression
|
def createLike(left: Expression, right: Expression): Expression
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adapter entry point for ParserInterface#parseMultipartIdentifier, which is supported only since spark3; for spark2 this should not be called.
|
||||||
|
*/
|
||||||
|
def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.exception;
|
||||||
|
|
||||||
|
/**
 * Checked exception raised when a schema is found to be incompatible.
 */
public class HoodieIncompatibleSchemaException extends Exception {

  /**
   * @param message description of the incompatibility
   */
  public HoodieIncompatibleSchemaException(String message) {
    super(message);
  }

  /**
   * @param message description of the incompatibility
   * @param cause   underlying error that led to this exception
   */
  public HoodieIncompatibleSchemaException(String message, Throwable cause) {
    super(message, cause);
  }
}
|
||||||
@@ -503,6 +503,13 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-api</artifactId>
|
||||||
|
<version>${slf4j.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
@@ -524,6 +531,5 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import org.apache.hudi.exception.HoodieException
|
|||||||
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
|
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
|
||||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
|
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.avro.{AvroDeserializer, AvroSerializer}
|
import org.apache.spark.sql.avro.{HoodieAvroSerializer, HooodieAvroDeserializer}
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection}
|
import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection}
|
||||||
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
||||||
@@ -119,7 +119,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
tableState.requiredStructSchema
|
tableState.requiredStructSchema
|
||||||
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
||||||
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
||||||
private val deserializer = new AvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
private val deserializer = HooodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
||||||
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
||||||
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
||||||
private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala
|
private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala
|
||||||
@@ -135,7 +135,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
} else {
|
} else {
|
||||||
val requiredAvroRecord = AvroConversionUtils
|
val requiredAvroRecord = AvroConversionUtils
|
||||||
.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder)
|
.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder)
|
||||||
recordToLoad = unsafeProjection(deserializer.deserialize(requiredAvroRecord).asInstanceOf[InternalRow])
|
recordToLoad = unsafeProjection(deserializer.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow])
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -158,7 +158,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
tableState.requiredStructSchema
|
tableState.requiredStructSchema
|
||||||
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
||||||
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
||||||
private val deserializer = new AvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
private val deserializer = HooodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
||||||
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
||||||
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
||||||
private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala
|
private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala
|
||||||
@@ -180,7 +180,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
} else {
|
} else {
|
||||||
val requiredAvroRecord = AvroConversionUtils
|
val requiredAvroRecord = AvroConversionUtils
|
||||||
.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder)
|
.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder)
|
||||||
recordToLoad = unsafeProjection(deserializer.deserialize(requiredAvroRecord).asInstanceOf[InternalRow])
|
recordToLoad = unsafeProjection(deserializer.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow])
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -203,8 +203,8 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
private val requiredFieldPosition =
|
private val requiredFieldPosition =
|
||||||
tableState.requiredStructSchema
|
tableState.requiredStructSchema
|
||||||
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
.map(f => tableAvroSchema.getField(f.name).pos()).toList
|
||||||
private val serializer = new AvroSerializer(tableState.tableStructSchema, tableAvroSchema, false)
|
private val serializer = HoodieAvroSerializer(tableState.tableStructSchema, tableAvroSchema, false)
|
||||||
private val requiredDeserializer = new AvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
private val requiredDeserializer = HooodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema)
|
||||||
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema)
|
||||||
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema)
|
||||||
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
private val logRecords = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config).getRecords
|
||||||
@@ -236,7 +236,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
recordBuilder
|
recordBuilder
|
||||||
)
|
)
|
||||||
recordToLoad = unsafeProjection(requiredDeserializer
|
recordToLoad = unsafeProjection(requiredDeserializer
|
||||||
.deserialize(requiredAvroRecord).asInstanceOf[InternalRow])
|
.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow])
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -264,7 +264,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
recordBuilder
|
recordBuilder
|
||||||
)
|
)
|
||||||
recordToLoad = unsafeProjection(requiredDeserializer
|
recordToLoad = unsafeProjection(requiredDeserializer
|
||||||
.deserialize(requiredAvroRecord).asInstanceOf[InternalRow])
|
.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow])
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.avro
|
||||||
|
|
||||||
|
import org.apache.avro.Schema
|
||||||
|
import org.apache.spark.sql.types.DataType
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subclass of AvroSerializer declared inside the org.apache.spark.sql.avro package: since Spark 3.1
|
||||||
|
* AvroSerializer cannot be accessed from outside that package, so other classes use this one instead.
|
||||||
|
*/
|
||||||
|
case class HoodieAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean)
|
||||||
|
extends AvroSerializer(rootCatalystType, rootAvroType, nullable)
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.sql.avro
|
||||||
|
|
||||||
|
import org.apache.avro.Schema
|
||||||
|
import org.apache.spark.sql.types.DataType
|
||||||
|
|
||||||
|
/**
 * Wrapper over AvroDeserializer whose deserializeData hides the difference in the return
 * type of deserialize between Spark 3.1 (an Option) and other Spark versions (the data itself).
 */
case class HooodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType)
  extends AvroDeserializer(rootAvroType, rootCatalystType) {

  /**
   * Deserializes `data` with the parent deserializer and returns the payload, unwrapping it
   * first when the parent hands it back inside a Some.
   */
  def deserializeData(data: Any): Any = {
    val deserialized: Any = super.deserialize(data)
    deserialized match {
      // Spark 3.1 wraps the result in an Option; hand back the wrapped value.
      case Some(payload) => payload
      // Other Spark versions return the data directly; anything else passes through untouched.
      case unwrapped => unwrapped
    }
  }
}
|
||||||
@@ -31,7 +31,7 @@ import org.apache.hudi.common.util.{ValidationUtils, Option => HOption}
|
|||||||
import org.apache.hudi.config.HoodieWriteConfig
|
import org.apache.hudi.config.HoodieWriteConfig
|
||||||
import org.apache.hudi.io.HoodieWriteHandle
|
import org.apache.hudi.io.HoodieWriteHandle
|
||||||
import org.apache.hudi.sql.IExpressionEvaluator
|
import org.apache.hudi.sql.IExpressionEvaluator
|
||||||
import org.apache.spark.sql.avro.{AvroSerializer, SchemaConverters}
|
import org.apache.spark.sql.avro.{AvroSerializer, HoodieAvroSerializer, SchemaConverters}
|
||||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||||
import org.apache.spark.sql.hudi.SerDeUtils
|
import org.apache.spark.sql.hudi.SerDeUtils
|
||||||
import org.apache.spark.sql.hudi.command.payload.ExpressionPayload.getEvaluator
|
import org.apache.spark.sql.hudi.command.payload.ExpressionPayload.getEvaluator
|
||||||
@@ -310,7 +310,7 @@ object ExpressionPayload {
|
|||||||
val conditionEvaluator = ExpressionCodeGen.doCodeGen(Seq(condition), conditionSerializer)
|
val conditionEvaluator = ExpressionCodeGen.doCodeGen(Seq(condition), conditionSerializer)
|
||||||
|
|
||||||
val assignSqlType = SchemaConverters.toSqlType(writeSchema).dataType.asInstanceOf[StructType]
|
val assignSqlType = SchemaConverters.toSqlType(writeSchema).dataType.asInstanceOf[StructType]
|
||||||
val assignSerializer = new AvroSerializer(assignSqlType, writeSchema, false)
|
val assignSerializer = new HoodieAvroSerializer(assignSqlType, writeSchema, false)
|
||||||
val assignmentEvaluator = ExpressionCodeGen.doCodeGen(assignments, assignSerializer)
|
val assignmentEvaluator = ExpressionCodeGen.doCodeGen(assignments, assignSerializer)
|
||||||
conditionEvaluator -> assignmentEvaluator
|
conditionEvaluator -> assignmentEvaluator
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hudi.command.payload
|
|||||||
|
|
||||||
import org.apache.avro.generic.IndexedRecord
|
import org.apache.avro.generic.IndexedRecord
|
||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.spark.sql.avro.{AvroDeserializer, SchemaConverters}
|
import org.apache.spark.sql.avro.{HooodieAvroDeserializer, SchemaConverters}
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
|
|
||||||
@@ -29,8 +29,8 @@ import org.apache.spark.sql.types._
|
|||||||
class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord {
|
class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord {
|
||||||
|
|
||||||
private lazy val sqlType = SchemaConverters.toSqlType(getSchema).dataType.asInstanceOf[StructType]
|
private lazy val sqlType = SchemaConverters.toSqlType(getSchema).dataType.asInstanceOf[StructType]
|
||||||
private lazy val avroDeserializer = new AvroDeserializer(record.getSchema, sqlType)
|
private lazy val avroDeserializer = HooodieAvroDeserializer(record.getSchema, sqlType)
|
||||||
private lazy val sqlRow = avroDeserializer.deserialize(record).asInstanceOf[InternalRow]
|
private lazy val sqlRow = avroDeserializer.deserializeData(record).asInstanceOf[InternalRow]
|
||||||
|
|
||||||
override def put(i: Int, v: Any): Unit = {
|
override def put(i: Int, v: Any): Unit = {
|
||||||
record.put(i, v)
|
record.put(i, v)
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ class HoodieCommonSqlParser(session: SparkSession, delegate: ParserInterface)
|
|||||||
}
|
}
|
||||||
|
|
||||||
def parseMultipartIdentifier(sqlText: String): Seq[String] = {
|
def parseMultipartIdentifier(sqlText: String): Seq[String] = {
|
||||||
throw new UnsupportedOperationException(s"Unsupported parseMultipartIdentifier method")
|
sparkAdapter.parseMultipartIdentifier(delegate, sqlText)
|
||||||
}
|
}
|
||||||
|
|
||||||
protected def parse[T](command: String)(toResult: HoodieSqlCommonParser => T): T = {
|
protected def parse[T](command: String)(toResult: HoodieSqlCommonParser => T): T = {
|
||||||
|
|||||||
@@ -137,7 +137,8 @@ class TestDataSourceForBootstrap {
|
|||||||
verifyIncrementalViewResult(commitInstantTime1, commitInstantTime2, isPartitioned = false, isHiveStylePartitioned = false)
|
verifyIncrementalViewResult(commitInstantTime1, commitInstantTime2, isPartitioned = false, isHiveStylePartitioned = false)
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test def testMetadataBootstrapCOWHiveStylePartitioned(): Unit = {
|
@Test
|
||||||
|
def testMetadataBootstrapCOWHiveStylePartitioned(): Unit = {
|
||||||
val timestamp = Instant.now.toEpochMilli
|
val timestamp = Instant.now.toEpochMilli
|
||||||
val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext)
|
val jsc = JavaSparkContext.fromSparkContext(spark.sparkContext)
|
||||||
|
|
||||||
|
|||||||
@@ -46,8 +46,8 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
// insert data
|
// insert data
|
||||||
spark.sql(s"""insert into $tableName values (1, "z3", "v1", "2021-10-01"), (2, "l4", "v1", "2021-10-02")""")
|
spark.sql(s"""insert into $tableName values (1, "z3", "v1", "2021-10-01"), (2, "l4", "v1", "2021-10-02")""")
|
||||||
|
|
||||||
checkException(s"alter table $tableName drop partition (dt='2021-10-01')")(
|
checkExceptionContain(s"alter table $tableName drop partition (dt='2021-10-01')")(
|
||||||
s"dt is not a valid partition column in table `default`.`${tableName}`.;")
|
s"dt is not a valid partition column in table `default`.`$tableName`.")
|
||||||
}
|
}
|
||||||
|
|
||||||
Seq(false, true).foreach { urlencode =>
|
Seq(false, true).foreach { urlencode =>
|
||||||
@@ -115,12 +115,8 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
spark.sql(s"""insert into $tableName values (1, "z3", "v1", "2021-10-01"), (2, "l4", "v1", "2021-10-02")""")
|
spark.sql(s"""insert into $tableName values (1, "z3", "v1", "2021-10-01"), (2, "l4", "v1", "2021-10-02")""")
|
||||||
|
|
||||||
// specify duplicate partition columns
|
// specify duplicate partition columns
|
||||||
try {
|
checkExceptionContain(s"alter table $tableName drop partition (dt='2021-10-01', dt='2021-10-02')")(
|
||||||
spark.sql(s"alter table $tableName drop partition (dt='2021-10-01', dt='2021-10-02')")
|
"Found duplicate keys 'dt'")
|
||||||
} catch {
|
|
||||||
case NonFatal(e) =>
|
|
||||||
assert(e.getMessage.contains("Found duplicate keys 'dt'"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// drop 2021-10-01 partition
|
// drop 2021-10-01 partition
|
||||||
spark.sql(s"alter table $tableName drop partition (dt='2021-10-01')")
|
spark.sql(s"alter table $tableName drop partition (dt='2021-10-01')")
|
||||||
@@ -164,8 +160,8 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
|
|||||||
|""".stripMargin)
|
|""".stripMargin)
|
||||||
|
|
||||||
// not all partition columns are specified
|
// not all partition columns are specified
|
||||||
checkException(s"alter table $tableName drop partition (year='2021', month='10')")(
|
checkExceptionContain(s"alter table $tableName drop partition (year='2021', month='10')")(
|
||||||
"All partition columns need to be specified for Hoodie's dropping partition;"
|
"All partition columns need to be specified for Hoodie's dropping partition"
|
||||||
)
|
)
|
||||||
// drop 2021-10-01 partition
|
// drop 2021-10-01 partition
|
||||||
spark.sql(s"alter table $tableName drop partition (year='2021', month='10', day='01')")
|
spark.sql(s"alter table $tableName drop partition (year='2021', month='10', day='01')")
|
||||||
|
|||||||
@@ -18,13 +18,15 @@
|
|||||||
package org.apache.spark.sql.hudi
|
package org.apache.spark.sql.hudi
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
|
|
||||||
import org.apache.log4j.Level
|
import org.apache.log4j.Level
|
||||||
|
import org.apache.spark.sql.catalyst.util.DateTimeUtils
|
||||||
import org.apache.spark.sql.{Row, SparkSession}
|
import org.apache.spark.sql.{Row, SparkSession}
|
||||||
import org.apache.spark.util.Utils
|
import org.apache.spark.util.Utils
|
||||||
import org.scalactic.source
|
import org.scalactic.source
|
||||||
import org.scalatest.{BeforeAndAfterAll, FunSuite, Tag}
|
import org.scalatest.{BeforeAndAfterAll, FunSuite, Tag}
|
||||||
|
|
||||||
|
import java.util.TimeZone
|
||||||
|
|
||||||
class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
|
class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
|
||||||
org.apache.log4j.Logger.getRootLogger.setLevel(Level.WARN)
|
org.apache.log4j.Logger.getRootLogger.setLevel(Level.WARN)
|
||||||
|
|
||||||
@@ -34,6 +36,7 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
|
|||||||
dir
|
dir
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TimeZone.setDefault(DateTimeUtils.getTimeZone("CTT"))
|
||||||
protected lazy val spark: SparkSession = SparkSession.builder()
|
protected lazy val spark: SparkSession = SparkSession.builder()
|
||||||
.master("local[1]")
|
.master("local[1]")
|
||||||
.appName("hoodie sql test")
|
.appName("hoodie sql test")
|
||||||
@@ -43,6 +46,7 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
|
|||||||
.config("hoodie.upsert.shuffle.parallelism", "4")
|
.config("hoodie.upsert.shuffle.parallelism", "4")
|
||||||
.config("hoodie.delete.shuffle.parallelism", "4")
|
.config("hoodie.delete.shuffle.parallelism", "4")
|
||||||
.config("spark.sql.warehouse.dir", sparkWareHouse.getCanonicalPath)
|
.config("spark.sql.warehouse.dir", sparkWareHouse.getCanonicalPath)
|
||||||
|
.config("spark.sql.session.timeZone", "CTT")
|
||||||
.getOrCreate()
|
.getOrCreate()
|
||||||
|
|
||||||
private var tableId = 0
|
private var tableId = 0
|
||||||
@@ -92,6 +96,19 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll {
|
|||||||
assertResult(true)(hasException)
|
assertResult(true)(hasException)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs `sql` and verifies that it fails with an exception whose message contains `errorMsg`.
 *
 * The test fails when the statement succeeds, or when the thrown exception's message
 * (treated as empty if it is null) does not contain `errorMsg`.
 *
 * @param sql      the SQL statement that is expected to fail
 * @param errorMsg the fragment that must appear in the exception message
 */
protected def checkExceptionContain(sql: String)(errorMsg: String): Unit = {
  // Try only captures non-fatal throwables, so fatal JVM errors still propagate to the runner.
  scala.util.Try(spark.sql(sql)) match {
    case scala.util.Failure(e) =>
      // getMessage may be null; compare against an empty string instead of hitting an NPE.
      val actualMsg = Option(e.getMessage).getOrElse("")
      assert(actualMsg.contains(errorMsg),
        s"Expected an error message containing [$errorMsg] but got [$actualMsg]")
    case scala.util.Success(_) =>
      fail(s"Expected an exception containing [$errorMsg], but no exception was thrown by: $sql")
  }
}
|
||||||
|
|
||||||
|
|
||||||
protected def removeQuotes(value: Any): Any = {
|
protected def removeQuotes(value: Any): Any = {
|
||||||
value match {
|
value match {
|
||||||
case s: String => s.stripPrefix("'").stripSuffix("'")
|
case s: String => s.stripPrefix("'").stripSuffix("'")
|
||||||
|
|||||||
@@ -353,19 +353,7 @@ class TestMergeIntoTable2 extends TestHoodieSqlBase {
|
|||||||
|""".stripMargin
|
|""".stripMargin
|
||||||
|
|
||||||
if (HoodieSqlUtils.isSpark3) {
|
if (HoodieSqlUtils.isSpark3) {
|
||||||
checkException(mergeSql)(
|
checkExceptionContain(mergeSql)("Columns aliases are not allowed in MERGE")
|
||||||
"\nColumns aliases are not allowed in MERGE.(line 5, pos 5)\n\n" +
|
|
||||||
"== SQL ==\n\r\n" +
|
|
||||||
s" merge into $tableName\r\n" +
|
|
||||||
" using (\r\n" +
|
|
||||||
" select 1, 'a1', 10, 1000, '1'\r\n" +
|
|
||||||
" ) s0(id,name,price,ts,flag)\r\n" +
|
|
||||||
"-----^^^\n" +
|
|
||||||
s" on s0.id = $tableName.id\r\n" +
|
|
||||||
" when matched and flag = '1' then update set\r\n" +
|
|
||||||
" id = s0.id, name = s0.name, price = s0.price, ts = s0.ts\r\n" +
|
|
||||||
" when not matched and flag = '1' then insert *\r\n"
|
|
||||||
)
|
|
||||||
} else {
|
} else {
|
||||||
spark.sql(mergeSql)
|
spark.sql(mergeSql)
|
||||||
checkAnswer(s"select id, name, price, ts from $tableName")(
|
checkAnswer(s"select id, name, price, ts from $tableName")(
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
|)""".stripMargin)
|
|)""".stripMargin)
|
||||||
|
|
||||||
checkException(
|
checkExceptionContain(
|
||||||
s"""
|
s"""
|
||||||
|merge into $tableName t0
|
|merge into $tableName t0
|
||||||
|using ( select 1 as id, 'a1' as name, 12 as price) s0
|
|using ( select 1 as id, 'a1' as name, 12 as price) s0
|
||||||
@@ -106,7 +106,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
|when matched then update set price = s0.price
|
|when matched then update set price = s0.price
|
||||||
""".stripMargin)(
|
""".stripMargin)(
|
||||||
"Missing specify value for the preCombineField: _ts in merge-into update action. " +
|
"Missing specify value for the preCombineField: _ts in merge-into update action. " +
|
||||||
"You should add '... update set _ts = xx....' to the when-matched clause.;")
|
"You should add '... update set _ts = xx....' to the when-matched clause.")
|
||||||
|
|
||||||
val tableName2 = generateTableName
|
val tableName2 = generateTableName
|
||||||
spark.sql(
|
spark.sql(
|
||||||
@@ -123,7 +123,7 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
| preCombineField = '_ts'
|
| preCombineField = '_ts'
|
||||||
|)""".stripMargin)
|
|)""".stripMargin)
|
||||||
|
|
||||||
checkException(
|
checkExceptionContain(
|
||||||
s"""
|
s"""
|
||||||
|merge into $tableName2 t0
|
|merge into $tableName2 t0
|
||||||
|using ( select 1 as id, 'a1' as name, 12 as price, 1000 as ts) s0
|
|using ( select 1 as id, 'a1' as name, 12 as price, 1000 as ts) s0
|
||||||
@@ -132,6 +132,6 @@ class TestPartialUpdateForMergeInto extends TestHoodieSqlBase {
|
|||||||
""".stripMargin)(
|
""".stripMargin)(
|
||||||
"Missing specify the value for target field: 'id' in merge into update action for MOR table. " +
|
"Missing specify the value for target field: 'id' in merge into update action for MOR table. " +
|
||||||
"Currently we cannot support partial update for MOR, please complete all the target fields " +
|
"Currently we cannot support partial update for MOR, please complete all the target fields " +
|
||||||
"just like '...update set id = s0.id, name = s0.name ....';")
|
"just like '...update set id = s0.id, name = s0.name ....'")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -82,4 +82,8 @@ class Spark2Adapter extends SparkAdapter {
|
|||||||
override def createLike(left: Expression, right: Expression): Expression = {
|
override def createLike(left: Expression, right: Expression): Expression = {
|
||||||
Like(left, right)
|
Like(left, right)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Always fails: this adapter targets spark2, and per the thrown message
 * ParserInterface#parseMultipartIdentifier should not be called there.
 */
override def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String] =
  throw new IllegalStateException("Should not call ParserInterface#parseMultipartIdentifier for spark2")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.spark3.internal;
|
||||||
|
|
||||||
|
import org.apache.spark.sql.catalyst.plans.logical.InsertIntoStatement;
|
||||||
|
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
|
||||||
|
import scala.Option;
|
||||||
|
import scala.collection.Seq;
|
||||||
|
import scala.collection.immutable.Map;
|
||||||
|
|
||||||
|
import java.lang.reflect.Constructor;
|
||||||
|
|
||||||
|
public class ReflectUtil {
|
||||||
|
|
||||||
|
public static InsertIntoStatement createInsertInto(boolean isSpark30, LogicalPlan table, Map<String, Option<String>> partition, Seq<String> userSpecifiedCols,
|
||||||
|
LogicalPlan query, boolean overwrite, boolean ifPartitionNotExists) {
|
||||||
|
try {
|
||||||
|
if (isSpark30) {
|
||||||
|
Constructor<InsertIntoStatement> constructor = InsertIntoStatement.class.getConstructor(
|
||||||
|
LogicalPlan.class, Map.class, LogicalPlan.class, boolean.class, boolean.class);
|
||||||
|
return constructor.newInstance(table, partition, query, overwrite, ifPartitionNotExists);
|
||||||
|
} else {
|
||||||
|
Constructor<InsertIntoStatement> constructor = InsertIntoStatement.class.getConstructor(
|
||||||
|
LogicalPlan.class, Map.class, Seq.class, LogicalPlan.class, boolean.class, boolean.class);
|
||||||
|
return constructor.newInstance(table, partition, userSpecifiedCols, query, overwrite, ifPartitionNotExists);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Error in create InsertIntoStatement", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -19,10 +19,13 @@ package org.apache.spark.sql.adapter
|
|||||||
|
|
||||||
import org.apache.hudi.Spark3RowSerDe
|
import org.apache.hudi.Spark3RowSerDe
|
||||||
import org.apache.hudi.client.utils.SparkRowSerDe
|
import org.apache.hudi.client.utils.SparkRowSerDe
|
||||||
|
import org.apache.hudi.spark3.internal.ReflectUtil
|
||||||
|
import org.apache.spark.SPARK_VERSION
|
||||||
import org.apache.spark.sql.Row
|
import org.apache.spark.sql.Row
|
||||||
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
|
||||||
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||||
import org.apache.spark.sql.catalyst.expressions.{Expression, Like}
|
import org.apache.spark.sql.catalyst.expressions.{Expression, Like}
|
||||||
|
import org.apache.spark.sql.catalyst.parser.ParserInterface
|
||||||
import org.apache.spark.sql.catalyst.plans.JoinType
|
import org.apache.spark.sql.catalyst.plans.JoinType
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, JoinHint, LogicalPlan}
|
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, JoinHint, LogicalPlan}
|
||||||
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
|
||||||
@@ -67,15 +70,16 @@ class Spark3Adapter extends SparkAdapter {
|
|||||||
override def getInsertIntoChildren(plan: LogicalPlan):
|
override def getInsertIntoChildren(plan: LogicalPlan):
|
||||||
Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)] = {
|
Option[(LogicalPlan, Map[String, Option[String]], LogicalPlan, Boolean, Boolean)] = {
|
||||||
plan match {
|
plan match {
|
||||||
case InsertIntoStatement(table, partitionSpec, query, overwrite, ifPartitionNotExists) =>
|
case insert: InsertIntoStatement =>
|
||||||
Some((table, partitionSpec, query, overwrite, ifPartitionNotExists))
|
Some((insert.table, insert.partitionSpec, insert.query, insert.overwrite, insert.ifPartitionNotExists))
|
||||||
case _=> None
|
case _ =>
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
override def createInsertInto(table: LogicalPlan, partition: Map[String, Option[String]],
|
override def createInsertInto(table: LogicalPlan, partition: Map[String, Option[String]],
|
||||||
query: LogicalPlan, overwrite: Boolean, ifPartitionNotExists: Boolean): LogicalPlan = {
|
query: LogicalPlan, overwrite: Boolean, ifPartitionNotExists: Boolean): LogicalPlan = {
|
||||||
InsertIntoStatement(table, partition, query, overwrite, ifPartitionNotExists)
|
ReflectUtil.createInsertInto(SPARK_VERSION.startsWith("3.0"), table, partition, Seq.empty[String], query, overwrite, ifPartitionNotExists)
|
||||||
}
|
}
|
||||||
|
|
||||||
override def createSparkParsePartitionUtil(conf: SQLConf): SparkParsePartitionUtil = {
|
override def createSparkParsePartitionUtil(conf: SQLConf): SparkParsePartitionUtil = {
|
||||||
@@ -85,4 +89,8 @@ class Spark3Adapter extends SparkAdapter {
|
|||||||
override def createLike(left: Expression, right: Expression): Expression = {
|
override def createLike(left: Expression, right: Expression): Expression = {
|
||||||
new Like(left, right)
|
new Like(left, right)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
override def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String] = {
|
||||||
|
parser.parseMultipartIdentifier(sqlText)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.spark3.internal;
|
||||||
|
|
||||||
|
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||||
|
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation;
|
||||||
|
import org.apache.spark.sql.catalyst.plans.logical.InsertIntoStatement;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for {@link ReflectUtil}.
|
||||||
|
*/
|
||||||
|
public class TestReflectUtil extends HoodieClientTestBase {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDataSourceWriterExtraCommitMetadata() throws Exception {
|
||||||
|
SparkSession spark = sqlContext.sparkSession();
|
||||||
|
|
||||||
|
String insertIntoSql = "insert into test_reflect_util values (1, 'z3', 1, '2021')";
|
||||||
|
InsertIntoStatement statement = (InsertIntoStatement) spark.sessionState().sqlParser().parsePlan(insertIntoSql);
|
||||||
|
|
||||||
|
InsertIntoStatement newStatment = ReflectUtil.createInsertInto(
|
||||||
|
spark.version().startsWith("3.0"),
|
||||||
|
statement.table(),
|
||||||
|
statement.partitionSpec(),
|
||||||
|
scala.collection.immutable.List.empty(),
|
||||||
|
statement.query(),
|
||||||
|
statement.overwrite(),
|
||||||
|
statement.ifPartitionNotExists());
|
||||||
|
|
||||||
|
Assertions.assertTrue(
|
||||||
|
((UnresolvedRelation)newStatment.table()).multipartIdentifier().contains("test_reflect_util"));
|
||||||
|
|
||||||
|
if (!spark.version().startsWith("3.0")) {
|
||||||
|
Assertions.assertTrue(newStatment.userSpecifiedCols().isEmpty());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -153,6 +153,12 @@
|
|||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-core_${scala.binary.version}</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Needed for running HiveServer for Tests -->
|
<!-- Needed for running HiveServer for Tests -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.eclipse.jetty.aggregate</groupId>
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
|
|||||||
@@ -36,7 +36,19 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
|||||||
public class TestParquet2SparkSchemaUtils {
|
public class TestParquet2SparkSchemaUtils {
|
||||||
private final SparkToParquetSchemaConverter spark2ParquetConverter =
|
private final SparkToParquetSchemaConverter spark2ParquetConverter =
|
||||||
new SparkToParquetSchemaConverter(new SQLConf());
|
new SparkToParquetSchemaConverter(new SQLConf());
|
||||||
private final SparkSqlParser parser = new SparkSqlParser(new SQLConf());
|
private final SparkSqlParser parser = createSqlParser();
|
||||||
|
|
||||||
|
private static SparkSqlParser createSqlParser() {
|
||||||
|
try {
|
||||||
|
return SparkSqlParser.class.getDeclaredConstructor(SQLConf.class).newInstance(new SQLConf());
|
||||||
|
} catch (Exception ne) {
|
||||||
|
try { // For spark 3.1, there is no constructor with SQLConf, use the default constructor
|
||||||
|
return SparkSqlParser.class.getDeclaredConstructor().newInstance();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testConvertPrimitiveType() {
|
public void testConvertPrimitiveType() {
|
||||||
|
|||||||
15
pom.xml
15
pom.xml
@@ -99,7 +99,7 @@
|
|||||||
<junit.platform.version>1.7.0-M1</junit.platform.version>
|
<junit.platform.version>1.7.0-M1</junit.platform.version>
|
||||||
<mockito.jupiter.version>3.3.3</mockito.jupiter.version>
|
<mockito.jupiter.version>3.3.3</mockito.jupiter.version>
|
||||||
<log4j.version>1.2.17</log4j.version>
|
<log4j.version>1.2.17</log4j.version>
|
||||||
<slf4j.version>1.7.15</slf4j.version>
|
<slf4j.version>1.7.30</slf4j.version>
|
||||||
<joda.version>2.9.9</joda.version>
|
<joda.version>2.9.9</joda.version>
|
||||||
<hadoop.version>2.7.3</hadoop.version>
|
<hadoop.version>2.7.3</hadoop.version>
|
||||||
<hive.groupid>org.apache.hive</hive.groupid>
|
<hive.groupid>org.apache.hive</hive.groupid>
|
||||||
@@ -114,7 +114,7 @@
|
|||||||
<sparkbundle.version>${spark2bundle.version}</sparkbundle.version>
|
<sparkbundle.version>${spark2bundle.version}</sparkbundle.version>
|
||||||
<flink.version>1.13.1</flink.version>
|
<flink.version>1.13.1</flink.version>
|
||||||
<spark2.version>2.4.4</spark2.version>
|
<spark2.version>2.4.4</spark2.version>
|
||||||
<spark3.version>3.0.0</spark3.version>
|
<spark3.version>3.1.2</spark3.version>
|
||||||
<spark2bundle.version></spark2bundle.version>
|
<spark2bundle.version></spark2bundle.version>
|
||||||
<spark3bundle.version>3</spark3bundle.version>
|
<spark3bundle.version>3</spark3bundle.version>
|
||||||
<hudi.spark.module>hudi-spark2</hudi.spark.module>
|
<hudi.spark.module>hudi-spark2</hudi.spark.module>
|
||||||
@@ -1472,6 +1472,7 @@
|
|||||||
<scala.version>${scala12.version}</scala.version>
|
<scala.version>${scala12.version}</scala.version>
|
||||||
<scala.binary.version>2.12</scala.binary.version>
|
<scala.binary.version>2.12</scala.binary.version>
|
||||||
<hudi.spark.module>hudi-spark3</hudi.spark.module>
|
<hudi.spark.module>hudi-spark3</hudi.spark.module>
|
||||||
|
<scalatest.version>3.1.0</scalatest.version>
|
||||||
<kafka.version>2.4.1</kafka.version>
|
<kafka.version>2.4.1</kafka.version>
|
||||||
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
|
<fasterxml.version>${fasterxml.spark3.version}</fasterxml.version>
|
||||||
<fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
|
<fasterxml.jackson.databind.version>${fasterxml.spark3.version}</fasterxml.jackson.databind.version>
|
||||||
@@ -1487,6 +1488,16 @@
|
|||||||
</activation>
|
</activation>
|
||||||
</profile>
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark3.0.x</id>
|
||||||
|
<!-- For Spark 3.0.x we need to override the following properties to package and run tests -->
|
||||||
|
<properties>
|
||||||
|
<spark3.version>3.0.0</spark3.version>
|
||||||
|
<spark.version>${spark3.version}</spark.version>
|
||||||
|
<scalatest.version>3.0.1</scalatest.version>
|
||||||
|
</properties>
|
||||||
|
</profile>
|
||||||
|
|
||||||
<profile>
|
<profile>
|
||||||
<id>skipShadeSources</id>
|
<id>skipShadeSources</id>
|
||||||
<properties>
|
<properties>
|
||||||
|
|||||||
Reference in New Issue
Block a user