[HUDI-91][HUDI-12]Migrate to spark 2.4.4, migrate to spark-avro library instead of databricks-avro, add support for Decimal/Date types
- Upgrade Spark to 2.4.4, Parquet to 1.10.1, Avro to 1.8.2 - Remove spark-avro from hudi-spark-bundle. Users need to provide --packages org.apache.spark:spark-avro:2.4.4 when running spark-shell or spark-submit - Replace com.databricks:spark-avro with org.apache.spark:spark-avro - Shade avro in hudi-hadoop-mr-bundle to make sure it does not conflict with hive's avro version.
This commit is contained in:
committed by
Balaji Varadarajan
parent
d9675c4ec0
commit
ad50008a59
20
LICENSE
20
LICENSE
@@ -241,23 +241,13 @@ This product includes code from https://github.com/twitter/commons/blob/master/s
|
|||||||
limitations under the License.
|
limitations under the License.
|
||||||
=================================================================================================
|
=================================================================================================
|
||||||
|
|
||||||
This product includes code from Databricks spark-avro with the below license
|
This product includes code from Apache Spark
|
||||||
|
|
||||||
* org.apache.hudi.AvroConversionHelper copied from classes in com/databricks/spark/avro package
|
* org.apache.hudi.AvroConversionHelper copied from classes in org/apache/spark/sql/avro package
|
||||||
|
|
||||||
Copyright 2014 Databricks
|
Copyright: 2014 and onwards The Apache Software Foundation
|
||||||
|
Home page: http://spark.apache.org/
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
License: http://www.apache.org/licenses/LICENSE-2.0
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License.
|
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -174,6 +174,8 @@ public class SchemaUtil {
|
|||||||
final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType().getDecimalMetadata();
|
final DecimalMetadata decimalMetadata = parquetType.asPrimitiveType().getDecimalMetadata();
|
||||||
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).append(" , ")
|
return field.append("DECIMAL(").append(decimalMetadata.getPrecision()).append(" , ")
|
||||||
.append(decimalMetadata.getScale()).append(")").toString();
|
.append(decimalMetadata.getScale()).append(")").toString();
|
||||||
|
} else if (originalType == OriginalType.DATE) {
|
||||||
|
return field.append("DATE").toString();
|
||||||
}
|
}
|
||||||
// TODO - fix the method naming here
|
// TODO - fix the method naming here
|
||||||
return parquetPrimitiveTypeName.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
|
return parquetPrimitiveTypeName.convert(new PrimitiveType.PrimitiveTypeNameConverter<String, RuntimeException>() {
|
||||||
|
|||||||
@@ -107,7 +107,7 @@ public abstract class ITTestBase {
|
|||||||
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
|
.append(" --master local[2] --driver-class-path ").append(HADOOP_CONF_DIR)
|
||||||
.append(
|
.append(
|
||||||
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
" --conf spark.sql.hive.convertMetastoreParquet=false --deploy-mode client --driver-memory 1G --executor-memory 1G --num-executors 1 ")
|
||||||
.append(" --packages com.databricks:spark-avro_2.11:4.0.0 ").append(" -i ").append(commandFile).toString();
|
.append(" --packages org.apache.spark:spark-avro_2.11:2.4.4 ").append(" -i ").append(commandFile).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
static String getPrestoConsoleCommand(String commandFile) {
|
static String getPrestoConsoleCommand(String commandFile) {
|
||||||
|
|||||||
@@ -213,9 +213,9 @@
|
|||||||
|
|
||||||
<!-- Spark (Packages) -->
|
<!-- Spark (Packages) -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.databricks</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-avro_2.11</artifactId>
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
<version>4.0.0</version>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Hadoop -->
|
<!-- Hadoop -->
|
||||||
@@ -239,8 +239,19 @@
|
|||||||
<!-- Hive -->
|
<!-- Hive -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
<artifactId>hive-service</artifactId>
|
<artifactId>hive-exec</artifactId>
|
||||||
<version>${hive.version}</version>
|
<version>${hive.version}</version>
|
||||||
|
<classifier>${hive.exec.classifier}</classifier>
|
||||||
|
<exclusions>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>javax.mail</groupId>
|
||||||
|
<artifactId>mail</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
<exclusion>
|
||||||
|
<groupId>org.eclipse.jetty.aggregate</groupId>
|
||||||
|
<artifactId>*</artifactId>
|
||||||
|
</exclusion>
|
||||||
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>${hive.groupid}</groupId>
|
<groupId>${hive.groupid}</groupId>
|
||||||
|
|||||||
@@ -1,13 +1,12 @@
|
|||||||
/*
|
/*
|
||||||
* This code is copied from com.databricks:spark-avro with following license
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* Copyright 2014 Databricks
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
@@ -22,14 +21,15 @@ import java.nio.ByteBuffer
|
|||||||
import java.sql.{Date, Timestamp}
|
import java.sql.{Date, Timestamp}
|
||||||
import java.util
|
import java.util
|
||||||
|
|
||||||
import com.databricks.spark.avro.SchemaConverters
|
import org.apache.avro.Conversions.DecimalConversion
|
||||||
import com.databricks.spark.avro.SchemaConverters.IncompatibleSchemaException
|
import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
|
||||||
import org.apache.avro.{Schema, SchemaBuilder}
|
import org.apache.avro.{LogicalTypes, Schema}
|
||||||
import org.apache.avro.Schema.Type._
|
import org.apache.avro.Schema.Type._
|
||||||
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
import org.apache.avro.generic.GenericData.{Fixed, Record}
|
||||||
import org.apache.avro.generic.{GenericData, GenericRecord}
|
import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord}
|
||||||
import org.apache.hudi.AvroConversionUtils.getNewRecordNamespace
|
import org.apache.hudi.AvroConversionUtils.getNewRecordNamespace
|
||||||
import org.apache.spark.sql.Row
|
import org.apache.spark.sql.Row
|
||||||
|
import org.apache.spark.sql.avro.{IncompatibleSchemaException, SchemaConverters}
|
||||||
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
import org.apache.spark.sql.catalyst.expressions.GenericRow
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
|
|
||||||
@@ -37,6 +37,16 @@ import scala.collection.JavaConverters._
|
|||||||
|
|
||||||
object AvroConversionHelper {
|
object AvroConversionHelper {
|
||||||
|
|
||||||
|
private def createDecimal(decimal: java.math.BigDecimal, precision: Int, scale: Int): Decimal = {
|
||||||
|
if (precision <= Decimal.MAX_LONG_DIGITS) {
|
||||||
|
// Constructs a `Decimal` with an unscaled `Long` value if possible.
|
||||||
|
Decimal(decimal.unscaledValue().longValue(), precision, scale)
|
||||||
|
} else {
|
||||||
|
// Otherwise, resorts to an unscaled `BigInteger` instead.
|
||||||
|
Decimal(decimal, precision, scale)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* Returns a converter function to convert row in avro format to GenericRow of catalyst.
|
* Returns a converter function to convert row in avro format to GenericRow of catalyst.
|
||||||
@@ -76,7 +86,50 @@ object AvroConversionHelper {
|
|||||||
byteBuffer.get(bytes)
|
byteBuffer.get(bytes)
|
||||||
bytes
|
bytes
|
||||||
}
|
}
|
||||||
|
case (d: DecimalType, FIXED) =>
|
||||||
|
(item: AnyRef) =>
|
||||||
|
if (item == null) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
val decimalConversion = new DecimalConversion
|
||||||
|
val bigDecimal = decimalConversion.fromFixed(item.asInstanceOf[GenericFixed], avroSchema,
|
||||||
|
LogicalTypes.decimal(d.precision, d.scale))
|
||||||
|
createDecimal(bigDecimal, d.precision, d.scale)
|
||||||
|
}
|
||||||
|
case (d: DecimalType, BYTES) =>
|
||||||
|
(item: AnyRef) =>
|
||||||
|
if (item == null) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
val decimalConversion = new DecimalConversion
|
||||||
|
val bigDecimal = decimalConversion.fromBytes(item.asInstanceOf[ByteBuffer], avroSchema,
|
||||||
|
LogicalTypes.decimal(d.precision, d.scale))
|
||||||
|
createDecimal(bigDecimal, d.precision, d.scale)
|
||||||
|
}
|
||||||
|
case (DateType, INT) =>
|
||||||
|
(item: AnyRef) =>
|
||||||
|
if (item == null) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
new Date(item.asInstanceOf[Long])
|
||||||
|
}
|
||||||
|
case (TimestampType, LONG) =>
|
||||||
|
(item: AnyRef) =>
|
||||||
|
if (item == null) {
|
||||||
|
null
|
||||||
|
} else {
|
||||||
|
avroSchema.getLogicalType match {
|
||||||
|
case _: TimestampMillis =>
|
||||||
|
new Timestamp(item.asInstanceOf[Long])
|
||||||
|
case _: TimestampMicros =>
|
||||||
|
new Timestamp(item.asInstanceOf[Long] / 1000)
|
||||||
|
case null =>
|
||||||
|
new Timestamp(item.asInstanceOf[Long])
|
||||||
|
case other =>
|
||||||
|
throw new IncompatibleSchemaException(
|
||||||
|
s"Cannot convert Avro logical type ${other} to Catalyst Timestamp type.")
|
||||||
|
}
|
||||||
|
}
|
||||||
case (struct: StructType, RECORD) =>
|
case (struct: StructType, RECORD) =>
|
||||||
val length = struct.fields.length
|
val length = struct.fields.length
|
||||||
val converters = new Array[AnyRef => AnyRef](length)
|
val converters = new Array[AnyRef => AnyRef](length)
|
||||||
@@ -216,7 +269,8 @@ object AvroConversionHelper {
|
|||||||
createConverter(sourceAvroSchema, targetSqlType, List.empty[String])
|
createConverter(sourceAvroSchema, targetSqlType, List.empty[String])
|
||||||
}
|
}
|
||||||
|
|
||||||
def createConverterToAvro(dataType: DataType,
|
def createConverterToAvro(avroSchema: Schema,
|
||||||
|
dataType: DataType,
|
||||||
structName: String,
|
structName: String,
|
||||||
recordNamespace: String): Any => Any = {
|
recordNamespace: String): Any => Any = {
|
||||||
dataType match {
|
dataType match {
|
||||||
@@ -231,13 +285,22 @@ object AvroConversionHelper {
|
|||||||
if (item == null) null else item.asInstanceOf[Byte].intValue
|
if (item == null) null else item.asInstanceOf[Byte].intValue
|
||||||
case ShortType => (item: Any) =>
|
case ShortType => (item: Any) =>
|
||||||
if (item == null) null else item.asInstanceOf[Short].intValue
|
if (item == null) null else item.asInstanceOf[Short].intValue
|
||||||
case _: DecimalType => (item: Any) => if (item == null) null else item.toString
|
case dec: DecimalType => (item: Any) =>
|
||||||
|
Option(item).map { i =>
|
||||||
|
val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal]
|
||||||
|
val decimalConversions = new DecimalConversion()
|
||||||
|
decimalConversions.toFixed(bigDecimalValue, avroSchema.getField(structName).schema().getTypes.get(0),
|
||||||
|
LogicalTypes.decimal(dec.precision, dec.scale))
|
||||||
|
}.orNull
|
||||||
case TimestampType => (item: Any) =>
|
case TimestampType => (item: Any) =>
|
||||||
if (item == null) null else item.asInstanceOf[Timestamp].getTime
|
// Convert time to microseconds since spark-avro by default converts TimestampType to
|
||||||
|
// Avro Logical TimestampMicros
|
||||||
|
Option(item).map(_.asInstanceOf[Timestamp].getTime * 1000).orNull
|
||||||
case DateType => (item: Any) =>
|
case DateType => (item: Any) =>
|
||||||
if (item == null) null else item.asInstanceOf[Date].getTime
|
Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull
|
||||||
case ArrayType(elementType, _) =>
|
case ArrayType(elementType, _) =>
|
||||||
val elementConverter = createConverterToAvro(
|
val elementConverter = createConverterToAvro(
|
||||||
|
avroSchema,
|
||||||
elementType,
|
elementType,
|
||||||
structName,
|
structName,
|
||||||
getNewRecordNamespace(elementType, recordNamespace, structName))
|
getNewRecordNamespace(elementType, recordNamespace, structName))
|
||||||
@@ -258,6 +321,7 @@ object AvroConversionHelper {
|
|||||||
}
|
}
|
||||||
case MapType(StringType, valueType, _) =>
|
case MapType(StringType, valueType, _) =>
|
||||||
val valueConverter = createConverterToAvro(
|
val valueConverter = createConverterToAvro(
|
||||||
|
avroSchema,
|
||||||
valueType,
|
valueType,
|
||||||
structName,
|
structName,
|
||||||
getNewRecordNamespace(valueType, recordNamespace, structName))
|
getNewRecordNamespace(valueType, recordNamespace, structName))
|
||||||
@@ -273,11 +337,10 @@ object AvroConversionHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
case structType: StructType =>
|
case structType: StructType =>
|
||||||
val builder = SchemaBuilder.record(structName).namespace(recordNamespace)
|
val schema: Schema = SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
|
||||||
val schema: Schema = SchemaConverters.convertStructToAvro(
|
|
||||||
structType, builder, recordNamespace)
|
|
||||||
val fieldConverters = structType.fields.map(field =>
|
val fieldConverters = structType.fields.map(field =>
|
||||||
createConverterToAvro(
|
createConverterToAvro(
|
||||||
|
avroSchema,
|
||||||
field.dataType,
|
field.dataType,
|
||||||
field.name,
|
field.name,
|
||||||
getNewRecordNamespace(field.dataType, recordNamespace, field.name)))
|
getNewRecordNamespace(field.dataType, recordNamespace, field.name)))
|
||||||
|
|||||||
@@ -17,11 +17,11 @@
|
|||||||
|
|
||||||
package org.apache.hudi
|
package org.apache.hudi
|
||||||
|
|
||||||
import com.databricks.spark.avro.SchemaConverters
|
|
||||||
import org.apache.avro.generic.GenericRecord
|
import org.apache.avro.generic.GenericRecord
|
||||||
import org.apache.avro.{Schema, SchemaBuilder}
|
|
||||||
import org.apache.hudi.common.model.HoodieKey
|
import org.apache.hudi.common.model.HoodieKey
|
||||||
|
import org.apache.avro.Schema
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
||||||
import org.apache.spark.sql.types._
|
import org.apache.spark.sql.types._
|
||||||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
|
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
|
||||||
@@ -30,13 +30,20 @@ import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
|
|||||||
object AvroConversionUtils {
|
object AvroConversionUtils {
|
||||||
|
|
||||||
def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
|
def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
|
||||||
|
val avroSchema = convertStructTypeToAvroSchema(df.schema, structName, recordNamespace)
|
||||||
|
createRdd(df, avroSchema.toString, structName, recordNamespace)
|
||||||
|
}
|
||||||
|
|
||||||
|
def createRdd(df: DataFrame, avroSchemaAsJsonString: String, structName: String, recordNamespace: String)
|
||||||
|
: RDD[GenericRecord] = {
|
||||||
val dataType = df.schema
|
val dataType = df.schema
|
||||||
val encoder = RowEncoder.apply(dataType).resolveAndBind()
|
val encoder = RowEncoder.apply(dataType).resolveAndBind()
|
||||||
df.queryExecution.toRdd.map(encoder.fromRow)
|
df.queryExecution.toRdd.map(encoder.fromRow)
|
||||||
.mapPartitions { records =>
|
.mapPartitions { records =>
|
||||||
if (records.isEmpty) Iterator.empty
|
if (records.isEmpty) Iterator.empty
|
||||||
else {
|
else {
|
||||||
val convertor = AvroConversionHelper.createConverterToAvro(dataType, structName, recordNamespace)
|
val avroSchema = new Schema.Parser().parse(avroSchemaAsJsonString)
|
||||||
|
val convertor = AvroConversionHelper.createConverterToAvro(avroSchema, dataType, structName, recordNamespace)
|
||||||
records.map { x => convertor(x).asInstanceOf[GenericRecord] }
|
records.map { x => convertor(x).asInstanceOf[GenericRecord] }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -75,11 +82,10 @@ object AvroConversionUtils {
|
|||||||
def convertStructTypeToAvroSchema(structType: StructType,
|
def convertStructTypeToAvroSchema(structType: StructType,
|
||||||
structName: String,
|
structName: String,
|
||||||
recordNamespace: String): Schema = {
|
recordNamespace: String): Schema = {
|
||||||
val builder = SchemaBuilder.record(structName).namespace(recordNamespace)
|
SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)
|
||||||
SchemaConverters.convertStructToAvro(structType, builder, recordNamespace)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def convertAvroSchemaToStructType(avroSchema: Schema): StructType = {
|
def convertAvroSchemaToStructType(avroSchema: Schema): StructType = {
|
||||||
SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType];
|
SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -104,6 +104,7 @@ public class HoodieJavaApp {
|
|||||||
SparkSession spark = SparkSession.builder().appName("Hoodie Spark APP")
|
SparkSession spark = SparkSession.builder().appName("Hoodie Spark APP")
|
||||||
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]").getOrCreate();
|
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer").master("local[1]").getOrCreate();
|
||||||
JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());
|
JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
spark.sparkContext().setLogLevel("WARN");
|
||||||
FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
|
FileSystem fs = FileSystem.get(jssc.hadoopConfiguration());
|
||||||
|
|
||||||
// Generator of some records to be loaded in.
|
// Generator of some records to be loaded in.
|
||||||
|
|||||||
@@ -161,7 +161,7 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.databricks</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-avro_2.11</artifactId>
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
<scope>provided</scope>
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|||||||
@@ -89,7 +89,7 @@
|
|||||||
</relocation>
|
</relocation>
|
||||||
<relocation>
|
<relocation>
|
||||||
<pattern>org.apache.avro.</pattern>
|
<pattern>org.apache.avro.</pattern>
|
||||||
<shadedPattern>${mr.bundle.avro.shade.prefix}org.apache.avro.</shadedPattern>
|
<shadedPattern>org.apache.hudi.org.apache.avro.</shadedPattern>
|
||||||
</relocation>
|
</relocation>
|
||||||
</relocations>
|
</relocations>
|
||||||
<createDependencyReducedPom>false</createDependencyReducedPom>
|
<createDependencyReducedPom>false</createDependencyReducedPom>
|
||||||
@@ -143,17 +143,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.avro</groupId>
|
<groupId>org.apache.avro</groupId>
|
||||||
<artifactId>avro</artifactId>
|
<artifactId>avro</artifactId>
|
||||||
<scope>${mr.bundle.avro.scope}</scope>
|
<scope>compile</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<profiles>
|
|
||||||
<profile>
|
|
||||||
<id>mr-bundle-shade-avro</id>
|
|
||||||
<properties>
|
|
||||||
<mr.bundle.avro.scope>compile</mr.bundle.avro.scope>
|
|
||||||
<mr.bundle.avro.shade.prefix>org.apache.hudi.</mr.bundle.avro.shade.prefix>
|
|
||||||
</properties>
|
|
||||||
</profile>
|
|
||||||
</profiles>
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -94,8 +94,6 @@
|
|||||||
<include>org.apache.hive:hive-service-rpc</include>
|
<include>org.apache.hive:hive-service-rpc</include>
|
||||||
<include>org.apache.hive:hive-metastore</include>
|
<include>org.apache.hive:hive-metastore</include>
|
||||||
<include>org.apache.hive:hive-jdbc</include>
|
<include>org.apache.hive:hive-jdbc</include>
|
||||||
|
|
||||||
<include>com.databricks:spark-avro_2.11</include>
|
|
||||||
</includes>
|
</includes>
|
||||||
</artifactSet>
|
</artifactSet>
|
||||||
<relocations>
|
<relocations>
|
||||||
@@ -139,10 +137,6 @@
|
|||||||
<pattern>org.apache.commons.codec.</pattern>
|
<pattern>org.apache.commons.codec.</pattern>
|
||||||
<shadedPattern>org.apache.hudi.org.apache.commons.codec.</shadedPattern>
|
<shadedPattern>org.apache.hudi.org.apache.commons.codec.</shadedPattern>
|
||||||
</relocation>
|
</relocation>
|
||||||
<relocation>
|
|
||||||
<pattern>com.databricks.</pattern>
|
|
||||||
<shadedPattern>org.apache.hudi.com.databricks.</shadedPattern>
|
|
||||||
</relocation>
|
|
||||||
<!-- TODO: Revisit GH ISSUE #533 & PR#633-->
|
<!-- TODO: Revisit GH ISSUE #533 & PR#633-->
|
||||||
</relocations>
|
</relocations>
|
||||||
<filters>
|
<filters>
|
||||||
|
|||||||
20
pom.xml
20
pom.xml
@@ -76,7 +76,7 @@
|
|||||||
<java.version>1.8</java.version>
|
<java.version>1.8</java.version>
|
||||||
<fasterxml.version>2.6.7</fasterxml.version>
|
<fasterxml.version>2.6.7</fasterxml.version>
|
||||||
<glassfish.version>2.17</glassfish.version>
|
<glassfish.version>2.17</glassfish.version>
|
||||||
<parquet.version>1.8.1</parquet.version>
|
<parquet.version>1.10.1</parquet.version>
|
||||||
<junit.version>4.11</junit.version>
|
<junit.version>4.11</junit.version>
|
||||||
<junit-dep.version>4.10</junit-dep.version>
|
<junit-dep.version>4.10</junit-dep.version>
|
||||||
<mockito.version>1.10.19</mockito.version>
|
<mockito.version>1.10.19</mockito.version>
|
||||||
@@ -88,8 +88,8 @@
|
|||||||
<hive.version>2.3.1</hive.version>
|
<hive.version>2.3.1</hive.version>
|
||||||
<hive.exec.classifier>core</hive.exec.classifier>
|
<hive.exec.classifier>core</hive.exec.classifier>
|
||||||
<metrics.version>4.1.1</metrics.version>
|
<metrics.version>4.1.1</metrics.version>
|
||||||
<spark.version>2.1.0</spark.version>
|
<spark.version>2.4.4</spark.version>
|
||||||
<avro.version>1.7.7</avro.version>
|
<avro.version>1.8.2</avro.version>
|
||||||
<scala.version>2.11.8</scala.version>
|
<scala.version>2.11.8</scala.version>
|
||||||
<scala.libversion>2.11</scala.libversion>
|
<scala.libversion>2.11</scala.libversion>
|
||||||
<apache-rat-plugin.version>0.12</apache-rat-plugin.version>
|
<apache-rat-plugin.version>0.12</apache-rat-plugin.version>
|
||||||
@@ -105,8 +105,6 @@
|
|||||||
<skipUTs>${skipTests}</skipUTs>
|
<skipUTs>${skipTests}</skipUTs>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<main.basedir>${project.basedir}</main.basedir>
|
<main.basedir>${project.basedir}</main.basedir>
|
||||||
<mr.bundle.avro.scope>provided</mr.bundle.avro.scope>
|
|
||||||
<mr.bundle.avro.shade.prefix></mr.bundle.avro.shade.prefix>
|
|
||||||
<spark.bundle.hive.scope>provided</spark.bundle.hive.scope>
|
<spark.bundle.hive.scope>provided</spark.bundle.hive.scope>
|
||||||
<spark.bundle.hive.shade.prefix></spark.bundle.hive.shade.prefix>
|
<spark.bundle.hive.shade.prefix></spark.bundle.hive.shade.prefix>
|
||||||
<utilities.bundle.hive.scope>provided</utilities.bundle.hive.scope>
|
<utilities.bundle.hive.scope>provided</utilities.bundle.hive.scope>
|
||||||
@@ -485,9 +483,10 @@
|
|||||||
|
|
||||||
<!-- Spark (Packages) -->
|
<!-- Spark (Packages) -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.databricks</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-avro_2.11</artifactId>
|
<artifactId>spark-avro_2.11</artifactId>
|
||||||
<version>4.0.0</version>
|
<version>${spark.version}</version>
|
||||||
|
<scope>provided</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Dropwizard Metrics -->
|
<!-- Dropwizard Metrics -->
|
||||||
@@ -934,13 +933,6 @@
|
|||||||
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire-quiet.properties</surefire-log4j.file>
|
<surefire-log4j.file>file://${project.basedir}/src/test/resources/log4j-surefire-quiet.properties</surefire-log4j.file>
|
||||||
</properties>
|
</properties>
|
||||||
</profile>
|
</profile>
|
||||||
<profile>
|
|
||||||
<id>aws-emr-profile</id>
|
|
||||||
<properties>
|
|
||||||
<mr.bundle.avro.scope>compile</mr.bundle.avro.scope>
|
|
||||||
<mr.bundle.avro.shade.prefix>org.apache.hudi.</mr.bundle.avro.shade.prefix>
|
|
||||||
</properties>
|
|
||||||
</profile>
|
|
||||||
<profile>
|
<profile>
|
||||||
<id>javadocs</id>
|
<id>javadocs</id>
|
||||||
<build>
|
<build>
|
||||||
|
|||||||
Reference in New Issue
Block a user