1
0

[HUDI-238] Make Hudi support Scala 2.12 (#1226)

* [HUDI-238] Rename scala related artifactId & add maven profile to support Scala 2.12
This commit is contained in:
wenningd
2020-01-17 14:02:21 -08:00
committed by Balaji Varadarajan
parent 923e2b4a1e
commit 292c1e2ff4
19 changed files with 261 additions and 153 deletions

View File

@@ -245,6 +245,8 @@ This product includes code from Apache Spark
* org.apache.hudi.AvroConversionHelper copied from classes in org/apache/spark/sql/avro package * org.apache.hudi.AvroConversionHelper copied from classes in org/apache/spark/sql/avro package
* dev/change-scala-version.sh copied from https://github.com/apache/spark/blob/branch-2.4/dev/change-scala-version.sh
Copyright: 2014 and onwards The Apache Software Foundation Copyright: 2014 and onwards The Apache Software Foundation
Home page: http://spark.apache.org/ Home page: http://spark.apache.org/
License: http://www.apache.org/licenses/LICENSE-2.0 License: http://www.apache.org/licenses/LICENSE-2.0

66
dev/change-scala-version.sh Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
VALID_VERSIONS=( 2.11 2.12 )
usage() {
echo "Usage: $(basename $0) [-h|--help] <version>
where :
-h| --help Display this help text
valid version values : ${VALID_VERSIONS[*]}
" 1>&2
exit 1
}
if [[ ($# -ne 1) || ( $1 == "--help") || $1 == "-h" ]]; then
usage
fi
TO_VERSION=$1
check_scala_version() {
for i in ${VALID_VERSIONS[*]}; do [ $i = "$1" ] && return 0; done
echo "Invalid Scala version: $1. Valid versions: ${VALID_VERSIONS[*]}" 1>&2
exit 1
}
check_scala_version "$TO_VERSION"
if [ $TO_VERSION = "2.11" ]; then
FROM_VERSION="2.12"
else
FROM_VERSION="2.11"
fi
sed_i() {
sed -e "$1" "$2" > "$2.tmp" && mv "$2.tmp" "$2"
}
export -f sed_i
BASEDIR=$(dirname $0)/..
find "$BASEDIR" -name 'pom.xml' -not -path '*target*' -print \
-exec bash -c "sed_i 's/\(artifactId.*\)_'$FROM_VERSION'/\1_'$TO_VERSION'/g' {}" \;
# Also update <scala.binary.version> in parent POM
# Match any scala binary version to ensure idempotency
sed_i '1,/<scala\.binary\.version>[0-9]*\.[0-9]*</s/<scala\.binary\.version>[0-9]*\.[0-9]*</<scala.binary.version>'$TO_VERSION'</' \
"$BASEDIR/pom.xml"

View File

@@ -26,5 +26,5 @@ hoodie.deltastreamer.schemaprovider.target.schema.file=/var/demo/config/schema.a
# Kafka Source # Kafka Source
hoodie.deltastreamer.source.kafka.topic=stock_ticks hoodie.deltastreamer.source.kafka.topic=stock_ticks
#Kafka props #Kafka props
metadata.broker.list=kafkabroker:9092 bootstrap.servers=kafkabroker:9092
auto.offset.reset=smallest auto.offset.reset=earliest

View File

@@ -57,9 +57,9 @@
<tasks> <tasks>
<copy file="${project.basedir}/../../../../packaging/hudi-hadoop-mr-bundle/target/hudi-hadoop-mr-bundle-${project.version}.jar" tofile="target/hoodie-hadoop-mr-bundle.jar" /> <copy file="${project.basedir}/../../../../packaging/hudi-hadoop-mr-bundle/target/hudi-hadoop-mr-bundle-${project.version}.jar" tofile="target/hoodie-hadoop-mr-bundle.jar" />
<copy file="${project.basedir}/../../../../packaging/hudi-hive-bundle/target/hudi-hive-bundle-${project.version}.jar" tofile="target/hoodie-hive-bundle.jar" /> <copy file="${project.basedir}/../../../../packaging/hudi-hive-bundle/target/hudi-hive-bundle-${project.version}.jar" tofile="target/hoodie-hive-bundle.jar" />
<copy file="${project.basedir}/../../../../packaging/hudi-spark-bundle/target/hudi-spark-bundle-${project.version}.jar" tofile="target/hoodie-spark-bundle.jar" /> <copy file="${project.basedir}/../../../../packaging/hudi-spark-bundle/target/hudi-spark-bundle_${scala.binary.version}-${project.version}.jar" tofile="target/hoodie-spark-bundle.jar" />
<copy <copy
file="${project.basedir}/../../../../packaging/hudi-utilities-bundle/target/hudi-utilities-bundle-${project.version}.jar" file="${project.basedir}/../../../../packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_${scala.binary.version}-${project.version}.jar"
tofile="target/hoodie-utilities.jar"/> tofile="target/hoodie-utilities.jar"/>
</tasks> </tasks>
</configuration> </configuration>

View File

@@ -42,7 +42,7 @@
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-bundle</artifactId> <artifactId>hudi-spark-bundle_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
</dependencies> </dependencies>

View File

@@ -147,7 +147,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-utilities</artifactId> <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
@@ -160,16 +160,22 @@
<dependency> <dependency>
<groupId>org.apache.parquet</groupId> <groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId> <artifactId>parquet-avro</artifactId>
<exclusions>
<exclusion>
<groupId>com.thoughtworks.paranamer</groupId>
<artifactId>paranamer</artifactId>
</exclusion>
</exclusions>
</dependency> </dependency>
<!-- Spark --> <!-- Spark -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency> </dependency>
<dependency> <dependency>

View File

@@ -95,11 +95,11 @@
<!-- Spark --> <!-- Spark -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency> </dependency>
<!-- Dropwizard Metrics --> <!-- Dropwizard Metrics -->

View File

@@ -54,7 +54,7 @@
<!-- Hoodie --> <!-- Hoodie -->
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
@@ -83,7 +83,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
<classifier>tests</classifier> <classifier>tests</classifier>
<type>test-jar</type> <type>test-jar</type>

View File

@@ -23,7 +23,7 @@
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_2.11</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>
@@ -190,13 +190,20 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.module</groupId> <groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_2.11</artifactId> <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency> </dependency>
<!-- Avro --> <!-- Avro -->
<dependency> <dependency>
<groupId>org.apache.avro</groupId> <groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId> <artifactId>avro</artifactId>
<exclusions>
<exclusion>
<!-- this version to conflict to spark-core_2.12 -->
<groupId>com.thoughtworks.paranamer</groupId>
<artifactId>paranamer</artifactId>
</exclusion>
</exclusions>
</dependency> </dependency>
<!-- Parquet --> <!-- Parquet -->
@@ -208,17 +215,17 @@
<!-- Spark --> <!-- Spark -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency> </dependency>
<!-- Spark (Packages) --> <!-- Spark (Packages) -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-avro_2.11</artifactId> <artifactId>spark-avro_${scala.binary.version}</artifactId>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
@@ -293,7 +300,7 @@
<dependency> <dependency>
<groupId>org.scalatest</groupId> <groupId>org.scalatest</groupId>
<artifactId>scalatest_2.11</artifactId> <artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version> <version>${scalatest.version}</version>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>

View File

@@ -25,7 +25,7 @@ function error_exit {
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
#Ensure we pick the right jar even for hive11 builds #Ensure we pick the right jar even for hive11 builds
HUDI_JAR=`ls -c $DIR/../packaging/hudi-spark-bundle/target/hudi-spark-bundle-*.jar | grep -v source | head -1` HUDI_JAR=`ls -c $DIR/../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v source | head -1`
if [ -z "$HADOOP_CONF_DIR" ]; then if [ -z "$HADOOP_CONF_DIR" ]; then
echo "setting hadoop conf dir" echo "setting hadoop conf dir"

View File

@@ -23,7 +23,7 @@
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>hudi-utilities</artifactId> <artifactId>hudi-utilities_2.11</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>
@@ -109,7 +109,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
@@ -128,19 +128,25 @@
<!-- Fasterxml --> <!-- Fasterxml -->
<dependency> <dependency>
<groupId>com.fasterxml.jackson.module</groupId> <groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_2.11</artifactId> <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency> </dependency>
<!-- Parquet --> <!-- Parquet -->
<dependency> <dependency>
<groupId>org.apache.parquet</groupId> <groupId>org.apache.parquet</groupId>
<artifactId>parquet-avro</artifactId> <artifactId>parquet-avro</artifactId>
<exclusions>
<exclusion>
<groupId>com.thoughtworks.paranamer</groupId>
<artifactId>paranamer</artifactId>
</exclusion>
</exclusions>
</dependency> </dependency>
<!-- Spark --> <!-- Spark -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>javax.servlet</groupId> <groupId>javax.servlet</groupId>
@@ -151,7 +157,7 @@
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_${scala.binary.version}</artifactId>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>javax.servlet</groupId> <groupId>javax.servlet</groupId>
@@ -162,20 +168,26 @@
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-avro_2.11</artifactId> <artifactId>spark-avro_${scala.binary.version}</artifactId>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId> <artifactId>spark-streaming_${scala.binary.version}</artifactId>
<version>${spark.version}</version> <version>${spark.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-8_2.11</artifactId> <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version> <version>${spark.version}</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<classifier>tests</classifier>
</dependency>
<!-- Dropwizard Metrics --> <!-- Dropwizard Metrics -->
<dependency> <dependency>
@@ -197,8 +209,8 @@
<dependency> <dependency>
<groupId>com.twitter</groupId> <groupId>com.twitter</groupId>
<artifactId>bijection-avro_2.11</artifactId> <artifactId>bijection-avro_${scala.binary.version}</artifactId>
<version>0.9.2</version> <version>0.9.3</version>
</dependency> </dependency>
<!-- Kafka --> <!-- Kafka -->
@@ -223,6 +235,13 @@
<version>3.0.0</version> <version>3.0.0</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_${scala.binary.version}</artifactId>
<version>${kafka.version}</version>
<scope>test</scope>
</dependency>
<!-- Httpcomponents --> <!-- Httpcomponents -->
<dependency> <dependency>
<groupId>org.apache.httpcomponents</groupId> <groupId>org.apache.httpcomponents</groupId>

View File

@@ -24,16 +24,17 @@ import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import io.confluent.kafka.serializers.KafkaAvroDecoder; import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import kafka.serializer.StringDecoder;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.kafka.KafkaUtils; import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka.OffsetRange; import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.apache.spark.streaming.kafka010.OffsetRange;
/** /**
* Reads avro serialized Kafka data, based on the confluent schema-registry. * Reads avro serialized Kafka data, based on the confluent schema-registry.
@@ -47,6 +48,8 @@ public class AvroKafkaSource extends AvroSource {
public AvroKafkaSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession, public AvroKafkaSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession,
SchemaProvider schemaProvider) { SchemaProvider schemaProvider) {
super(props, sparkContext, sparkSession, schemaProvider); super(props, sparkContext, sparkSession, schemaProvider);
props.put("key.deserializer", StringDeserializer.class);
props.put("value.deserializer", KafkaAvroDeserializer.class);
offsetGen = new KafkaOffsetGen(props); offsetGen = new KafkaOffsetGen(props);
} }
@@ -64,9 +67,7 @@ public class AvroKafkaSource extends AvroSource {
} }
private JavaRDD<GenericRecord> toRDD(OffsetRange[] offsetRanges) { private JavaRDD<GenericRecord> toRDD(OffsetRange[] offsetRanges) {
JavaRDD<GenericRecord> recordRDD = return KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges,
KafkaUtils.createRDD(sparkContext, String.class, Object.class, StringDecoder.class, KafkaAvroDecoder.class, LocationStrategies.PreferConsistent()).map(obj -> (GenericRecord) obj.value());
offsetGen.getKafkaParams(), offsetRanges).values().map(obj -> (GenericRecord) obj);
return recordRDD;
} }
} }

View File

@@ -24,14 +24,15 @@ import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import kafka.serializer.StringDecoder; import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.apache.spark.streaming.kafka.KafkaUtils; import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka.OffsetRange; import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.apache.spark.streaming.kafka010.OffsetRange;
/** /**
* Read json kafka data. * Read json kafka data.
@@ -45,6 +46,8 @@ public class JsonKafkaSource extends JsonSource {
public JsonKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession, public JsonKafkaSource(TypedProperties properties, JavaSparkContext sparkContext, SparkSession sparkSession,
SchemaProvider schemaProvider) { SchemaProvider schemaProvider) {
super(properties, sparkContext, sparkSession, schemaProvider); super(properties, sparkContext, sparkSession, schemaProvider);
properties.put("key.deserializer", StringDeserializer.class);
properties.put("value.deserializer", StringDeserializer.class);
offsetGen = new KafkaOffsetGen(properties); offsetGen = new KafkaOffsetGen(properties);
} }
@@ -61,7 +64,7 @@ public class JsonKafkaSource extends JsonSource {
} }
private JavaRDD<String> toRDD(OffsetRange[] offsetRanges) { private JavaRDD<String> toRDD(OffsetRange[] offsetRanges) {
return KafkaUtils.createRDD(sparkContext, String.class, String.class, StringDecoder.class, StringDecoder.class, return KafkaUtils.createRDD(sparkContext, offsetGen.getKafkaParams(), offsetRanges,
offsetGen.getKafkaParams(), offsetRanges).values(); LocationStrategies.PreferConsistent()).map(x -> (String) x.value());
} }
} }

View File

@@ -22,30 +22,24 @@ import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties; import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException;
import kafka.common.TopicAndPartition; import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.log4j.LogManager; import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.spark.streaming.kafka.KafkaCluster; import org.apache.spark.streaming.kafka010.OffsetRange;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset;
import org.apache.spark.streaming.kafka.OffsetRange;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import scala.Predef;
import scala.collection.JavaConverters;
import scala.collection.immutable.Map;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.StringBuilder;
import scala.util.Either;
/** /**
* Source to read data from Kafka, incrementally. * Source to read data from Kafka, incrementally.
*/ */
@@ -58,8 +52,8 @@ public class KafkaOffsetGen {
/** /**
* Reconstruct checkpoint from string. * Reconstruct checkpoint from string.
*/ */
public static HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> strToOffsets(String checkpointStr) { public static HashMap<TopicPartition, Long> strToOffsets(String checkpointStr) {
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> offsetMap = new HashMap<>(); HashMap<TopicPartition, Long> offsetMap = new HashMap<>();
if (checkpointStr.length() == 0) { if (checkpointStr.length() == 0) {
return offsetMap; return offsetMap;
} }
@@ -67,8 +61,7 @@ public class KafkaOffsetGen {
String topic = splits[0]; String topic = splits[0];
for (int i = 1; i < splits.length; i++) { for (int i = 1; i < splits.length; i++) {
String[] subSplits = splits[i].split(":"); String[] subSplits = splits[i].split(":");
offsetMap.put(new TopicAndPartition(topic, Integer.parseInt(subSplits[0])), offsetMap.put(new TopicPartition(topic, Integer.parseInt(subSplits[0])), Long.parseLong(subSplits[1]));
new KafkaCluster.LeaderOffset("", -1, Long.parseLong(subSplits[1])));
} }
return offsetMap; return offsetMap;
} }
@@ -83,7 +76,7 @@ public class KafkaOffsetGen {
// at least 1 partition will be present. // at least 1 partition will be present.
sb.append(ranges[0].topic() + ","); sb.append(ranges[0].topic() + ",");
sb.append(Arrays.stream(ranges).map(r -> String.format("%s:%d", r.partition(), r.untilOffset())) sb.append(Arrays.stream(ranges).map(r -> String.format("%s:%d", r.partition(), r.untilOffset()))
.collect(Collectors.joining(","))); .collect(Collectors.joining(",")));
return sb.toString(); return sb.toString();
} }
@@ -94,32 +87,32 @@ public class KafkaOffsetGen {
* @param toOffsetMap offsets of where each partitions is currently at * @param toOffsetMap offsets of where each partitions is currently at
* @param numEvents maximum number of events to read. * @param numEvents maximum number of events to read.
*/ */
public static OffsetRange[] computeOffsetRanges(HashMap<TopicAndPartition, LeaderOffset> fromOffsetMap, public static OffsetRange[] computeOffsetRanges(Map<TopicPartition, Long> fromOffsetMap,
HashMap<TopicAndPartition, LeaderOffset> toOffsetMap, long numEvents) { Map<TopicPartition, Long> toOffsetMap, long numEvents) {
Comparator<OffsetRange> byPartition = Comparator.comparing(OffsetRange::partition); Comparator<OffsetRange> byPartition = Comparator.comparing(OffsetRange::partition);
// Create initial offset ranges for each 'to' partition, with from = to offsets. // Create initial offset ranges for each 'to' partition, with from = to offsets.
OffsetRange[] ranges = new OffsetRange[toOffsetMap.size()]; OffsetRange[] ranges = new OffsetRange[toOffsetMap.size()];
toOffsetMap.entrySet().stream().map(e -> { toOffsetMap.entrySet().stream().map(e -> {
TopicAndPartition tp = e.getKey(); TopicPartition tp = e.getKey();
long fromOffset = fromOffsetMap.getOrDefault(tp, new LeaderOffset("", -1, 0)).offset(); long fromOffset = fromOffsetMap.getOrDefault(tp, 0L);
return OffsetRange.create(tp, fromOffset, fromOffset); return OffsetRange.create(tp, fromOffset, fromOffset);
}).sorted(byPartition).collect(Collectors.toList()).toArray(ranges); }).sorted(byPartition).collect(Collectors.toList()).toArray(ranges);
long allocedEvents = 0; long allocedEvents = 0;
java.util.Set<Integer> exhaustedPartitions = new HashSet<>(); Set<Integer> exhaustedPartitions = new HashSet<>();
// keep going until we have events to allocate and partitions still not exhausted. // keep going until we have events to allocate and partitions still not exhausted.
while (allocedEvents < numEvents && exhaustedPartitions.size() < toOffsetMap.size()) { while (allocedEvents < numEvents && exhaustedPartitions.size() < toOffsetMap.size()) {
long remainingEvents = numEvents - allocedEvents; long remainingEvents = numEvents - allocedEvents;
long eventsPerPartition = long eventsPerPartition =
(long) Math.ceil((1.0 * remainingEvents) / (toOffsetMap.size() - exhaustedPartitions.size())); (long) Math.ceil((1.0 * remainingEvents) / (toOffsetMap.size() - exhaustedPartitions.size()));
// Allocate the remaining events to non-exhausted partitions, in round robin fashion // Allocate the remaining events to non-exhausted partitions, in round robin fashion
for (int i = 0; i < ranges.length; i++) { for (int i = 0; i < ranges.length; i++) {
OffsetRange range = ranges[i]; OffsetRange range = ranges[i];
if (!exhaustedPartitions.contains(range.partition())) { if (!exhaustedPartitions.contains(range.partition())) {
long toOffsetMax = toOffsetMap.get(range.topicAndPartition()).offset(); long toOffsetMax = toOffsetMap.get(range.topicPartition());
long toOffset = Math.min(toOffsetMax, range.untilOffset() + eventsPerPartition); long toOffset = Math.min(toOffsetMax, range.untilOffset() + eventsPerPartition);
if (toOffset == toOffsetMax) { if (toOffset == toOffsetMax) {
exhaustedPartitions.add(range.partition()); exhaustedPartitions.add(range.partition());
@@ -130,7 +123,7 @@ public class KafkaOffsetGen {
long offsetsToAdd = Math.min(eventsPerPartition, (numEvents - allocedEvents)); long offsetsToAdd = Math.min(eventsPerPartition, (numEvents - allocedEvents));
toOffset = Math.min(toOffsetMax, toOffset + offsetsToAdd); toOffset = Math.min(toOffsetMax, toOffset + offsetsToAdd);
} }
ranges[i] = OffsetRange.create(range.topicAndPartition(), range.fromOffset(), toOffset); ranges[i] = OffsetRange.create(range.topicPartition(), range.fromOffset(), toOffset);
} }
} }
} }
@@ -143,29 +136,11 @@ public class KafkaOffsetGen {
} }
} }
/**
* Helpers to deal with tricky scala <=> java conversions. (oh my!)
*/
static class ScalaHelpers {
public static <K, V> Map<K, V> toScalaMap(HashMap<K, V> m) {
return JavaConverters.mapAsScalaMapConverter(m).asScala().toMap(Predef.conforms());
}
public static Set<String> toScalaSet(HashSet<String> s) {
return JavaConverters.asScalaSetConverter(s).asScala().toSet();
}
public static <K, V> java.util.Map<K, V> toJavaMap(Map<K, V> m) {
return JavaConverters.mapAsJavaMapConverter(m).asJava();
}
}
/** /**
* Kafka reset offset strategies. * Kafka reset offset strategies.
*/ */
enum KafkaResetOffsetStrategies { enum KafkaResetOffsetStrategies {
LARGEST, SMALLEST LATEST, EARLIEST
} }
/** /**
@@ -175,20 +150,20 @@ public class KafkaOffsetGen {
private static final String KAFKA_TOPIC_NAME = "hoodie.deltastreamer.source.kafka.topic"; private static final String KAFKA_TOPIC_NAME = "hoodie.deltastreamer.source.kafka.topic";
private static final String MAX_EVENTS_FROM_KAFKA_SOURCE_PROP = "hoodie.deltastreamer.kafka.source.maxEvents"; private static final String MAX_EVENTS_FROM_KAFKA_SOURCE_PROP = "hoodie.deltastreamer.kafka.source.maxEvents";
private static final KafkaResetOffsetStrategies DEFAULT_AUTO_RESET_OFFSET = KafkaResetOffsetStrategies.LARGEST; private static final KafkaResetOffsetStrategies DEFAULT_AUTO_RESET_OFFSET = KafkaResetOffsetStrategies.LATEST;
public static final long DEFAULT_MAX_EVENTS_FROM_KAFKA_SOURCE = 5000000; public static final long DEFAULT_MAX_EVENTS_FROM_KAFKA_SOURCE = 5000000;
public static long maxEventsFromKafkaSource = DEFAULT_MAX_EVENTS_FROM_KAFKA_SOURCE; public static long maxEventsFromKafkaSource = DEFAULT_MAX_EVENTS_FROM_KAFKA_SOURCE;
} }
private final HashMap<String, String> kafkaParams; private final HashMap<String, Object> kafkaParams;
private final TypedProperties props; private final TypedProperties props;
protected final String topicName; protected final String topicName;
public KafkaOffsetGen(TypedProperties props) { public KafkaOffsetGen(TypedProperties props) {
this.props = props; this.props = props;
kafkaParams = new HashMap<String, String>(); kafkaParams = new HashMap<>();
for (Object prop : props.keySet()) { for (Object prop : props.keySet()) {
kafkaParams.put(prop.toString(), props.getString(prop.toString())); kafkaParams.put(prop.toString(), props.get(prop.toString()));
} }
DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.KAFKA_TOPIC_NAME)); DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.KAFKA_TOPIC_NAME));
topicName = props.getString(Config.KAFKA_TOPIC_NAME); topicName = props.getString(Config.KAFKA_TOPIC_NAME);
@@ -197,31 +172,25 @@ public class KafkaOffsetGen {
public OffsetRange[] getNextOffsetRanges(Option<String> lastCheckpointStr, long sourceLimit) { public OffsetRange[] getNextOffsetRanges(Option<String> lastCheckpointStr, long sourceLimit) {
// Obtain current metadata for the topic // Obtain current metadata for the topic
KafkaCluster cluster = new KafkaCluster(ScalaHelpers.toScalaMap(kafkaParams)); KafkaConsumer consumer = new KafkaConsumer(kafkaParams);
Either<ArrayBuffer<Throwable>, Set<TopicAndPartition>> either = List<PartitionInfo> partitionInfoList;
cluster.getPartitions(ScalaHelpers.toScalaSet(new HashSet<>(Collections.singletonList(topicName)))); partitionInfoList = consumer.partitionsFor(topicName);
if (either.isLeft()) { Set<TopicPartition> topicPartitions = partitionInfoList.stream()
// log errors. and bail out. .map(x -> new TopicPartition(x.topic(), x.partition())).collect(Collectors.toSet());
throw new HoodieDeltaStreamerException("Error obtaining partition metadata", either.left().get().head());
}
Set<TopicAndPartition> topicPartitions = either.right().get();
// Determine the offset ranges to read from // Determine the offset ranges to read from
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> fromOffsets; Map<TopicPartition, Long> fromOffsets;
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> checkpointOffsets;
if (lastCheckpointStr.isPresent()) { if (lastCheckpointStr.isPresent()) {
fromOffsets = checkupValidOffsets(cluster, lastCheckpointStr, topicPartitions); fromOffsets = checkupValidOffsets(consumer, lastCheckpointStr, topicPartitions);
} else { } else {
KafkaResetOffsetStrategies autoResetValue = KafkaResetOffsetStrategies KafkaResetOffsetStrategies autoResetValue = KafkaResetOffsetStrategies
.valueOf(props.getString("auto.offset.reset", Config.DEFAULT_AUTO_RESET_OFFSET.toString()).toUpperCase()); .valueOf(props.getString("auto.offset.reset", Config.DEFAULT_AUTO_RESET_OFFSET.toString()).toUpperCase());
switch (autoResetValue) { switch (autoResetValue) {
case SMALLEST: case EARLIEST:
fromOffsets = fromOffsets = consumer.beginningOffsets(topicPartitions);
new HashMap(ScalaHelpers.toJavaMap(cluster.getEarliestLeaderOffsets(topicPartitions).right().get()));
break; break;
case LARGEST: case LATEST:
fromOffsets = fromOffsets = consumer.endOffsets(topicPartitions);
new HashMap(ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get()));
break; break;
default: default:
throw new HoodieNotSupportedException("Auto reset value must be one of 'smallest' or 'largest' "); throw new HoodieNotSupportedException("Auto reset value must be one of 'smallest' or 'largest' ");
@@ -229,8 +198,7 @@ public class KafkaOffsetGen {
} }
// Obtain the latest offsets. // Obtain the latest offsets.
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> toOffsets = Map<TopicPartition, Long> toOffsets = consumer.endOffsets(topicPartitions);
new HashMap(ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get()));
// Come up with final set of OffsetRanges to read (account for new partitions, limit number of events) // Come up with final set of OffsetRanges to read (account for new partitions, limit number of events)
long maxEventsToReadFromKafka = props.getLong(Config.MAX_EVENTS_FROM_KAFKA_SOURCE_PROP, long maxEventsToReadFromKafka = props.getLong(Config.MAX_EVENTS_FROM_KAFKA_SOURCE_PROP,
@@ -245,15 +213,13 @@ public class KafkaOffsetGen {
// check up checkpoint offsets is valid or not, if true, return checkpoint offsets, // check up checkpoint offsets is valid or not, if true, return checkpoint offsets,
// else return earliest offsets // else return earliest offsets
private HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> checkupValidOffsets(KafkaCluster cluster, private Map<TopicPartition, Long> checkupValidOffsets(KafkaConsumer consumer,
Option<String> lastCheckpointStr, Set<TopicAndPartition> topicPartitions) { Option<String> lastCheckpointStr, Set<TopicPartition> topicPartitions) {
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> checkpointOffsets = Map<TopicPartition, Long> checkpointOffsets = CheckpointUtils.strToOffsets(lastCheckpointStr.get());
CheckpointUtils.strToOffsets(lastCheckpointStr.get()); Map<TopicPartition, Long> earliestOffsets = consumer.beginningOffsets(topicPartitions);
HashMap<TopicAndPartition, KafkaCluster.LeaderOffset> earliestOffsets =
new HashMap(ScalaHelpers.toJavaMap(cluster.getEarliestLeaderOffsets(topicPartitions).right().get()));
boolean checkpointOffsetReseter = checkpointOffsets.entrySet().stream() boolean checkpointOffsetReseter = checkpointOffsets.entrySet().stream()
.anyMatch(offset -> offset.getValue().offset() < earliestOffsets.get(offset.getKey()).offset()); .anyMatch(offset -> offset.getValue() < earliestOffsets.get(offset.getKey()));
return checkpointOffsetReseter ? earliestOffsets : checkpointOffsets; return checkpointOffsetReseter ? earliestOffsets : checkpointOffsets;
} }
@@ -261,7 +227,7 @@ public class KafkaOffsetGen {
return topicName; return topicName;
} }
public HashMap<String, String> getKafkaParams() { public HashMap<String, Object> getKafkaParams() {
return kafkaParams; return kafkaParams;
} }
} }

View File

@@ -28,14 +28,14 @@ import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.Config;
import kafka.common.TopicAndPartition;
import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.TopicPartition;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset; import org.apache.spark.streaming.kafka010.KafkaTestUtils;
import org.apache.spark.streaming.kafka.KafkaTestUtils; import org.apache.spark.streaming.kafka010.OffsetRange;
import org.apache.spark.streaming.kafka.OffsetRange;
import org.junit.After; import org.junit.After;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Before; import org.junit.Before;
@@ -44,6 +44,7 @@ import org.junit.Test;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.UUID;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
@@ -84,13 +85,12 @@ public class TestKafkaSource extends UtilitiesTestBase {
private TypedProperties createPropsForJsonSource(Long maxEventsToReadFromKafkaSource) { private TypedProperties createPropsForJsonSource(Long maxEventsToReadFromKafkaSource) {
TypedProperties props = new TypedProperties(); TypedProperties props = new TypedProperties();
props.setProperty("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME); props.setProperty("hoodie.deltastreamer.source.kafka.topic", TEST_TOPIC_NAME);
props.setProperty("metadata.broker.list", testUtils.brokerAddress()); props.setProperty("bootstrap.servers", testUtils.brokerAddress());
props.setProperty("auto.offset.reset", "smallest"); props.setProperty("auto.offset.reset", "earliest");
props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents", props.setProperty("hoodie.deltastreamer.kafka.source.maxEvents",
maxEventsToReadFromKafkaSource != null ? String.valueOf(maxEventsToReadFromKafkaSource) : maxEventsToReadFromKafkaSource != null ? String.valueOf(maxEventsToReadFromKafkaSource) :
String.valueOf(Config.maxEventsFromKafkaSource)); String.valueOf(Config.maxEventsFromKafkaSource));
props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
return props; return props;
} }
@@ -214,10 +214,10 @@ public class TestKafkaSource extends UtilitiesTestBase {
assertEquals(Option.empty(), fetch6.getBatch()); assertEquals(Option.empty(), fetch6.getBatch());
} }
private static HashMap<TopicAndPartition, LeaderOffset> makeOffsetMap(int[] partitions, long[] offsets) { private static HashMap<TopicPartition, Long> makeOffsetMap(int[] partitions, long[] offsets) {
HashMap<TopicAndPartition, LeaderOffset> map = new HashMap<>(); HashMap<TopicPartition, Long> map = new HashMap<>();
for (int i = 0; i < partitions.length; i++) { for (int i = 0; i < partitions.length; i++) {
map.put(new TopicAndPartition(TEST_TOPIC_NAME, partitions[i]), new LeaderOffset("", -1, offsets[i])); map.put(new TopicPartition(TEST_TOPIC_NAME, partitions[i]), offsets[i]);
} }
return map; return map;
} }

View File

@@ -25,6 +25,6 @@ hoodie.deltastreamer.schemaprovider.registry.url=http://localhost:8081/subjects/
#hoodie.deltastreamer.source.kafka.topic=uber_trips #hoodie.deltastreamer.source.kafka.topic=uber_trips
hoodie.deltastreamer.source.kafka.topic=impressions hoodie.deltastreamer.source.kafka.topic=impressions
#Kafka props #Kafka props
metadata.broker.list=localhost:9092 bootstrap.servers=localhost:9092
auto.offset.reset=smallest auto.offset.reset=earliest
schema.registry.url=http://localhost:8081 schema.registry.url=http://localhost:8081

View File

@@ -23,7 +23,7 @@
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>hudi-spark-bundle</artifactId> <artifactId>hudi-spark-bundle_2.11</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>
@@ -32,7 +32,7 @@
</properties> </properties>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
<groupId>org.apache.rat</groupId> <groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId> <artifactId>apache-rat-plugin</artifactId>
@@ -66,7 +66,7 @@
<includes> <includes>
<include>org.apache.hudi:hudi-common</include> <include>org.apache.hudi:hudi-common</include>
<include>org.apache.hudi:hudi-client</include> <include>org.apache.hudi:hudi-client</include>
<include>org.apache.hudi:hudi-spark</include> <include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
<include>org.apache.hudi:hudi-hive</include> <include>org.apache.hudi:hudi-hive</include>
<include>org.apache.hudi:hudi-hadoop-mr</include> <include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.hudi:hudi-timeline-service</include> <include>org.apache.hudi:hudi-timeline-service</include>
@@ -83,8 +83,8 @@
<include>org.antlr:stringtemplate</include> <include>org.antlr:stringtemplate</include>
<include>org.apache.parquet:parquet-avro</include> <include>org.apache.parquet:parquet-avro</include>
<include>com.twitter:bijection-avro_2.11</include> <include>com.twitter:bijection-avro_${scala.binary.version}</include>
<include>com.twitter:bijection-core_2.11</include> <include>com.twitter:bijection-core_${scala.binary.version}</include>
<include>io.dropwizard.metrics:metrics-core</include> <include>io.dropwizard.metrics:metrics-core</include>
<include>io.dropwizard.metrics:metrics-graphite</include> <include>io.dropwizard.metrics:metrics-graphite</include>
<include>com.yammer.metrics:metrics-core</include> <include>com.yammer.metrics:metrics-core</include>
@@ -190,7 +190,7 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>

View File

@@ -24,7 +24,7 @@
<relativePath>../../pom.xml</relativePath> <relativePath>../../pom.xml</relativePath>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<artifactId>hudi-utilities-bundle</artifactId> <artifactId>hudi-utilities-bundle_2.11</artifactId>
<packaging>jar</packaging> <packaging>jar</packaging>
<properties> <properties>
@@ -67,8 +67,8 @@
<includes> <includes>
<include>org.apache.hudi:hudi-common</include> <include>org.apache.hudi:hudi-common</include>
<include>org.apache.hudi:hudi-client</include> <include>org.apache.hudi:hudi-client</include>
<include>org.apache.hudi:hudi-utilities</include> <include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
<include>org.apache.hudi:hudi-spark</include> <include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
<include>org.apache.hudi:hudi-hive</include> <include>org.apache.hudi:hudi-hive</include>
<include>org.apache.hudi:hudi-hadoop-mr</include> <include>org.apache.hudi:hudi-hadoop-mr</include>
<include>org.apache.hudi:hudi-timeline-service</include> <include>org.apache.hudi:hudi-timeline-service</include>
@@ -85,8 +85,8 @@
<include>org.antlr:stringtemplate</include> <include>org.antlr:stringtemplate</include>
<include>org.apache.parquet:parquet-avro</include> <include>org.apache.parquet:parquet-avro</include>
<include>com.twitter:bijection-avro_2.11</include> <include>com.twitter:bijection-avro_${scala.binary.version}</include>
<include>com.twitter:bijection-core_2.11</include> <include>com.twitter:bijection-core_${scala.binary.version}</include>
<include>io.confluent:kafka-avro-serializer</include> <include>io.confluent:kafka-avro-serializer</include>
<include>io.confluent:common-config</include> <include>io.confluent:common-config</include>
<include>io.confluent:common-utils</include> <include>io.confluent:common-utils</include>
@@ -94,8 +94,8 @@
<include>io.dropwizard.metrics:metrics-core</include> <include>io.dropwizard.metrics:metrics-core</include>
<include>io.dropwizard.metrics:metrics-graphite</include> <include>io.dropwizard.metrics:metrics-graphite</include>
<include>com.yammer.metrics:metrics-core</include> <include>com.yammer.metrics:metrics-core</include>
<include>org.apache.spark:spark-streaming-kafka-0-8_2.11</include> <include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include>
<include>org.apache.kafka:kafka_2.11</include> <include>org.apache.kafka:kafka_${scala.binary.version}</include>
<include>com.101tec:zkclient</include> <include>com.101tec:zkclient</include>
<include>org.apache.kafka:kafka-clients</include> <include>org.apache.kafka:kafka-clients</include>
@@ -200,12 +200,12 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark</artifactId> <artifactId>hudi-spark_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.hudi</groupId> <groupId>org.apache.hudi</groupId>
<artifactId>hudi-utilities</artifactId> <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
<version>${project.version}</version> <version>${project.version}</version>
</dependency> </dependency>

50
pom.xml
View File

@@ -75,6 +75,7 @@
<java.version>1.8</java.version> <java.version>1.8</java.version>
<fasterxml.version>2.6.7</fasterxml.version> <fasterxml.version>2.6.7</fasterxml.version>
<kafka.version>2.0.0</kafka.version>
<glassfish.version>2.17</glassfish.version> <glassfish.version>2.17</glassfish.version>
<parquet.version>1.10.1</parquet.version> <parquet.version>1.10.1</parquet.version>
<junit.version>4.11</junit.version> <junit.version>4.11</junit.version>
@@ -91,7 +92,7 @@
<spark.version>2.4.4</spark.version> <spark.version>2.4.4</spark.version>
<avro.version>1.8.2</avro.version> <avro.version>1.8.2</avro.version>
<scala.version>2.11.8</scala.version> <scala.version>2.11.8</scala.version>
<scala.libversion>2.11</scala.libversion> <scala.binary.version>2.11</scala.binary.version>
<apache-rat-plugin.version>0.12</apache-rat-plugin.version> <apache-rat-plugin.version>0.12</apache-rat-plugin.version>
<scala-maven-plugin.version>3.3.1</scala-maven-plugin.version> <scala-maven-plugin.version>3.3.1</scala-maven-plugin.version>
<scalatest.version>3.0.1</scalatest.version> <scalatest.version>3.0.1</scalatest.version>
@@ -423,8 +424,8 @@
</dependency> </dependency>
<dependency> <dependency>
<groupId>com.fasterxml.jackson.module</groupId> <groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_2.11</artifactId> <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
<version>${fasterxml.version}</version> <version>${fasterxml.version}.1</version>
</dependency> </dependency>
<!-- Glassfish --> <!-- Glassfish -->
@@ -470,13 +471,13 @@
<!-- Spark --> <!-- Spark -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version> <version>${spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId> <artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version> <version>${spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
@@ -484,7 +485,7 @@
<!-- Spark (Packages) --> <!-- Spark (Packages) -->
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-avro_2.11</artifactId> <artifactId>spark-avro_${scala.binary.version}</artifactId>
<version>${spark.version}</version> <version>${spark.version}</version>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
@@ -1040,6 +1041,43 @@
</plugins> </plugins>
</build> </build>
</profile> </profile>
<!-- Exists for backwards compatibility; profile doesn't do anything -->
<profile>
<id>scala-2.11</id>
</profile>
<profile>
<id>scala-2.12</id>
<properties>
<scala.version>2.12.10</scala.version>
<scala.binary.version>2.12</scala.binary.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>enforce-versions</id>
<goals>
<goal>enforce</goal>
</goals>
<configuration>
<rules>
<bannedDependencies>
<excludes combine.children="append">
<exclude>*:*_2.11</exclude>
</excludes>
</bannedDependencies>
</rules>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles> </profiles>
<issueManagement> <issueManagement>