[HUDI-1040] Make Hudi support Spark 3 (#2208)
* Fix flaky MOR unit test * Update Spark API usage to be compatible with both Spark 2 and Spark 3 * Refactor the bulk insert v2 code so that Hudi compiles with Spark 3 * Add a spark3 profile to handle the fasterxml and Spark versions * Create the hudi-spark-common module and refactor the hudi-spark related modules Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
2
LICENSE
2
LICENSE
@@ -246,6 +246,8 @@ This product includes code from Apache Spark
|
|||||||
|
|
||||||
* org.apache.hudi.AvroConversionHelper copied from classes in org/apache/spark/sql/avro package
|
* org.apache.hudi.AvroConversionHelper copied from classes in org/apache/spark/sql/avro package
|
||||||
|
|
||||||
|
* org.apache.hudi.HoodieSparkUtils.scala copied some methods from org.apache.spark.deploy.SparkHadoopUtil.scala
|
||||||
|
|
||||||
Copyright: 2014 and onwards The Apache Software Foundation
|
Copyright: 2014 and onwards The Apache Software Foundation
|
||||||
Home page: http://spark.apache.org/
|
Home page: http://spark.apache.org/
|
||||||
License: http://www.apache.org/licenses/LICENSE-2.0
|
License: http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|||||||
@@ -76,6 +76,14 @@ The default Scala version supported is 2.11. To build for Scala 2.12 version, bu
|
|||||||
mvn clean package -DskipTests -Dscala-2.12
|
mvn clean package -DskipTests -Dscala-2.12
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Build with Spark 3.0.0
|
||||||
|
|
||||||
|
The default supported Spark version is 2.4.4. To build for Spark 3.0.0, use the `spark3` profile:
|
||||||
|
|
||||||
|
```
|
||||||
|
mvn clean package -DskipTests -Dspark3
|
||||||
|
```
|
||||||
|
|
||||||
### Build without spark-avro module
|
### Build without spark-avro module
|
||||||
|
|
||||||
The default hudi-jar bundles spark-avro module. To build without spark-avro module, build using `spark-shade-unbundle-avro` profile
|
The default hudi-jar bundles spark-avro module. To build without spark-avro module, build using `spark-shade-unbundle-avro` profile
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.client.utils;
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Row;
|
||||||
|
import org.apache.spark.sql.catalyst.InternalRow;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
public interface SparkRowDeserializer extends Serializable {
|
||||||
|
Row deserializeRow(InternalRow internalRow);
|
||||||
|
}
|
||||||
@@ -21,41 +21,15 @@ package org.apache.hudi
|
|||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord}
|
import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord}
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils
|
import org.apache.hudi.avro.HoodieAvroUtils
|
||||||
import org.apache.hudi.common.model.HoodieKey
|
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.avro.SchemaConverters
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
import org.apache.spark.sql.catalyst.encoders.RowEncoder
|
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.StructType
|
||||||
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
|
import org.apache.spark.sql.{Dataset, Row, SparkSession}
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
object AvroConversionUtils {
|
object AvroConversionUtils {
|
||||||
|
|
||||||
def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
|
|
||||||
val avroSchema = convertStructTypeToAvroSchema(df.schema, structName, recordNamespace)
|
|
||||||
createRdd(df, avroSchema, structName, recordNamespace)
|
|
||||||
}
|
|
||||||
|
|
||||||
def createRdd(df: DataFrame, avroSchema: Schema, structName: String, recordNamespace: String)
|
|
||||||
: RDD[GenericRecord] = {
|
|
||||||
// Use the Avro schema to derive the StructType which has the correct nullability information
|
|
||||||
val dataType = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
|
|
||||||
val encoder = RowEncoder.apply(dataType).resolveAndBind()
|
|
||||||
df.queryExecution.toRdd.map(encoder.fromRow)
|
|
||||||
.mapPartitions { records =>
|
|
||||||
if (records.isEmpty) Iterator.empty
|
|
||||||
else {
|
|
||||||
val convertor = AvroConversionHelper.createConverterToAvro(dataType, structName, recordNamespace)
|
|
||||||
records.map { x => convertor(x).asInstanceOf[GenericRecord] }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def createRddForDeletes(df: DataFrame, rowField: String, partitionField: String): RDD[HoodieKey] = {
|
|
||||||
df.rdd.map(row => new HoodieKey(row.getAs[String](rowField), row.getAs[String](partitionField)))
|
|
||||||
}
|
|
||||||
|
|
||||||
def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = {
|
def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = {
|
||||||
if (rdd.isEmpty()) {
|
if (rdd.isEmpty()) {
|
||||||
ss.emptyDataFrame
|
ss.emptyDataFrame
|
||||||
|
|||||||
@@ -72,7 +72,7 @@ public class TestHoodieRowCreateHandle extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testRowCreateHandle() throws IOException {
|
public void testRowCreateHandle() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
||||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||||
@@ -113,7 +113,7 @@ public class TestHoodieRowCreateHandle extends HoodieClientTestHarness {
|
|||||||
* should be thrown.
|
* should be thrown.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testGlobalFailure() throws IOException {
|
public void testGlobalFailure() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
||||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||||
@@ -179,7 +179,8 @@ public class TestHoodieRowCreateHandle extends HoodieClientTestHarness {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieInternalWriteStatus writeAndGetWriteStatus(Dataset<Row> inputRows, HoodieRowCreateHandle handle) throws IOException {
|
private HoodieInternalWriteStatus writeAndGetWriteStatus(Dataset<Row> inputRows, HoodieRowCreateHandle handle)
|
||||||
|
throws Exception {
|
||||||
List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
|
List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
|
||||||
// issue writes
|
// issue writes
|
||||||
for (InternalRow internalRow : internalRows) {
|
for (InternalRow internalRow : internalRows) {
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ import org.junit.jupiter.api.AfterEach;
|
|||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@@ -64,7 +63,7 @@ public class TestHoodieInternalRowParquetWriter extends HoodieClientTestHarness
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void endToEndTest() throws IOException {
|
public void endToEndTest() throws Exception {
|
||||||
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath).build();
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
// init write support and parquet config
|
// init write support and parquet config
|
||||||
|
|||||||
@@ -43,7 +43,6 @@ import org.apache.hadoop.mapred.RecordReader;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@@ -84,36 +83,32 @@ public class HoodieMergeOnReadTestUtils {
|
|||||||
.map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()))
|
.map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
return inputPaths.stream().map(path -> {
|
List<GenericRecord> records = new ArrayList<>();
|
||||||
setInputPath(jobConf, path);
|
try {
|
||||||
List<GenericRecord> records = new ArrayList<>();
|
FileInputFormat.setInputPaths(jobConf, String.join(",", inputPaths));
|
||||||
try {
|
InputSplit[] splits = inputFormat.getSplits(jobConf, inputPaths.size());
|
||||||
List<InputSplit> splits = Arrays.asList(inputFormat.getSplits(jobConf, 1));
|
|
||||||
for (InputSplit split : splits) {
|
for (InputSplit split : splits) {
|
||||||
RecordReader recordReader = inputFormat.getRecordReader(split, jobConf, null);
|
RecordReader recordReader = inputFormat.getRecordReader(split, jobConf, null);
|
||||||
Object key = recordReader.createKey();
|
Object key = recordReader.createKey();
|
||||||
ArrayWritable writable = (ArrayWritable) recordReader.createValue();
|
ArrayWritable writable = (ArrayWritable) recordReader.createValue();
|
||||||
while (recordReader.next(key, writable)) {
|
while (recordReader.next(key, writable)) {
|
||||||
GenericRecordBuilder newRecord = new GenericRecordBuilder(projectedSchema);
|
GenericRecordBuilder newRecord = new GenericRecordBuilder(projectedSchema);
|
||||||
// writable returns an array with [field1, field2, _hoodie_commit_time,
|
// writable returns an array with [field1, field2, _hoodie_commit_time,
|
||||||
// _hoodie_commit_seqno]
|
// _hoodie_commit_seqno]
|
||||||
Writable[] values = writable.get();
|
Writable[] values = writable.get();
|
||||||
schema.getFields().stream()
|
schema.getFields().stream()
|
||||||
.filter(f -> !projectCols || projectedColumns.contains(f.name()))
|
.filter(f -> !projectCols || projectedColumns.contains(f.name()))
|
||||||
.map(f -> Pair.of(projectedSchema.getFields().stream()
|
.map(f -> Pair.of(projectedSchema.getFields().stream()
|
||||||
.filter(p -> f.name().equals(p.name())).findFirst().get(), f))
|
.filter(p -> f.name().equals(p.name())).findFirst().get(), f))
|
||||||
.forEach(fieldsPair -> newRecord.set(fieldsPair.getKey(), values[fieldsPair.getValue().pos()]));
|
.forEach(fieldsPair -> newRecord.set(fieldsPair.getKey(), values[fieldsPair.getValue().pos()]));
|
||||||
records.add(newRecord.build());
|
records.add(newRecord.build());
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (IOException ie) {
|
|
||||||
ie.printStackTrace();
|
|
||||||
}
|
}
|
||||||
return records;
|
} catch (IOException ie) {
|
||||||
}).reduce((a, b) -> {
|
ie.printStackTrace();
|
||||||
a.addAll(b);
|
}
|
||||||
return a;
|
return records;
|
||||||
}).orElse(new ArrayList<>());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void setPropsForInputFormat(FileInputFormat inputFormat, JobConf jobConf, Schema schema, String hiveColumnTypes, boolean projectCols, List<String> projectedCols) {
|
private static void setPropsForInputFormat(FileInputFormat inputFormat, JobConf jobConf, Schema schema, String hiveColumnTypes, boolean projectCols, List<String> projectedCols) {
|
||||||
@@ -156,10 +151,4 @@ public class HoodieMergeOnReadTestUtils {
|
|||||||
configurable.setConf(conf);
|
configurable.setConf(conf);
|
||||||
jobConf.addResource(conf);
|
jobConf.addResource(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void setInputPath(JobConf jobConf, String inputPath) {
|
|
||||||
jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath);
|
|
||||||
jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath);
|
|
||||||
jobConf.set("map.input.dir", inputPath);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ import org.apache.hudi.config.HoodieStorageConfig;
|
|||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.index.HoodieIndex;
|
import org.apache.hudi.index.HoodieIndex;
|
||||||
|
|
||||||
|
import org.apache.spark.package$;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Row;
|
import org.apache.spark.sql.Row;
|
||||||
import org.apache.spark.sql.SQLContext;
|
import org.apache.spark.sql.SQLContext;
|
||||||
@@ -41,6 +42,8 @@ import org.apache.spark.sql.types.Metadata;
|
|||||||
import org.apache.spark.sql.types.StructField;
|
import org.apache.spark.sql.types.StructField;
|
||||||
import org.apache.spark.sql.types.StructType;
|
import org.apache.spark.sql.types.StructType;
|
||||||
|
|
||||||
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
@@ -139,11 +142,11 @@ public class SparkDatasetTestUtils {
|
|||||||
* @param rows Dataset<Row>s to be converted
|
* @param rows Dataset<Row>s to be converted
|
||||||
* @return the List of {@link InternalRow}s thus converted.
|
* @return the List of {@link InternalRow}s thus converted.
|
||||||
*/
|
*/
|
||||||
public static List<InternalRow> toInternalRows(Dataset<Row> rows, ExpressionEncoder encoder) {
|
public static List<InternalRow> toInternalRows(Dataset<Row> rows, ExpressionEncoder encoder) throws Exception {
|
||||||
List<InternalRow> toReturn = new ArrayList<>();
|
List<InternalRow> toReturn = new ArrayList<>();
|
||||||
List<Row> rowList = rows.collectAsList();
|
List<Row> rowList = rows.collectAsList();
|
||||||
for (Row row : rowList) {
|
for (Row row : rowList) {
|
||||||
toReturn.add(encoder.toRow(row).copy());
|
toReturn.add(serializeRow(encoder, row).copy());
|
||||||
}
|
}
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
@@ -173,4 +176,17 @@ public class SparkDatasetTestUtils {
|
|||||||
.withBulkInsertParallelism(2);
|
.withBulkInsertParallelism(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static InternalRow serializeRow(ExpressionEncoder encoder, Row row)
|
||||||
|
throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, ClassNotFoundException {
|
||||||
|
// TODO remove reflection if Spark 2.x support is dropped
|
||||||
|
if (package$.MODULE$.SPARK_VERSION().startsWith("2.")) {
|
||||||
|
Method spark2method = encoder.getClass().getMethod("toRow", Object.class);
|
||||||
|
return (InternalRow) spark2method.invoke(encoder, row);
|
||||||
|
} else {
|
||||||
|
Class<?> serializerClass = Class.forName("org.apache.spark.sql.catalyst.encoders.ExpressionEncoder$Serializer");
|
||||||
|
Object serializer = encoder.getClass().getMethod("createSerializer").invoke(encoder);
|
||||||
|
Method aboveSpark2method = serializerClass.getMethod("apply", Object.class);
|
||||||
|
return (InternalRow) aboveSpark2method.invoke(serializer, row);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -171,7 +171,7 @@ public class RemoteHoodieTableFileSystemView implements SyncableFileSystemView,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
String content = response.returnContent().asString();
|
String content = response.returnContent().asString();
|
||||||
return mapper.readValue(content, reference);
|
return (T) mapper.readValue(content, reference);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, String> getParamsWithPartitionPath(String partitionPath) {
|
private Map<String, String> getParamsWithPartitionPath(String partitionPath) {
|
||||||
|
|||||||
@@ -206,12 +206,11 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||||
<artifactId>jackson-dataformat-yaml</artifactId>
|
<artifactId>jackson-dataformat-yaml</artifactId>
|
||||||
<version>2.7.4</version>
|
<version>${fasterxml.jackson.dataformat.yaml.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
<version>2.6.7.3</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Fasterxml - Test-->
|
<!-- Fasterxml - Test-->
|
||||||
@@ -220,11 +219,6 @@
|
|||||||
<artifactId>jackson-annotations</artifactId>
|
<artifactId>jackson-annotations</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
|
||||||
<artifactId>jackson-databind</artifactId>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||||
<artifactId>jackson-datatype-guava</artifactId>
|
<artifactId>jackson-datatype-guava</artifactId>
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ package org.apache.hudi.integ.testsuite.reader;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.hudi.AvroConversionUtils;
|
import org.apache.hudi.HoodieSparkUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
|
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
@@ -49,7 +49,7 @@ public class SparkBasedReader {
|
|||||||
.option(AVRO_SCHEMA_OPTION_KEY, schemaStr)
|
.option(AVRO_SCHEMA_OPTION_KEY, schemaStr)
|
||||||
.load(JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq());
|
.load(JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq());
|
||||||
|
|
||||||
return AvroConversionUtils
|
return HoodieSparkUtils
|
||||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||||
nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE))
|
nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE))
|
||||||
.toJavaRDD();
|
.toJavaRDD();
|
||||||
@@ -61,7 +61,7 @@ public class SparkBasedReader {
|
|||||||
Dataset<Row> dataSet = sparkSession.read()
|
Dataset<Row> dataSet = sparkSession.read()
|
||||||
.parquet((JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq()));
|
.parquet((JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq()));
|
||||||
|
|
||||||
return AvroConversionUtils
|
return HoodieSparkUtils
|
||||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
|
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
|
||||||
.toJavaRDD();
|
.toJavaRDD();
|
||||||
|
|||||||
@@ -61,9 +61,9 @@ public abstract class ITTestBase {
|
|||||||
protected static final String HIVESERVER = "/hiveserver";
|
protected static final String HIVESERVER = "/hiveserver";
|
||||||
protected static final String PRESTO_COORDINATOR = "/presto-coordinator-1";
|
protected static final String PRESTO_COORDINATOR = "/presto-coordinator-1";
|
||||||
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
||||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_app.sh";
|
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_app.sh";
|
||||||
protected static final String HOODIE_GENERATE_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_generate_app.sh";
|
protected static final String HOODIE_GENERATE_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_generate_app.sh";
|
||||||
protected static final String HOODIE_JAVA_STREAMING_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_streaming_app.sh";
|
protected static final String HOODIE_JAVA_STREAMING_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_streaming_app.sh";
|
||||||
protected static final String HUDI_HADOOP_BUNDLE =
|
protected static final String HUDI_HADOOP_BUNDLE =
|
||||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
||||||
protected static final String HUDI_HIVE_SYNC_BUNDLE =
|
protected static final String HUDI_HIVE_SYNC_BUNDLE =
|
||||||
|
|||||||
178
hudi-spark-datasource/hudi-spark-common/pom.xml
Normal file
178
hudi-spark-datasource/hudi-spark-common/pom.xml
Normal file
@@ -0,0 +1,178 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>hudi-spark-datasource</artifactId>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<version>0.6.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${parent.version}</version>
|
||||||
|
|
||||||
|
<name>hudi-spark-common</name>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<main.basedir>${project.parent.parent.basedir}</main.basedir>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/resources</directory>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<pluginManagement>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>${scala-maven-plugin.version}</version>
|
||||||
|
<configuration>
|
||||||
|
<args>
|
||||||
|
<arg>-nobootcp</arg>
|
||||||
|
</args>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</pluginManagement>
|
||||||
|
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-dependency-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>copy-dependencies</id>
|
||||||
|
<phase>prepare-package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>copy-dependencies</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<outputDirectory>${project.build.directory}/lib</outputDirectory>
|
||||||
|
<overWriteReleases>true</overWriteReleases>
|
||||||
|
<overWriteSnapshots>true</overWriteSnapshots>
|
||||||
|
<overWriteIfNewer>true</overWriteIfNewer>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>scala-compile-first</id>
|
||||||
|
<phase>process-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-test-compile</id>
|
||||||
|
<phase>process-test-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>compile</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>test-jar</goal>
|
||||||
|
</goals>
|
||||||
|
<phase>test-compile</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<skip>false</skip>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.rat</groupId>
|
||||||
|
<artifactId>apache-rat-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.scalastyle</groupId>
|
||||||
|
<artifactId>scalastyle-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.jacoco</groupId>
|
||||||
|
<artifactId>jacoco-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<!-- Scala -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scala-lang</groupId>
|
||||||
|
<artifactId>scala-library</artifactId>
|
||||||
|
<version>${scala.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-client-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-hive-sync</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
||||||
@@ -36,7 +36,7 @@ import org.apache.log4j.LogManager
|
|||||||
*/
|
*/
|
||||||
object DataSourceReadOptions {
|
object DataSourceReadOptions {
|
||||||
|
|
||||||
private val log = LogManager.getLogger(classOf[DefaultSource])
|
private val log = LogManager.getLogger(DataSourceReadOptions.getClass)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether data needs to be read, in
|
* Whether data needs to be read, in
|
||||||
@@ -143,7 +143,7 @@ object DataSourceReadOptions {
|
|||||||
*/
|
*/
|
||||||
object DataSourceWriteOptions {
|
object DataSourceWriteOptions {
|
||||||
|
|
||||||
private val log = LogManager.getLogger(classOf[DefaultSource])
|
private val log = LogManager.getLogger(DataSourceWriteOptions.getClass)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The write operation, that this write should do
|
* The write operation, that this write should do
|
||||||
@@ -17,17 +17,20 @@
|
|||||||
-->
|
-->
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>hudi</artifactId>
|
<artifactId>hudi-spark-datasource</artifactId>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<version>0.6.1-SNAPSHOT</version>
|
<version>0.6.1-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||||
|
<version>${parent.version}</version>
|
||||||
|
|
||||||
|
<name>hudi-spark_${scala.binary.version}</name>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<main.basedir>${project.parent.basedir}</main.basedir>
|
<main.basedir>${project.parent.parent.basedir}</main.basedir>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
@@ -196,6 +199,21 @@
|
|||||||
<artifactId>hudi-sync-common</artifactId>
|
<artifactId>hudi-sync-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Logging -->
|
<!-- Logging -->
|
||||||
<dependency>
|
<dependency>
|
||||||
@@ -23,7 +23,7 @@ function error_exit {
|
|||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
#Ensure we pick the right jar even for hive11 builds
|
#Ensure we pick the right jar even for hive11 builds
|
||||||
HUDI_JAR=`ls -c $DIR/../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v source | head -1`
|
HUDI_JAR=`ls -c $DIR/../../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v sources | head -1`
|
||||||
|
|
||||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||||
echo "setting hadoop conf dir"
|
echo "setting hadoop conf dir"
|
||||||
@@ -23,7 +23,7 @@ function error_exit {
|
|||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
#Ensure we pick the right jar even for hive11 builds
|
#Ensure we pick the right jar even for hive11 builds
|
||||||
HUDI_JAR=`ls -c $DIR/../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v source | head -1`
|
HUDI_JAR=`ls -c $DIR/../../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v sources | head -1`
|
||||||
|
|
||||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||||
echo "setting hadoop conf dir"
|
echo "setting hadoop conf dir"
|
||||||
@@ -23,7 +23,7 @@ function error_exit {
|
|||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
#Ensure we pick the right jar even for hive11 builds
|
#Ensure we pick the right jar even for hive11 builds
|
||||||
HUDI_JAR=`ls -c $DIR/../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v source | head -1`
|
HUDI_JAR=`ls -c $DIR/../../packaging/hudi-spark-bundle/target/hudi-spark-bundle*.jar | grep -v sources | head -1`
|
||||||
|
|
||||||
if [ -z "$HADOOP_CONF_DIR" ]; then
|
if [ -z "$HADOOP_CONF_DIR" ]; then
|
||||||
echo "setting hadoop conf dir"
|
echo "setting hadoop conf dir"
|
||||||
@@ -18,8 +18,8 @@
|
|||||||
|
|
||||||
package org.apache.hudi.bootstrap;
|
package org.apache.hudi.bootstrap;
|
||||||
|
|
||||||
import org.apache.hudi.AvroConversionUtils;
|
|
||||||
import org.apache.hudi.DataSourceUtils;
|
import org.apache.hudi.DataSourceUtils;
|
||||||
|
import org.apache.hudi.HoodieSparkUtils;
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||||
import org.apache.hudi.avro.model.HoodieFileStatus;
|
import org.apache.hudi.avro.model.HoodieFileStatus;
|
||||||
import org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider;
|
import org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider;
|
||||||
@@ -65,7 +65,7 @@ public class SparkParquetBootstrapDataProvider extends FullRecordBootstrapDataPr
|
|||||||
KeyGenerator keyGenerator = DataSourceUtils.createKeyGenerator(props);
|
KeyGenerator keyGenerator = DataSourceUtils.createKeyGenerator(props);
|
||||||
String structName = tableName + "_record";
|
String structName = tableName + "_record";
|
||||||
String namespace = "hoodie." + tableName;
|
String namespace = "hoodie." + tableName;
|
||||||
RDD<GenericRecord> genericRecords = AvroConversionUtils.createRdd(inputDataset, structName, namespace);
|
RDD<GenericRecord> genericRecords = HoodieSparkUtils.createRdd(inputDataset, structName, namespace);
|
||||||
return genericRecords.toJavaRDD().map(gr -> {
|
return genericRecords.toJavaRDD().map(gr -> {
|
||||||
String orderingVal = HoodieAvroUtils.getNestedFieldValAsString(
|
String orderingVal = HoodieAvroUtils.getNestedFieldValAsString(
|
||||||
gr, props.getString("hoodie.datasource.write.precombine.field"), false);
|
gr, props.getString("hoodie.datasource.write.precombine.field"), false);
|
||||||
@@ -41,6 +41,7 @@ import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool}
|
|||||||
import org.apache.hudi.internal.HoodieDataSourceInternalWriter
|
import org.apache.hudi.internal.HoodieDataSourceInternalWriter
|
||||||
import org.apache.hudi.sync.common.AbstractSyncTool
|
import org.apache.hudi.sync.common.AbstractSyncTool
|
||||||
import org.apache.log4j.LogManager
|
import org.apache.log4j.LogManager
|
||||||
|
import org.apache.spark.SPARK_VERSION
|
||||||
import org.apache.spark.SparkContext
|
import org.apache.spark.SparkContext
|
||||||
import org.apache.spark.api.java.JavaSparkContext
|
import org.apache.spark.api.java.JavaSparkContext
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
@@ -129,6 +130,9 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
// scalastyle:off
|
// scalastyle:off
|
||||||
if (parameters(ENABLE_ROW_WRITER_OPT_KEY).toBoolean &&
|
if (parameters(ENABLE_ROW_WRITER_OPT_KEY).toBoolean &&
|
||||||
operation == WriteOperationType.BULK_INSERT) {
|
operation == WriteOperationType.BULK_INSERT) {
|
||||||
|
if (!SPARK_VERSION.startsWith("2.")) {
|
||||||
|
throw new HoodieException("Bulk insert using row writer is not supported with Spark 3. To use row writer please switch to spark 2.")
|
||||||
|
}
|
||||||
val (success, commitTime: common.util.Option[String]) = bulkInsertAsRow(sqlContext, parameters, df, tblName,
|
val (success, commitTime: common.util.Option[String]) = bulkInsertAsRow(sqlContext, parameters, df, tblName,
|
||||||
basePath, path, instantTime)
|
basePath, path, instantTime)
|
||||||
return (success, commitTime, common.util.Option.empty(), hoodieWriteClient.orNull, tableConfig)
|
return (success, commitTime, common.util.Option.empty(), hoodieWriteClient.orNull, tableConfig)
|
||||||
@@ -148,7 +152,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
|
|
||||||
// Convert to RDD[HoodieRecord]
|
// Convert to RDD[HoodieRecord]
|
||||||
val keyGenerator = DataSourceUtils.createKeyGenerator(toProperties(parameters))
|
val keyGenerator = DataSourceUtils.createKeyGenerator(toProperties(parameters))
|
||||||
val genericRecords: RDD[GenericRecord] = AvroConversionUtils.createRdd(df, schema, structName, nameSpace)
|
val genericRecords: RDD[GenericRecord] = HoodieSparkUtils.createRdd(df, schema, structName, nameSpace)
|
||||||
val shouldCombine = parameters(INSERT_DROP_DUPS_OPT_KEY).toBoolean || operation.equals(WriteOperationType.UPSERT);
|
val shouldCombine = parameters(INSERT_DROP_DUPS_OPT_KEY).toBoolean || operation.equals(WriteOperationType.UPSERT);
|
||||||
val hoodieAllIncomingRecords = genericRecords.map(gr => {
|
val hoodieAllIncomingRecords = genericRecords.map(gr => {
|
||||||
val hoodieRecord = if (shouldCombine) {
|
val hoodieRecord = if (shouldCombine) {
|
||||||
@@ -195,7 +199,7 @@ private[hudi] object HoodieSparkSqlWriter {
|
|||||||
|
|
||||||
// Convert to RDD[HoodieKey]
|
// Convert to RDD[HoodieKey]
|
||||||
val keyGenerator = DataSourceUtils.createKeyGenerator(toProperties(parameters))
|
val keyGenerator = DataSourceUtils.createKeyGenerator(toProperties(parameters))
|
||||||
val genericRecords: RDD[GenericRecord] = AvroConversionUtils.createRdd(df, structName, nameSpace)
|
val genericRecords: RDD[GenericRecord] = HoodieSparkUtils.createRdd(df, structName, nameSpace)
|
||||||
val hoodieKeysToDelete = genericRecords.map(gr => keyGenerator.getKey(gr)).toJavaRDD()
|
val hoodieKeysToDelete = genericRecords.map(gr => keyGenerator.getKey(gr)).toJavaRDD()
|
||||||
|
|
||||||
if (!tableExists) {
|
if (!tableExists) {
|
||||||
@@ -0,0 +1,121 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import org.apache.avro.Schema
|
||||||
|
import org.apache.avro.generic.GenericRecord
|
||||||
|
import org.apache.hadoop.fs.{FileSystem, Path}
|
||||||
|
import org.apache.hudi.client.utils.SparkRowDeserializer
|
||||||
|
import org.apache.hudi.common.model.HoodieRecord
|
||||||
|
import org.apache.spark.SPARK_VERSION
|
||||||
|
import org.apache.spark.rdd.RDD
|
||||||
|
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
|
||||||
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
|
import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder}
|
||||||
|
import org.apache.spark.sql.execution.datasources.{FileStatusCache, InMemoryFileIndex}
|
||||||
|
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
||||||
|
|
||||||
|
import scala.collection.JavaConverters._
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Spark-facing helper routines: path globbing, file-index construction and
 * DataFrame-to-Avro conversion.
 */
object HoodieSparkUtils {

  /**
   * Builds a [[StructType]] holding one nullable string field for every name in
   * `HoodieRecord.HOODIE_META_COLUMNS`, in the order that list provides them.
   *
   * @return the schema made up of the Hudi meta columns
   */
  def getMetaSchema: StructType = {
    val metaFields = HoodieRecord.HOODIE_META_COLUMNS.asScala.map { name =>
      StructField(name, StringType, nullable = true)
    }
    StructType(metaFields)
  }

  /**
   * Tells whether the string form of a path contains a glob metacharacter
   * (curly brace, square bracket, asterisk, question mark or backslash).
   *
   * Copied from [[org.apache.spark.deploy.SparkHadoopUtil]], which became private
   * in Spark 3.0.0 and therefore has to be duplicated locally.
   *
   * @param pattern path to inspect
   * @return true when at least one glob metacharacter is present
   */
  def isGlobPath(pattern: Path): Boolean = {
    val globChars = "{}[]*?\\".toSet
    pattern.toString.exists(ch => globChars.contains(ch))
  }

  /**
   * Expands a glob pattern through `fs.globStatus` and qualifies every match against
   * the file system URI and working directory.
   *
   * Copied from [[org.apache.spark.deploy.SparkHadoopUtil]], which became private
   * in Spark 3.0.0 and therefore has to be duplicated locally.
   *
   * @param fs      file system used for the expansion
   * @param pattern glob pattern to expand
   * @return the qualified matches, or an empty sequence when `globStatus` yields null
   */
  def globPath(fs: FileSystem, pattern: Path): Seq[Path] = {
    // globStatus may hand back null, hence the Option wrapper.
    Option(fs.globStatus(pattern)).fold(Seq.empty[Path]) { matches =>
      matches.map(status => status.getPath.makeQualified(fs.getUri, fs.getWorkingDirectory)).toSeq
    }
  }

  /**
   * Expands the path only when [[isGlobPath]] reports a glob metacharacter;
   * otherwise the path is returned unchanged as a single-element sequence.
   *
   * Copied from [[org.apache.spark.deploy.SparkHadoopUtil]], which became private
   * in Spark 3.0.0 and therefore has to be duplicated locally.
   *
   * @param fs      file system used for the expansion
   * @param pattern plain or globbed path
   * @return the expanded matches, or the input path itself
   */
  def globPathIfNecessary(fs: FileSystem, pattern: Path): Seq[Path] = {
    if (!isGlobPath(pattern)) {
      Seq(pattern)
    } else {
      globPath(fs, pattern)
    }
  }

  /**
   * Qualifies each input path against the file system and, where the qualified path
   * contains a glob pattern, replaces it with the paths matching that pattern.
   * Paths without a glob pattern are passed through in their qualified form.
   *
   * @param paths absolute or globbed paths
   * @param fs    file system used for qualification and expansion
   * @return the flattened list of resulting paths
   */
  def checkAndGlobPathIfNecessary(paths: Seq[String], fs: FileSystem): Seq[Path] = {
    for {
      rawPath <- paths
      resolved <- globPathIfNecessary(fs, new Path(rawPath).makeQualified(fs.getUri, fs.getWorkingDirectory))
    } yield resolved
  }

  /**
   * Creates an [[InMemoryFileIndex]] over the given paths with no extra parameters and
   * no user-specified schema, using the file status cache obtained for the session.
   *
   * @param sparkSession active session
   * @param globbedPaths root paths of the index
   * @return the file index
   */
  def createInMemoryFileIndex(sparkSession: SparkSession, globbedPaths: Seq[Path]): InMemoryFileIndex = {
    val statusCache = FileStatusCache.getOrCreate(sparkSession)
    new InMemoryFileIndex(sparkSession, globbedPaths, Map(), Option.empty, statusCache)
  }

  /**
   * Converts a DataFrame into Avro records, deriving the Avro schema from the
   * DataFrame's own schema before delegating to the schema-taking overload.
   *
   * @param df              rows to convert
   * @param structName      record name passed to the Avro conversion
   * @param recordNamespace record namespace passed to the Avro conversion
   * @return the rows as Avro generic records
   */
  def createRdd(df: DataFrame, structName: String, recordNamespace: String): RDD[GenericRecord] = {
    createRdd(
      df,
      AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, recordNamespace),
      structName,
      recordNamespace)
  }

  /**
   * Converts a DataFrame into Avro records using the supplied Avro schema.
   *
   * @param df              rows to convert
   * @param avroSchema      Avro schema from which the Spark struct type is derived
   * @param structName      record name passed to the Avro converter
   * @param recordNamespace record namespace passed to the Avro converter
   * @return the rows as Avro generic records
   */
  def createRdd(df: DataFrame, avroSchema: Schema, structName: String, recordNamespace: String)
  : RDD[GenericRecord] = {
    // The struct type is taken from the Avro schema because it carries the correct nullability.
    val sqlType = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
    val rowDeserializer = createDeserializer(RowEncoder.apply(sqlType).resolveAndBind())
    val rows = df.queryExecution.toRdd.map(internalRow => rowDeserializer.deserializeRow(internalRow))
    rows.mapPartitions { partition =>
      if (partition.nonEmpty) {
        // One converter per non-empty partition, reused for all of its rows.
        val toAvro = AvroConversionHelper.createConverterToAvro(sqlType, structName, recordNamespace)
        partition.map(row => toAvro(row).asInstanceOf[GenericRecord])
      } else {
        Iterator.empty
      }
    }
  }

  /**
   * Picks the row deserializer matching the running Spark version: the Spark 2
   * implementation when `SPARK_VERSION` starts with "2.", the Spark 3 one otherwise.
   *
   * @param encoder encoder handed to the chosen deserializer
   * @return the version-specific deserializer
   */
  def createDeserializer(encoder: ExpressionEncoder[Row]): SparkRowDeserializer = {
    // TODO remove Spark2RowDeserializer if Spark 2.x support is dropped
    SPARK_VERSION match {
      case version if version.startsWith("2.") => new Spark2RowDeserializer(encoder)
      case _ => new Spark3RowDeserializer(encoder)
    }
  }
}
|
||||||
@@ -113,9 +113,6 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext,
|
|||||||
hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf()
|
hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf()
|
||||||
)
|
)
|
||||||
|
|
||||||
// Follow the implementation of Spark internal HadoopRDD to handle the broadcast configuration.
|
|
||||||
FileSystem.getLocal(jobConf)
|
|
||||||
SparkHadoopUtil.get.addCredentials(jobConf)
|
|
||||||
val rdd = new HoodieMergeOnReadRDD(
|
val rdd = new HoodieMergeOnReadRDD(
|
||||||
sqlContext.sparkContext,
|
sqlContext.sparkContext,
|
||||||
jobConf,
|
jobConf,
|
||||||
@@ -43,7 +43,7 @@ import org.apache.spark.sql.SaveMode;
|
|||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.apache.spark.sql.streaming.DataStreamWriter;
|
import org.apache.spark.sql.streaming.DataStreamWriter;
|
||||||
import org.apache.spark.sql.streaming.OutputMode;
|
import org.apache.spark.sql.streaming.OutputMode;
|
||||||
import org.apache.spark.sql.streaming.ProcessingTime;
|
import org.apache.spark.sql.streaming.Trigger;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
@@ -366,7 +366,7 @@ public class HoodieJavaStreamingApp {
|
|||||||
.outputMode(OutputMode.Append());
|
.outputMode(OutputMode.Append());
|
||||||
|
|
||||||
updateHiveSyncConfig(writer);
|
updateHiveSyncConfig(writer);
|
||||||
StreamingQuery query = writer.trigger(new ProcessingTime(500)).start(tablePath);
|
StreamingQuery query = writer.trigger(Trigger.ProcessingTime(500)).start(tablePath);
|
||||||
query.awaitTermination(streamingDurationInMs);
|
query.awaitTermination(streamingDurationInMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -26,7 +26,7 @@ import org.apache.hudi.testutils.HoodieClientTestBase
|
|||||||
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
|
import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
|
||||||
import org.apache.log4j.LogManager
|
import org.apache.log4j.LogManager
|
||||||
import org.apache.spark.sql._
|
import org.apache.spark.sql._
|
||||||
import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime}
|
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
|
||||||
import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
|
import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
|
||||||
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
|
||||||
|
|
||||||
@@ -93,7 +93,7 @@ class TestStructuredStreaming extends HoodieClientTestBase {
|
|||||||
.writeStream
|
.writeStream
|
||||||
.format("org.apache.hudi")
|
.format("org.apache.hudi")
|
||||||
.options(commonOpts)
|
.options(commonOpts)
|
||||||
.trigger(new ProcessingTime(100))
|
.trigger(Trigger.ProcessingTime(100))
|
||||||
.option("checkpointLocation", basePath + "/checkpoint")
|
.option("checkpointLocation", basePath + "/checkpoint")
|
||||||
.outputMode(OutputMode.Append)
|
.outputMode(OutputMode.Append)
|
||||||
.start(destPath)
|
.start(destPath)
|
||||||
225
hudi-spark-datasource/hudi-spark2/pom.xml
Normal file
225
hudi-spark-datasource/hudi-spark2/pom.xml
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>hudi-spark-datasource</artifactId>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<version>0.6.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<!-- Inherit the module version from the parent POM; ${parent.version} is the deprecated spelling of this expression. -->
<version>${project.parent.version}</version>
|
||||||
|
|
||||||
|
<name>hudi-spark2_${scala.binary.version}</name>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<main.basedir>${project.parent.parent.basedir}</main.basedir>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/resources</directory>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<pluginManagement>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>${scala-maven-plugin.version}</version>
|
||||||
|
<configuration>
|
||||||
|
<args>
|
||||||
|
<arg>-nobootcp</arg>
|
||||||
|
</args>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</pluginManagement>
|
||||||
|
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-dependency-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>copy-dependencies</id>
|
||||||
|
<phase>prepare-package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>copy-dependencies</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<outputDirectory>${project.build.directory}/lib</outputDirectory>
|
||||||
|
<overWriteReleases>true</overWriteReleases>
|
||||||
|
<overWriteSnapshots>true</overWriteSnapshots>
|
||||||
|
<overWriteIfNewer>true</overWriteIfNewer>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>scala-compile-first</id>
|
||||||
|
<phase>process-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-test-compile</id>
|
||||||
|
<phase>process-test-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>compile</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>test-jar</goal>
|
||||||
|
</goals>
|
||||||
|
<phase>test-compile</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<skip>false</skip>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.rat</groupId>
|
||||||
|
<artifactId>apache-rat-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.scalastyle</groupId>
|
||||||
|
<artifactId>scalastyle-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.jacoco</groupId>
|
||||||
|
<artifactId>jacoco-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<!-- Scala -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scala-lang</groupId>
|
||||||
|
<artifactId>scala-library</artifactId>
|
||||||
|
<version>${scala.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-client-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_${scala.binary.version}</artifactId>
|
||||||
|
<version>${spark2.version}</version>
|
||||||
|
<optional>true</optional>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty</artifactId>
|
||||||
|
<version>3.9.9.Final</version>
|
||||||
|
<optional>true</optional>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>io.netty</groupId>
|
||||||
|
<artifactId>netty-all</artifactId>
|
||||||
|
<version>4.1.17.Final</version>
|
||||||
|
<optional>true</optional>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Hoodie - Test -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-client-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
<classifier>tests</classifier>
|
||||||
|
<type>test-jar</type>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.junit.jupiter</groupId>
|
||||||
|
<artifactId>junit-jupiter-api</artifactId>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.client.utils.SparkRowDeserializer
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Row
|
||||||
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
|
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||||
|
|
||||||
|
/**
 * [[SparkRowDeserializer]] implementation that turns an [[InternalRow]] into a [[Row]]
 * by calling `fromRow` on the supplied encoder.
 *
 * @param encoder encoder used to decode each internal row
 */
class Spark2RowDeserializer(val encoder: ExpressionEncoder[Row]) extends SparkRowDeserializer {

  /**
   * Decodes a single internal row.
   *
   * @param internalRow row in Spark's internal representation
   * @return the decoded external row
   */
  override def deserializeRow(internalRow: InternalRow): Row = encoder.fromRow(internalRow)
}
|
||||||
@@ -27,6 +27,7 @@ import org.apache.hudi.table.HoodieSparkTable;
|
|||||||
import org.apache.hudi.table.HoodieTable;
|
import org.apache.hudi.table.HoodieTable;
|
||||||
import org.apache.hudi.testutils.HoodieClientTestHarness;
|
import org.apache.hudi.testutils.HoodieClientTestHarness;
|
||||||
|
|
||||||
|
import org.apache.spark.package$;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Row;
|
import org.apache.spark.sql.Row;
|
||||||
import org.apache.spark.sql.catalyst.InternalRow;
|
import org.apache.spark.sql.catalyst.InternalRow;
|
||||||
@@ -34,7 +35,6 @@ import org.junit.jupiter.api.AfterEach;
|
|||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
@@ -51,6 +51,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
|
|||||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.junit.jupiter.api.Assertions.fail;
|
import static org.junit.jupiter.api.Assertions.fail;
|
||||||
|
import static org.junit.jupiter.api.Assumptions.assumeTrue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unit tests {@link HoodieBulkInsertDataInternalWriter}.
|
* Unit tests {@link HoodieBulkInsertDataInternalWriter}.
|
||||||
@@ -61,6 +62,8 @@ public class TestHoodieBulkInsertDataInternalWriter extends HoodieClientTestHarn
|
|||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
|
// this test is only compatible with spark 2
|
||||||
|
assumeTrue(package$.MODULE$.SPARK_VERSION().startsWith("2."));
|
||||||
initSparkContexts("TestHoodieBulkInsertDataInternalWriter");
|
initSparkContexts("TestHoodieBulkInsertDataInternalWriter");
|
||||||
initPath();
|
initPath();
|
||||||
initFileSystem();
|
initFileSystem();
|
||||||
@@ -74,7 +77,7 @@ public class TestHoodieBulkInsertDataInternalWriter extends HoodieClientTestHarn
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDataInternalWriter() throws IOException {
|
public void testDataInternalWriter() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||||
@@ -119,7 +122,7 @@ public class TestHoodieBulkInsertDataInternalWriter extends HoodieClientTestHarn
|
|||||||
* to throw Global Error. Verify global error is set appropriately and only first batch of records are written to disk.
|
* to throw Global Error. Verify global error is set appropriately and only first batch of records are written to disk.
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testGlobalFailure() throws IOException {
|
public void testGlobalFailure() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
|
||||||
@@ -165,7 +168,8 @@ public class TestHoodieBulkInsertDataInternalWriter extends HoodieClientTestHarn
|
|||||||
assertOutput(inputRows, result, instantTime, fileNames);
|
assertOutput(inputRows, result, instantTime, fileNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeRows(Dataset<Row> inputRows, HoodieBulkInsertDataInternalWriter writer) throws IOException {
|
private void writeRows(Dataset<Row> inputRows, HoodieBulkInsertDataInternalWriter writer)
|
||||||
|
throws Exception {
|
||||||
List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
|
List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
|
||||||
// issue writes
|
// issue writes
|
||||||
for (InternalRow internalRow : internalRows) {
|
for (InternalRow internalRow : internalRows) {
|
||||||
@@ -26,6 +26,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
|||||||
import org.apache.hudi.testutils.HoodieClientTestHarness;
|
import org.apache.hudi.testutils.HoodieClientTestHarness;
|
||||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||||
|
|
||||||
|
import org.apache.spark.package$;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Row;
|
import org.apache.spark.sql.Row;
|
||||||
import org.apache.spark.sql.catalyst.InternalRow;
|
import org.apache.spark.sql.catalyst.InternalRow;
|
||||||
@@ -34,7 +35,6 @@ import org.junit.jupiter.api.AfterEach;
|
|||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -49,6 +49,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
|||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNull;
|
import static org.junit.jupiter.api.Assertions.assertNull;
|
||||||
|
import static org.junit.jupiter.api.Assumptions.assumeTrue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unit tests {@link HoodieDataSourceInternalWriter}.
|
* Unit tests {@link HoodieDataSourceInternalWriter}.
|
||||||
@@ -59,6 +60,8 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
|
// this test is only compatible with spark 2
|
||||||
|
assumeTrue(package$.MODULE$.SPARK_VERSION().startsWith("2."));
|
||||||
initSparkContexts("TestHoodieDataSourceInternalWriter");
|
initSparkContexts("TestHoodieDataSourceInternalWriter");
|
||||||
initPath();
|
initPath();
|
||||||
initFileSystem();
|
initFileSystem();
|
||||||
@@ -72,7 +75,7 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDataSourceWriter() throws IOException {
|
public void testDataSourceWriter() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
String instantTime = "001";
|
String instantTime = "001";
|
||||||
@@ -114,7 +117,7 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultipleDataSourceWrites() throws IOException {
|
public void testMultipleDataSourceWrites() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
int partitionCounter = 0;
|
int partitionCounter = 0;
|
||||||
@@ -158,7 +161,7 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLargeWrites() throws IOException {
|
public void testLargeWrites() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
int partitionCounter = 0;
|
int partitionCounter = 0;
|
||||||
@@ -208,7 +211,7 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
* verify only records from batch1 is available to read
|
* verify only records from batch1 is available to read
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testAbort() throws IOException {
|
public void testAbort() throws Exception {
|
||||||
// init config and table
|
// init config and table
|
||||||
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
HoodieWriteConfig cfg = getConfigBuilder(basePath).build();
|
||||||
|
|
||||||
@@ -274,7 +277,7 @@ public class TestHoodieDataSourceInternalWriter extends HoodieClientTestHarness
|
|||||||
assertOutput(totalInputRows, result, instantTime0);
|
assertOutput(totalInputRows, result, instantTime0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeRows(Dataset<Row> inputRows, DataWriter<InternalRow> writer) throws IOException {
|
private void writeRows(Dataset<Row> inputRows, DataWriter<InternalRow> writer) throws Exception {
|
||||||
List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
|
List<InternalRow> internalRows = toInternalRows(inputRows, ENCODER);
|
||||||
// issue writes
|
// issue writes
|
||||||
for (InternalRow internalRow : internalRows) {
|
for (InternalRow internalRow : internalRows) {
|
||||||
163
hudi-spark-datasource/hudi-spark3/pom.xml
Normal file
163
hudi-spark-datasource/hudi-spark3/pom.xml
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>hudi-spark-datasource</artifactId>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<version>0.6.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${parent.version}</version>
|
||||||
|
|
||||||
|
<name>hudi-spark3_2.12</name>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<main.basedir>${project.parent.parent.basedir}</main.basedir>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<resources>
|
||||||
|
<resource>
|
||||||
|
<directory>src/main/resources</directory>
|
||||||
|
</resource>
|
||||||
|
</resources>
|
||||||
|
<pluginManagement>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>${scala-maven-plugin.version}</version>
|
||||||
|
<configuration>
|
||||||
|
<args>
|
||||||
|
<arg>-nobootcp</arg>
|
||||||
|
</args>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</pluginManagement>
|
||||||
|
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-dependency-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>copy-dependencies</id>
|
||||||
|
<phase>prepare-package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>copy-dependencies</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<outputDirectory>${project.build.directory}/lib</outputDirectory>
|
||||||
|
<overWriteReleases>true</overWriteReleases>
|
||||||
|
<overWriteSnapshots>true</overWriteSnapshots>
|
||||||
|
<overWriteIfNewer>true</overWriteIfNewer>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>scala-compile-first</id>
|
||||||
|
<phase>process-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-test-compile</id>
|
||||||
|
<phase>process-test-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>compile</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>test-jar</goal>
|
||||||
|
</goals>
|
||||||
|
<phase>test-compile</phase>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<skip>false</skip>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.rat</groupId>
|
||||||
|
<artifactId>apache-rat-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.scalastyle</groupId>
|
||||||
|
<artifactId>scalastyle-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.jacoco</groupId>
|
||||||
|
<artifactId>jacoco-maven-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scala-lang</groupId>
|
||||||
|
<artifactId>scala-library</artifactId>
|
||||||
|
<version>${scala12.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-sql_2.12</artifactId>
|
||||||
|
<version>${spark3.version}</version>
|
||||||
|
<optional>true</optional>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-client</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
</project>
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import org.apache.hudi.client.utils.SparkRowDeserializer
|
||||||
|
|
||||||
|
import org.apache.spark.sql.Row
|
||||||
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
|
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
|
||||||
|
|
||||||
|
class Spark3RowDeserializer(val encoder: ExpressionEncoder[Row]) extends SparkRowDeserializer {
|
||||||
|
|
||||||
|
private val deserializer: ExpressionEncoder.Deserializer[Row] = encoder.createDeserializer()
|
||||||
|
|
||||||
|
def deserializeRow(internalRow: InternalRow): Row = {
|
||||||
|
deserializer.apply(internalRow)
|
||||||
|
}
|
||||||
|
}
|
||||||
39
hudi-spark-datasource/pom.xml
Normal file
39
hudi-spark-datasource/pom.xml
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<parent>
|
||||||
|
<artifactId>hudi</artifactId>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<version>0.6.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<artifactId>hudi-spark-datasource</artifactId>
|
||||||
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<main.basedir>${project.parent.basedir}</main.basedir>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<modules>
|
||||||
|
<module>hudi-spark-common</module>
|
||||||
|
<module>hudi-spark</module>
|
||||||
|
<module>hudi-spark2</module>
|
||||||
|
<module>hudi-spark3</module>
|
||||||
|
</modules>
|
||||||
|
</project>
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hudi
|
|
||||||
|
|
||||||
import org.apache.hadoop.fs.{FileSystem, Path}
|
|
||||||
import org.apache.hudi.common.model.HoodieRecord
|
|
||||||
import org.apache.spark.deploy.SparkHadoopUtil
|
|
||||||
import org.apache.spark.sql.SparkSession
|
|
||||||
import org.apache.spark.sql.execution.datasources.{FileStatusCache, InMemoryFileIndex}
|
|
||||||
import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
|
||||||
import scala.collection.JavaConverters._
|
|
||||||
|
|
||||||
|
|
||||||
object HoodieSparkUtils {
|
|
||||||
|
|
||||||
def getMetaSchema: StructType = {
|
|
||||||
StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => {
|
|
||||||
StructField(col, StringType, nullable = true)
|
|
||||||
}))
|
|
||||||
}
|
|
||||||
|
|
||||||
def checkAndGlobPathIfNecessary(paths: Seq[String], fs: FileSystem): Seq[Path] = {
|
|
||||||
paths.flatMap(path => {
|
|
||||||
val qualified = new Path(path).makeQualified(fs.getUri, fs.getWorkingDirectory)
|
|
||||||
val globPaths = SparkHadoopUtil.get.globPathIfNecessary(fs, qualified)
|
|
||||||
globPaths
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
def createInMemoryFileIndex(sparkSession: SparkSession, globbedPaths: Seq[Path]): InMemoryFileIndex = {
|
|
||||||
val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
|
|
||||||
new InMemoryFileIndex(sparkSession, globbedPaths, Map(), Option.empty, fileStatusCache)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -118,6 +118,11 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||||
@@ -129,6 +134,16 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<!-- Kafka -->
|
<!-- Kafka -->
|
||||||
<dependency>
|
<dependency>
|
||||||
|
|||||||
@@ -55,7 +55,6 @@ import org.apache.hadoop.fs.FileSystem;
|
|||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.Accumulator;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
@@ -68,6 +67,7 @@ import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils;
|
|||||||
import org.apache.spark.sql.jdbc.JdbcDialect;
|
import org.apache.spark.sql.jdbc.JdbcDialect;
|
||||||
import org.apache.spark.sql.jdbc.JdbcDialects;
|
import org.apache.spark.sql.jdbc.JdbcDialects;
|
||||||
import org.apache.spark.sql.types.StructType;
|
import org.apache.spark.sql.types.StructType;
|
||||||
|
import org.apache.spark.util.LongAccumulator;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@@ -292,7 +292,7 @@ public class UtilHelpers {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static int handleErrors(JavaSparkContext jsc, String instantTime, JavaRDD<WriteStatus> writeResponse) {
|
public static int handleErrors(JavaSparkContext jsc, String instantTime, JavaRDD<WriteStatus> writeResponse) {
|
||||||
Accumulator<Integer> errors = jsc.accumulator(0);
|
LongAccumulator errors = jsc.sc().longAccumulator();
|
||||||
writeResponse.foreach(writeStatus -> {
|
writeResponse.foreach(writeStatus -> {
|
||||||
if (writeStatus.hasErrors()) {
|
if (writeStatus.hasErrors()) {
|
||||||
errors.add(1);
|
errors.add(1);
|
||||||
|
|||||||
@@ -18,8 +18,8 @@
|
|||||||
|
|
||||||
package org.apache.hudi.utilities.deltastreamer;
|
package org.apache.hudi.utilities.deltastreamer;
|
||||||
|
|
||||||
import org.apache.hudi.AvroConversionUtils;
|
|
||||||
import org.apache.hudi.DataSourceUtils;
|
import org.apache.hudi.DataSourceUtils;
|
||||||
|
import org.apache.hudi.HoodieSparkUtils;
|
||||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||||
import org.apache.hudi.client.SparkRDDWriteClient;
|
import org.apache.hudi.client.SparkRDDWriteClient;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
@@ -342,7 +342,7 @@ public class DeltaSync implements Serializable {
|
|||||||
// pass in the schema for the Row-to-Avro conversion
|
// pass in the schema for the Row-to-Avro conversion
|
||||||
// to avoid nullability mismatch between Avro schema and Row schema
|
// to avoid nullability mismatch between Avro schema and Row schema
|
||||||
avroRDDOptional = transformed
|
avroRDDOptional = transformed
|
||||||
.map(t -> AvroConversionUtils.createRdd(
|
.map(t -> HoodieSparkUtils.createRdd(
|
||||||
t, this.userProvidedSchemaProvider.getTargetSchema(),
|
t, this.userProvidedSchemaProvider.getTargetSchema(),
|
||||||
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD());
|
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD());
|
||||||
schemaProvider = this.userProvidedSchemaProvider;
|
schemaProvider = this.userProvidedSchemaProvider;
|
||||||
@@ -356,7 +356,7 @@ public class DeltaSync implements Serializable {
|
|||||||
UtilHelpers.createRowBasedSchemaProvider(r.schema(), props, jssc)))
|
UtilHelpers.createRowBasedSchemaProvider(r.schema(), props, jssc)))
|
||||||
.orElse(dataAndCheckpoint.getSchemaProvider());
|
.orElse(dataAndCheckpoint.getSchemaProvider());
|
||||||
avroRDDOptional = transformed
|
avroRDDOptional = transformed
|
||||||
.map(t -> AvroConversionUtils.createRdd(
|
.map(t -> HoodieSparkUtils.createRdd(
|
||||||
t, HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD());
|
t, HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
package org.apache.hudi.utilities.deltastreamer;
|
package org.apache.hudi.utilities.deltastreamer;
|
||||||
|
|
||||||
import org.apache.hudi.AvroConversionUtils;
|
import org.apache.hudi.AvroConversionUtils;
|
||||||
|
import org.apache.hudi.HoodieSparkUtils;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.utilities.UtilHelpers;
|
import org.apache.hudi.utilities.UtilHelpers;
|
||||||
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
|
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
|
||||||
@@ -73,8 +74,8 @@ public final class SourceFormatAdapter {
|
|||||||
// If the source schema is specified through Avro schema,
|
// If the source schema is specified through Avro schema,
|
||||||
// pass in the schema for the Row-to-Avro conversion
|
// pass in the schema for the Row-to-Avro conversion
|
||||||
// to avoid nullability mismatch between Avro schema and Row schema
|
// to avoid nullability mismatch between Avro schema and Row schema
|
||||||
? AvroConversionUtils.createRdd(rdd, r.getSchemaProvider().getSourceSchema(),
|
? HoodieSparkUtils.createRdd(rdd, r.getSchemaProvider().getSourceSchema(),
|
||||||
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD() : AvroConversionUtils.createRdd(rdd,
|
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD() : HoodieSparkUtils.createRdd(rdd,
|
||||||
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD();
|
HOODIE_RECORD_STRUCT_NAME, HOODIE_RECORD_NAMESPACE).toJavaRDD();
|
||||||
})
|
})
|
||||||
.orElse(null)), r.getCheckpointForNextBatch(), r.getSchemaProvider());
|
.orElse(null)), r.getCheckpointForNextBatch(), r.getSchemaProvider());
|
||||||
|
|||||||
@@ -73,6 +73,8 @@
|
|||||||
<include>org.apache.hudi:hudi-spark-client</include>
|
<include>org.apache.hudi:hudi-spark-client</include>
|
||||||
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
|
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
|
||||||
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark2_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark3_2.12</include>
|
||||||
<include>org.apache.hudi:hudi-hive-sync</include>
|
<include>org.apache.hudi:hudi-hive-sync</include>
|
||||||
<include>org.apache.hudi:hudi-sync-common</include>
|
<include>org.apache.hudi:hudi-sync-common</include>
|
||||||
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
||||||
@@ -339,6 +341,18 @@
|
|||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-hdfs</artifactId>
|
<artifactId>hadoop-hdfs</artifactId>
|
||||||
|
|||||||
@@ -66,7 +66,10 @@
|
|||||||
<include>org.apache.hudi:hudi-common</include>
|
<include>org.apache.hudi:hudi-common</include>
|
||||||
<include>org.apache.hudi:hudi-client-common</include>
|
<include>org.apache.hudi:hudi-client-common</include>
|
||||||
<include>org.apache.hudi:hudi-spark-client</include>
|
<include>org.apache.hudi:hudi-spark-client</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark-common</include>
|
||||||
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark2_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark3_2.12</include>
|
||||||
<include>org.apache.hudi:hudi-hive-sync</include>
|
<include>org.apache.hudi:hudi-hive-sync</include>
|
||||||
<include>org.apache.hudi:hudi-sync-common</include>
|
<include>org.apache.hudi:hudi-sync-common</include>
|
||||||
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
||||||
@@ -220,11 +223,26 @@
|
|||||||
<artifactId>hudi-hive-sync</artifactId>
|
<artifactId>hudi-hive-sync</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-timeline-service</artifactId>
|
<artifactId>hudi-timeline-service</artifactId>
|
||||||
|
|||||||
@@ -69,7 +69,10 @@
|
|||||||
<include>org.apache.hudi:hudi-client-common</include>
|
<include>org.apache.hudi:hudi-client-common</include>
|
||||||
<include>org.apache.hudi:hudi-spark-client</include>
|
<include>org.apache.hudi:hudi-spark-client</include>
|
||||||
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
|
<include>org.apache.hudi:hudi-utilities_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark-common</include>
|
||||||
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
<include>org.apache.hudi:hudi-spark_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark2_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.hudi:hudi-spark3_2.12</include>
|
||||||
<include>org.apache.hudi:hudi-hive-sync</include>
|
<include>org.apache.hudi:hudi-hive-sync</include>
|
||||||
<include>org.apache.hudi:hudi-sync-common</include>
|
<include>org.apache.hudi:hudi-sync-common</include>
|
||||||
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
<include>org.apache.hudi:hudi-hadoop-mr</include>
|
||||||
@@ -105,6 +108,7 @@
|
|||||||
<include>io.prometheus:simpleclient_common</include>
|
<include>io.prometheus:simpleclient_common</include>
|
||||||
<include>com.yammer.metrics:metrics-core</include>
|
<include>com.yammer.metrics:metrics-core</include>
|
||||||
<include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include>
|
<include>org.apache.spark:spark-streaming-kafka-0-10_${scala.binary.version}</include>
|
||||||
|
<include>org.apache.spark:spark-token-provider-kafka-0-10_${scala.binary.version}</include>
|
||||||
<include>org.apache.kafka:kafka_${scala.binary.version}</include>
|
<include>org.apache.kafka:kafka_${scala.binary.version}</include>
|
||||||
<include>com.101tec:zkclient</include>
|
<include>com.101tec:zkclient</include>
|
||||||
<include>org.apache.kafka:kafka-clients</include>
|
<include>org.apache.kafka:kafka-clients</include>
|
||||||
@@ -227,11 +231,26 @@
|
|||||||
</exclusion>
|
</exclusion>
|
||||||
</exclusions>
|
</exclusions>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark-common</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark2_${scala.binary.version}</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.hudi</groupId>
|
||||||
|
<artifactId>hudi-spark3_2.12</artifactId>
|
||||||
|
<version>${project.version}</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hudi</groupId>
|
<groupId>org.apache.hudi</groupId>
|
||||||
<artifactId>hudi-utilities_${scala.binary.version}</artifactId>
|
<artifactId>hudi-utilities_${scala.binary.version}</artifactId>
|
||||||
|
|||||||
33
pom.xml
33
pom.xml
@@ -39,7 +39,7 @@
|
|||||||
<module>hudi-cli</module>
|
<module>hudi-cli</module>
|
||||||
<module>hudi-client</module>
|
<module>hudi-client</module>
|
||||||
<module>hudi-hadoop-mr</module>
|
<module>hudi-hadoop-mr</module>
|
||||||
<module>hudi-spark</module>
|
<module>hudi-spark-datasource</module>
|
||||||
<module>hudi-timeline-service</module>
|
<module>hudi-timeline-service</module>
|
||||||
<module>hudi-utilities</module>
|
<module>hudi-utilities</module>
|
||||||
<module>hudi-sync</module>
|
<module>hudi-sync</module>
|
||||||
@@ -84,6 +84,9 @@
|
|||||||
|
|
||||||
<java.version>1.8</java.version>
|
<java.version>1.8</java.version>
|
||||||
<fasterxml.version>2.6.7</fasterxml.version>
|
<fasterxml.version>2.6.7</fasterxml.version>
|
||||||
|
<fasterxml.jackson.databind.version>2.6.7.3</fasterxml.jackson.databind.version>
|
||||||
|
<fasterxml.jackson.module.scala.version>2.6.7.1</fasterxml.jackson.module.scala.version>
|
||||||
|
<fasterxml.jackson.dataformat.yaml.version>2.7.4</fasterxml.jackson.dataformat.yaml.version>
|
||||||
<kafka.version>2.0.0</kafka.version>
|
<kafka.version>2.0.0</kafka.version>
|
||||||
<glassfish.version>2.17</glassfish.version>
|
<glassfish.version>2.17</glassfish.version>
|
||||||
<parquet.version>1.10.1</parquet.version>
|
<parquet.version>1.10.1</parquet.version>
|
||||||
@@ -103,9 +106,12 @@
|
|||||||
<http.version>4.4.1</http.version>
|
<http.version>4.4.1</http.version>
|
||||||
<spark.version>2.4.4</spark.version>
|
<spark.version>2.4.4</spark.version>
|
||||||
<flink.version>1.11.2</flink.version>
|
<flink.version>1.11.2</flink.version>
|
||||||
|
<spark2.version>2.4.4</spark2.version>
|
||||||
|
<spark3.version>3.0.0</spark3.version>
|
||||||
<avro.version>1.8.2</avro.version>
|
<avro.version>1.8.2</avro.version>
|
||||||
<scala.version>2.11.12</scala.version>
|
<scala.version>2.11.12</scala.version>
|
||||||
<scala.binary.version>2.11</scala.binary.version>
|
<scala.binary.version>2.11</scala.binary.version>
|
||||||
|
<scala12.version>2.12.10</scala12.version>
|
||||||
<apache-rat-plugin.version>0.12</apache-rat-plugin.version>
|
<apache-rat-plugin.version>0.12</apache-rat-plugin.version>
|
||||||
<scala-maven-plugin.version>3.3.1</scala-maven-plugin.version>
|
<scala-maven-plugin.version>3.3.1</scala-maven-plugin.version>
|
||||||
<scalatest.version>3.0.1</scalatest.version>
|
<scalatest.version>3.0.1</scalatest.version>
|
||||||
@@ -432,7 +438,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
<version>${fasterxml.version}.3</version>
|
<version>${fasterxml.jackson.databind.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.datatype</groupId>
|
<groupId>com.fasterxml.jackson.datatype</groupId>
|
||||||
@@ -442,7 +448,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.fasterxml.jackson.module</groupId>
|
<groupId>com.fasterxml.jackson.module</groupId>
|
||||||
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
|
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
|
||||||
<version>${fasterxml.version}.1</version>
|
<version>${fasterxml.jackson.module.scala.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- Glassfish -->
|
<!-- Glassfish -->
|
||||||
@@ -1306,7 +1312,7 @@
|
|||||||
<profile>
|
<profile>
|
||||||
<id>scala-2.12</id>
|
<id>scala-2.12</id>
|
||||||
<properties>
|
<properties>
|
||||||
<scala.version>2.12.10</scala.version>
|
<scala.version>${scala12.version}</scala.version>
|
||||||
<scala.binary.version>2.12</scala.binary.version>
|
<scala.binary.version>2.12</scala.binary.version>
|
||||||
</properties>
|
</properties>
|
||||||
<activation>
|
<activation>
|
||||||
@@ -1341,6 +1347,25 @@
|
|||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
</profile>
|
</profile>
|
||||||
|
|
||||||
|
<profile>
|
||||||
|
<id>spark3</id>
|
||||||
|
<properties>
|
||||||
|
<spark.version>${spark3.version}</spark.version>
|
||||||
|
<scala.version>${scala12.version}</scala.version>
|
||||||
|
<scala.binary.version>2.12</scala.binary.version>
|
||||||
|
<kafka.version>2.4.1</kafka.version>
|
||||||
|
<fasterxml.version>2.10.0</fasterxml.version>
|
||||||
|
<fasterxml.jackson.databind.version>2.10.0</fasterxml.jackson.databind.version>
|
||||||
|
<fasterxml.jackson.module.scala.version>2.10.0</fasterxml.jackson.module.scala.version>
|
||||||
|
<fasterxml.jackson.dataformat.yaml.version>2.10.0</fasterxml.jackson.dataformat.yaml.version>
|
||||||
|
</properties>
|
||||||
|
<activation>
|
||||||
|
<property>
|
||||||
|
<name>spark3</name>
|
||||||
|
</property>
|
||||||
|
</activation>
|
||||||
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
Reference in New Issue
Block a user