[HUDI-1040] Make Hudi support Spark 3 (#2208)
* Fix flaky MOR unit test
* Update Spark APIs to be compatible with both Spark 2 and Spark 3
* Refactor the bulk insert v2 part so that Hudi can compile with Spark 3
* Add spark3 profile to handle fasterxml & Spark versions
* Create hudi-spark-common module & refactor hudi-spark related modules

Co-authored-by: Wenning Ding <wenningd@amazon.com>
This commit is contained in:
@@ -20,7 +20,7 @@ package org.apache.hudi.integ.testsuite.reader;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
import org.apache.hudi.HoodieSparkUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
@@ -49,7 +49,7 @@ public class SparkBasedReader {
|
||||
.option(AVRO_SCHEMA_OPTION_KEY, schemaStr)
|
||||
.load(JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq());
|
||||
|
||||
return AvroConversionUtils
|
||||
return HoodieSparkUtils
|
||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE))
|
||||
.toJavaRDD();
|
||||
@@ -61,7 +61,7 @@ public class SparkBasedReader {
|
||||
Dataset<Row> dataSet = sparkSession.read()
|
||||
.parquet((JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq()));
|
||||
|
||||
return AvroConversionUtils
|
||||
return HoodieSparkUtils
|
||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
|
||||
.toJavaRDD();
|
||||
|
||||
@@ -61,9 +61,9 @@ public abstract class ITTestBase {
|
||||
protected static final String HIVESERVER = "/hiveserver";
|
||||
protected static final String PRESTO_COORDINATOR = "/presto-coordinator-1";
|
||||
protected static final String HOODIE_WS_ROOT = "/var/hoodie/ws";
|
||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_app.sh";
|
||||
protected static final String HOODIE_GENERATE_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_generate_app.sh";
|
||||
protected static final String HOODIE_JAVA_STREAMING_APP = HOODIE_WS_ROOT + "/hudi-spark/run_hoodie_streaming_app.sh";
|
||||
protected static final String HOODIE_JAVA_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_app.sh";
|
||||
protected static final String HOODIE_GENERATE_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_generate_app.sh";
|
||||
protected static final String HOODIE_JAVA_STREAMING_APP = HOODIE_WS_ROOT + "/hudi-spark-datasource/hudi-spark/run_hoodie_streaming_app.sh";
|
||||
protected static final String HUDI_HADOOP_BUNDLE =
|
||||
HOODIE_WS_ROOT + "/docker/hoodie/hadoop/hive_base/target/hoodie-hadoop-mr-bundle.jar";
|
||||
protected static final String HUDI_HIVE_SYNC_BUNDLE =
|
||||
|
||||
Reference in New Issue
Block a user