[HUDI-1040] Make Hudi support Spark 3 (#2208)

* Fix flaky MOR unit test
* Update Spark APIs to make it compatible with both Spark 2 & Spark 3
* Refactor bulk insert v2 part to make Hudi able to compile with Spark 3
* Add spark3 profile to handle fasterxml & spark version
* Create hudi-spark-common module & refactor hudi-spark related modules

Co-authored-by: Wenning Ding <wenningd@amazon.com>

This commit is contained in:
@@ -20,7 +20,7 @@ package org.apache.hudi.integ.testsuite.reader;
 import java.util.List;
 import org.apache.avro.generic.GenericRecord;
-import org.apache.hudi.AvroConversionUtils;
+import org.apache.hudi.HoodieSparkUtils;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
 import org.apache.spark.api.java.JavaRDD;
@@ -49,7 +49,7 @@ public class SparkBasedReader {
         .option(AVRO_SCHEMA_OPTION_KEY, schemaStr)
         .load(JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq());

-    return AvroConversionUtils
+    return HoodieSparkUtils
         .createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
             nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE))
         .toJavaRDD();
@@ -61,7 +61,7 @@ public class SparkBasedReader {
     Dataset<Row> dataSet = sparkSession.read()
         .parquet((JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq()));

-    return AvroConversionUtils
+    return HoodieSparkUtils
         .createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
             RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
         .toJavaRDD();
Reference in New Issue
Block a user