[HUDI-1129] Improving schema evolution support in hudi (#2927)
* Adding support to ingest records with old schema after table's schema is evolved
* Rebasing against latest master
  - Trimming test file to be < 800 lines
  - Renaming config names
* Addressing feedback

Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
committed by
GitHub
parent
73d898322b
commit
1196736185
@@ -18,15 +18,18 @@
|
||||
|
||||
package org.apache.hudi.integ.testsuite.reader;
|
||||
|
||||
import java.util.List;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.HoodieSparkUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.utilities.schema.RowBasedSchemaProvider;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.sql.Dataset;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import scala.collection.JavaConverters;
|
||||
|
||||
|
||||
@@ -51,7 +54,7 @@ public class SparkBasedReader {
|
||||
|
||||
return HoodieSparkUtils
|
||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE))
|
||||
nameSpace.orElse(RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE), false, Option.empty())
|
||||
.toJavaRDD();
|
||||
}
|
||||
|
||||
@@ -63,7 +66,7 @@ public class SparkBasedReader {
|
||||
|
||||
return HoodieSparkUtils
|
||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
|
||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE, false, Option.empty())
|
||||
.toJavaRDD();
|
||||
}
|
||||
|
||||
@@ -73,10 +76,11 @@ public class SparkBasedReader {
|
||||
Dataset<Row> dataSet = sparkSession.read()
|
||||
.orc((JavaConverters.asScalaIteratorConverter(listOfPaths.iterator()).asScala().toSeq()));
|
||||
|
||||
return HoodieSparkUtils
|
||||
.createRdd(dataSet.toDF(), structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE)
|
||||
.toJavaRDD();
|
||||
return HoodieSparkUtils.createRdd(dataSet.toDF(),
|
||||
structName.orElse(RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME),
|
||||
RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE,
|
||||
false, Option.empty()
|
||||
).toJavaRDD();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user