[HUDI-3168] Fixing null schema with empty commit in incremental relation (#4513)
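Background for the fix: an incremental read on a table whose only commit is empty (no schema in the commit metadata) resolves the table's Avro schema to type NULL, which the schema conversion in IncrementalRelation previously could not handle. A minimal repro sketch, assuming a SparkSession `spark` and a Hudi table at `basePath` whose first commit wrote no records (option names as in contemporary Hudi releases):

    import org.apache.hudi.DataSourceReadOptions

    val df = spark.read.format("hudi")
      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, "000") // read from the earliest instant
      .load(basePath)
    df.count() // with this fix: 0 rows instead of a failure on the NULL schema

The changes below make IncrementalRelation treat the NULL schema as an empty table, and make S3EventsHoodieIncrSource short-circuit on an empty source batch.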
@@ -17,8 +17,9 @@
 package org.apache.hudi
 
-import java.util.stream.Collectors
+import org.apache.avro.Schema
+
+import java.util.stream.Collectors
 import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata, HoodieTableType}
 import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
 import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
@@ -89,9 +90,14 @@ class IncrementalRelation(val sqlContext: SQLContext,
     } else {
       schemaResolver.getTableAvroSchemaWithoutMetadataFields()
     }
-    val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
-    StructType(skeletonSchema.fields ++ dataSchema.fields)
+    if (tableSchema.getType == Schema.Type.NULL) {
+      // the only commit in the table is an empty commit without schema; return an empty schema here
+      StructType(Nil)
+    } else {
+      val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
+      StructType(skeletonSchema.fields ++ dataSchema.fields)
+    }
   }
 
   private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.key,
     DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.defaultValue).split(",").filter(!_.isEmpty)
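Avro represents "no schema" as a schema of type NULL, which is what TableSchemaResolver yields for a table whose only commit carries no schema; the guard above maps that case to an empty StructType instead of attempting conversion. A self-contained sketch of the sentinel (illustrative only):

    import org.apache.avro.Schema
    import org.apache.spark.sql.types.StructType

    // Avro's "no schema": a primitive schema of type NULL
    val resolved: Schema = Schema.create(Schema.Type.NULL)
    // the guard maps it to an empty Spark schema, consumed by buildScan below
    val sentinel: StructType = StructType(Nil)
    assert(resolved.getType == Schema.Type.NULL && sentinel.fields.isEmpty)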
@@ -99,6 +105,10 @@ class IncrementalRelation(val sqlContext: SQLContext,
   override def schema: StructType = usedSchema
 
   override def buildScan(): RDD[Row] = {
+    if (usedSchema == StructType(Nil)) {
+      // if first commit in a table is an empty commit without schema, return empty RDD here
+      sqlContext.sparkContext.emptyRDD[Row]
+    } else {
     val regularFileIdToFullPath = mutable.HashMap[String, String]()
     var metaBootstrapFileIdToFullPath = mutable.HashMap[String, String]()
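buildScan keys on the StructType(Nil) sentinel set above: an empty schema plus an empty RDD yields a well-formed zero-row result. A small sketch of that equivalence, assuming a SparkSession `spark`:

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.StructType

    // an empty RDD under an empty schema is a valid zero-row DataFrame
    val emptyDf = spark.createDataFrame(spark.sparkContext.emptyRDD[Row], StructType(Nil))
    assert(emptyDf.schema == StructType(Nil) && emptyDf.count() == 0)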
@@ -167,8 +177,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
           .load()
       }
 
-      if (regularFileIdToFullPath.nonEmpty)
-      {
+      if (regularFileIdToFullPath.nonEmpty) {
         df = df.union(sqlContext.read.options(sOpts)
           .schema(usedSchema)
           .parquet(filteredRegularFullPaths.toList: _*)
@@ -182,3 +191,4 @@ class IncrementalRelation(val sqlContext: SQLContext,
       }
     }
+  }
 }
@@ -111,6 +111,10 @@ public class S3EventsHoodieIncrSource extends HoodieIncrSource {
             .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
     Dataset<Row> source = metaReader.load(srcPath);
 
+    if (source.isEmpty()) {
+      return Pair.of(Option.empty(), instantEndpts.getRight());
+    }
+
     String filter = "s3.object.size > 0";
     if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX))) {
       filter = filter + " and s3.object.key like '" + props.getString(Config.S3_KEY_PREFIX) + "%'";
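The isEmpty guard above returns early with no rows and the end-instant checkpoint, so the S3 metadata filter built below never runs against an empty batch. For reference, that filter string is plain Spark SQL, presumably applied via Dataset.filter; a Scala sketch with a hypothetical key prefix standing in for Config.S3_KEY_PREFIX:

    // `source` is the Dataset[Row] of S3 event records loaded above
    val keyPrefix = "data/" // hypothetical value of Config.S3_KEY_PREFIX
    val condition = s"s3.object.size > 0 and s3.object.key like '$keyPrefix%'"
    val filtered = source.filter(condition) // Dataset.filter(conditionExpr: String)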