[HUDI-3168] Fixing null schema with empty commit in incremental relation (#4513)
This commit is contained in:
@@ -17,8 +17,9 @@
|
|||||||
|
|
||||||
package org.apache.hudi
|
package org.apache.hudi
|
||||||
|
|
||||||
import java.util.stream.Collectors
|
import org.apache.avro.Schema
|
||||||
|
|
||||||
|
import java.util.stream.Collectors
|
||||||
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata, HoodieTableType}
|
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata, HoodieTableType}
|
||||||
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
||||||
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
|
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
|
||||||
@@ -89,9 +90,14 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
|||||||
} else {
|
} else {
|
||||||
schemaResolver.getTableAvroSchemaWithoutMetadataFields()
|
schemaResolver.getTableAvroSchemaWithoutMetadataFields()
|
||||||
}
|
}
|
||||||
|
if (tableSchema.getType == Schema.Type.NULL) {
|
||||||
|
// if there is only one commit in the table and it is an empty commit without a schema, use an empty schema here
|
||||||
|
StructType(Nil)
|
||||||
|
} else {
|
||||||
val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
|
val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
|
||||||
StructType(skeletonSchema.fields ++ dataSchema.fields)
|
StructType(skeletonSchema.fields ++ dataSchema.fields)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.key,
|
private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.key,
|
||||||
DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.defaultValue).split(",").filter(!_.isEmpty)
|
DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.defaultValue).split(",").filter(!_.isEmpty)
|
||||||
@@ -99,6 +105,10 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
|||||||
override def schema: StructType = usedSchema
|
override def schema: StructType = usedSchema
|
||||||
|
|
||||||
override def buildScan(): RDD[Row] = {
|
override def buildScan(): RDD[Row] = {
|
||||||
|
if (usedSchema == StructType(Nil)) {
|
||||||
|
// if the first commit in the table is an empty commit without a schema, return an empty RDD here
|
||||||
|
sqlContext.sparkContext.emptyRDD[Row]
|
||||||
|
} else {
|
||||||
val regularFileIdToFullPath = mutable.HashMap[String, String]()
|
val regularFileIdToFullPath = mutable.HashMap[String, String]()
|
||||||
var metaBootstrapFileIdToFullPath = mutable.HashMap[String, String]()
|
var metaBootstrapFileIdToFullPath = mutable.HashMap[String, String]()
|
||||||
|
|
||||||
@@ -140,7 +150,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
|||||||
DataSourceReadOptions.INCR_PATH_GLOB.key,
|
DataSourceReadOptions.INCR_PATH_GLOB.key,
|
||||||
DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)
|
DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)
|
||||||
val (filteredRegularFullPaths, filteredMetaBootstrapFullPaths) = {
|
val (filteredRegularFullPaths, filteredMetaBootstrapFullPaths) = {
|
||||||
if(!pathGlobPattern.equals(DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)) {
|
if (!pathGlobPattern.equals(DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)) {
|
||||||
val globMatcher = new GlobPattern("*" + pathGlobPattern)
|
val globMatcher = new GlobPattern("*" + pathGlobPattern)
|
||||||
(regularFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values,
|
(regularFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values,
|
||||||
metaBootstrapFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values)
|
metaBootstrapFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values)
|
||||||
@@ -167,8 +177,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
|||||||
.load()
|
.load()
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regularFileIdToFullPath.nonEmpty)
|
if (regularFileIdToFullPath.nonEmpty) {
|
||||||
{
|
|
||||||
df = df.union(sqlContext.read.options(sOpts)
|
df = df.union(sqlContext.read.options(sOpts)
|
||||||
.schema(usedSchema)
|
.schema(usedSchema)
|
||||||
.parquet(filteredRegularFullPaths.toList: _*)
|
.parquet(filteredRegularFullPaths.toList: _*)
|
||||||
@@ -181,4 +190,5 @@ class IncrementalRelation(val sqlContext: SQLContext,
|
|||||||
filters.foldLeft(df)((e, f) => e.filter(f)).rdd
|
filters.foldLeft(df)((e, f) => e.filter(f)).rdd
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -111,6 +111,10 @@ public class S3EventsHoodieIncrSource extends HoodieIncrSource {
|
|||||||
.option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
|
.option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
|
||||||
Dataset<Row> source = metaReader.load(srcPath);
|
Dataset<Row> source = metaReader.load(srcPath);
|
||||||
|
|
||||||
|
if (source.isEmpty()) {
|
||||||
|
return Pair.of(Option.empty(), instantEndpts.getRight());
|
||||||
|
}
|
||||||
|
|
||||||
String filter = "s3.object.size > 0";
|
String filter = "s3.object.size > 0";
|
||||||
if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX))) {
|
if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX))) {
|
||||||
filter = filter + " and s3.object.key like '" + props.getString(Config.S3_KEY_PREFIX) + "%'";
|
filter = filter + " and s3.object.key like '" + props.getString(Config.S3_KEY_PREFIX) + "%'";
|
||||||
|
|||||||
Reference in New Issue
Block a user