1
0

[HUDI-3168] Fixing null schema with empty commit in incremental relation (#4513)

This commit is contained in:
Vinish Reddy
2022-01-05 22:13:10 +05:30
committed by GitHub
parent 75133f9942
commit eee715b3ff
2 changed files with 88 additions and 74 deletions

View File

@@ -17,8 +17,9 @@
package org.apache.hudi
import org.apache.avro.Schema
import java.util.stream.Collectors
import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata, HoodieTableType}
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
@@ -89,9 +90,14 @@ class IncrementalRelation(val sqlContext: SQLContext,
} else {
  schemaResolver.getTableAvroSchemaWithoutMetadataFields()
}
if (tableSchema.getType == Schema.Type.NULL) {
// if there is only one commit in the table and is an empty commit without schema, return empty RDD here
StructType(Nil)
} else {
  val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
  StructType(skeletonSchema.fields ++ dataSchema.fields)
}
}
private val filters = optParams.getOrElse(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.key,
  DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS.defaultValue).split(",").filter(!_.isEmpty)
@@ -99,6 +105,10 @@ class IncrementalRelation(val sqlContext: SQLContext,
override def schema: StructType = usedSchema

override def buildScan(): RDD[Row] = {
if (usedSchema == StructType(Nil)) {
// if first commit in a table is an empty commit without schema, return empty RDD here
sqlContext.sparkContext.emptyRDD[Row]
} else {
  val regularFileIdToFullPath = mutable.HashMap[String, String]()
  var metaBootstrapFileIdToFullPath = mutable.HashMap[String, String]()
@@ -167,8 +177,7 @@ class IncrementalRelation(val sqlContext: SQLContext,
    .load()
}
if (regularFileIdToFullPath.nonEmpty) {
  df = df.union(sqlContext.read.options(sOpts)
    .schema(usedSchema)
    .parquet(filteredRegularFullPaths.toList: _*)
@@ -182,3 +191,4 @@ class IncrementalRelation(val sqlContext: SQLContext,
    }
  }
}
}

View File

@@ -111,6 +111,10 @@ public class S3EventsHoodieIncrSource extends HoodieIncrSource {
    .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight());
Dataset<Row> source = metaReader.load(srcPath);
if (source.isEmpty()) {
return Pair.of(Option.empty(), instantEndpts.getRight());
}
String filter = "s3.object.size > 0";
if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX))) {
  filter = filter + " and s3.object.key like '" + props.getString(Config.S3_KEY_PREFIX) + "%'";