1
0

Fix bug with incrementally pulling older data

This commit is contained in:
Vinoth Chandar
2018-09-14 12:51:27 +05:30
committed by vinoth chandar
parent bd5af89f12
commit f44bcc5b03
2 changed files with 18 additions and 2 deletions

View File

@@ -88,8 +88,12 @@ class IncrementalRelation(val sqlContext: SQLContext,
.get, classOf[HoodieCommitMetadata])
fileIdToFullPath ++= metadata.getFileIdAndFullPaths(basePath).toMap
}
// Unset the path filter: if end_instant_time is not the latest instant, the path filter set for the RO view
// would otherwise incorrectly filter out all the files.
sqlContext.sparkContext.hadoopConfiguration.unset("mapreduce.input.pathFilter.class");
val sOpts = optParams.filter(p => !p._1.equalsIgnoreCase("path"))
sqlContext.read.options(sOpts)
.schema(latestSchema) // avoid AnalysisException for empty input
.parquet(fileIdToFullPath.values.toList: _*)
.filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp))
.filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp))