[HUDI-3457] Refactored Spark DataSource Relations to avoid code duplication (#4877)
Refactoring Spark DataSource Relations to avoid code duplication. Following Relations were in scope: - BaseFileOnlyViewRelation - MergeOnReadSnapshotRelaation - MergeOnReadIncrementalRelation
This commit is contained in:
@@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
object HoodieConversionUtils {
|
||||||
|
|
||||||
|
def toJavaOption[T](opt: Option[T]): org.apache.hudi.common.util.Option[T] =
|
||||||
|
if (opt.isDefined) org.apache.hudi.common.util.Option.of(opt.get) else org.apache.hudi.common.util.Option.empty()
|
||||||
|
|
||||||
|
def toScalaOption[T](opt: org.apache.hudi.common.util.Option[T]): Option[T] =
|
||||||
|
if (opt.isPresent) Some(opt.get) else None
|
||||||
|
|
||||||
|
}
|
||||||
@@ -118,11 +118,6 @@ object HoodieSparkUtils extends SparkAdapterSupport {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
def createInMemoryFileIndex(sparkSession: SparkSession, globbedPaths: Seq[Path]): InMemoryFileIndex = {
|
|
||||||
val fileStatusCache = FileStatusCache.getOrCreate(sparkSession)
|
|
||||||
new InMemoryFileIndex(sparkSession, globbedPaths, Map(), Option.empty, fileStatusCache)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @deprecated please use other overload [[createRdd]]
|
* @deprecated please use other overload [[createRdd]]
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -198,7 +198,7 @@ public abstract class BaseHoodieTableFileIndex {
|
|||||||
// that is under the pending compaction process, new log-file will bear the compaction's instant (on the
|
// that is under the pending compaction process, new log-file will bear the compaction's instant (on the
|
||||||
// timeline) in its name, as opposed to the base-file's commit instant. To make sure we're not filtering
|
// timeline) in its name, as opposed to the base-file's commit instant. To make sure we're not filtering
|
||||||
// such log-file we have to _always_ include pending compaction instants into consideration
|
// such log-file we have to _always_ include pending compaction instants into consideration
|
||||||
// TODO(HUDI-3302) re-evaluate whether we should not filter any commits in here
|
// TODO(HUDI-3302) re-evaluate whether we should filter any commits in here
|
||||||
HoodieTimeline timeline = metaClient.getCommitsAndCompactionTimeline();
|
HoodieTimeline timeline = metaClient.getCommitsAndCompactionTimeline();
|
||||||
if (shouldIncludePendingCommits) {
|
if (shouldIncludePendingCommits) {
|
||||||
return timeline;
|
return timeline;
|
||||||
|
|||||||
@@ -509,7 +509,7 @@ public class TableSchemaResolver {
|
|||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public static MessageType readSchemaFromLogFile(FileSystem fs, Path path) throws IOException {
|
public static MessageType readSchemaFromLogFile(FileSystem fs, Path path) throws IOException {
|
||||||
Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(path), null);
|
try (Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(path), null)) {
|
||||||
HoodieDataBlock lastBlock = null;
|
HoodieDataBlock lastBlock = null;
|
||||||
while (reader.hasNext()) {
|
while (reader.hasNext()) {
|
||||||
HoodieLogBlock block = reader.next();
|
HoodieLogBlock block = reader.next();
|
||||||
@@ -517,11 +517,8 @@ public class TableSchemaResolver {
|
|||||||
lastBlock = (HoodieDataBlock) block;
|
lastBlock = (HoodieDataBlock) block;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reader.close();
|
return lastBlock != null ? new AvroSchemaConverter().convert(lastBlock.getSchema()) : null;
|
||||||
if (lastBlock != null) {
|
|
||||||
return new AvroSchemaConverter().convert(lastBlock.getSchema());
|
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isHasOperationField() {
|
public boolean isHasOperationField() {
|
||||||
|
|||||||
@@ -19,21 +19,11 @@
|
|||||||
package org.apache.hudi.hadoop.utils;
|
package org.apache.hudi.hadoop.utils;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
|
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
|
||||||
import org.apache.hadoop.mapred.FileSplit;
|
import org.apache.hadoop.mapred.FileSplit;
|
||||||
import org.apache.hadoop.mapred.JobConf;
|
import org.apache.hadoop.mapred.JobConf;
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
|
||||||
import org.apache.hudi.common.model.FileSlice;
|
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
|
||||||
import org.apache.hudi.common.model.HoodieLogFile;
|
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
|
||||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.collection.Pair;
|
|
||||||
import org.apache.hudi.exception.HoodieException;
|
|
||||||
import org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit;
|
import org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit;
|
||||||
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;
|
import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit;
|
||||||
import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo;
|
import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo;
|
||||||
@@ -41,14 +31,6 @@ import org.apache.hudi.hadoop.realtime.RealtimeSplit;
|
|||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import static org.apache.hudi.TypeUtils.unsafeCast;
|
import static org.apache.hudi.TypeUtils.unsafeCast;
|
||||||
|
|
||||||
public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
|
public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
|
||||||
@@ -67,41 +49,6 @@ public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return parquet file with a list of log files in the same file group.
|
|
||||||
public static List<Pair<Option<HoodieBaseFile>, List<HoodieLogFile>>> groupLogsByBaseFile(Configuration conf, List<Path> partitionPaths) {
|
|
||||||
Set<Path> partitionSet = new HashSet<>(partitionPaths);
|
|
||||||
// TODO(vc): Should we handle also non-hoodie splits here?
|
|
||||||
Map<Path, HoodieTableMetaClient> partitionsToMetaClient = getTableMetaClientByPartitionPath(conf, partitionSet);
|
|
||||||
|
|
||||||
// Get all the base file and it's log files pairs in required partition paths.
|
|
||||||
List<Pair<Option<HoodieBaseFile>, List<HoodieLogFile>>> baseAndLogsList = new ArrayList<>();
|
|
||||||
partitionSet.forEach(partitionPath -> {
|
|
||||||
// for each partition path obtain the data & log file groupings, then map back to inputsplits
|
|
||||||
HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath);
|
|
||||||
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline());
|
|
||||||
String relPartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), partitionPath);
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Both commit and delta-commits are included - pick the latest completed one
|
|
||||||
Option<HoodieInstant> latestCompletedInstant =
|
|
||||||
metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants().lastInstant();
|
|
||||||
|
|
||||||
Stream<FileSlice> latestFileSlices = latestCompletedInstant
|
|
||||||
.map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp()))
|
|
||||||
.orElse(Stream.empty());
|
|
||||||
|
|
||||||
latestFileSlices.forEach(fileSlice -> {
|
|
||||||
List<HoodieLogFile> logFilePaths = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
|
|
||||||
baseAndLogsList.add(Pair.of(fileSlice.getBaseFile(), logFilePaths));
|
|
||||||
});
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new HoodieException("Error obtaining data file/log file grouping: " + partitionPath, e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return baseAndLogsList;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a field to the existing fields projected.
|
* Add a field to the existing fields projected.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -0,0 +1,94 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration
|
||||||
|
import org.apache.hadoop.fs.Path
|
||||||
|
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
|
import org.apache.spark.sql.{HoodieCatalystExpressionUtils, SQLContext}
|
||||||
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
|
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||||
|
import org.apache.spark.sql.execution.datasources._
|
||||||
|
import org.apache.spark.sql.sources.{BaseRelation, Filter}
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [[BaseRelation]] implementation only reading Base files of Hudi tables, essentially supporting following querying
|
||||||
|
* modes:
|
||||||
|
* <ul>
|
||||||
|
* <li>For COW tables: Snapshot</li>
|
||||||
|
* <li>For MOR tables: Read-optimized</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* NOTE: The reason this Relation is used in liue of Spark's default [[HadoopFsRelation]] is primarily due to the
|
||||||
|
* fact that it injects real partition's path as the value of the partition field, which Hudi ultimately persists
|
||||||
|
* as part of the record payload. In some cases, however, partition path might not necessarily be equal to the
|
||||||
|
* verbatim value of the partition path field (when custom [[KeyGenerator]] is used) therefore leading to incorrect
|
||||||
|
* partition field values being written
|
||||||
|
*/
|
||||||
|
class BaseFileOnlyRelation(sqlContext: SQLContext,
|
||||||
|
metaClient: HoodieTableMetaClient,
|
||||||
|
optParams: Map[String, String],
|
||||||
|
userSchema: Option[StructType],
|
||||||
|
globPaths: Seq[Path])
|
||||||
|
extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) with SparkAdapterSupport {
|
||||||
|
|
||||||
|
override type FileSplit = HoodieBaseFileSplit
|
||||||
|
|
||||||
|
protected override def composeRDD(fileSplits: Seq[HoodieBaseFileSplit],
|
||||||
|
partitionSchema: StructType,
|
||||||
|
tableSchema: HoodieTableSchema,
|
||||||
|
requiredSchema: HoodieTableSchema,
|
||||||
|
filters: Array[Filter]): HoodieUnsafeRDD = {
|
||||||
|
val baseFileReader = createBaseFileReader(
|
||||||
|
spark = sparkSession,
|
||||||
|
partitionSchema = partitionSchema,
|
||||||
|
tableSchema = tableSchema,
|
||||||
|
requiredSchema = requiredSchema,
|
||||||
|
filters = filters,
|
||||||
|
options = optParams,
|
||||||
|
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
||||||
|
// to configure Parquet reader appropriately
|
||||||
|
hadoopConf = new Configuration(conf)
|
||||||
|
)
|
||||||
|
|
||||||
|
new HoodieFileScanRDD(sparkSession, baseFileReader, fileSplits)
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[HoodieBaseFileSplit] = {
|
||||||
|
val partitions = listLatestBaseFiles(globPaths, partitionFilters, dataFilters)
|
||||||
|
val fileSplits = partitions.values.toSeq.flatMap { files =>
|
||||||
|
files.flatMap { file =>
|
||||||
|
// TODO move to adapter
|
||||||
|
// TODO fix, currently assuming parquet as underlying format
|
||||||
|
HoodieDataSourceHelper.splitFiles(
|
||||||
|
sparkSession = sparkSession,
|
||||||
|
file = file,
|
||||||
|
// TODO clarify why this is required
|
||||||
|
partitionValues = InternalRow.empty
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val maxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes
|
||||||
|
|
||||||
|
sparkAdapter.getFilePartitions(sparkSession, fileSplits, maxSplitBytes).map(HoodieBaseFileSplit.apply)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,141 +0,0 @@
|
|||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
|
||||||
* or more contributor license agreements. See the NOTICE file
|
|
||||||
* distributed with this work for additional information
|
|
||||||
* regarding copyright ownership. The ASF licenses this file
|
|
||||||
* to you under the Apache License, Version 2.0 (the
|
|
||||||
* "License"); you may not use this file except in compliance
|
|
||||||
* with the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.apache.hudi
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration
|
|
||||||
import org.apache.hadoop.fs.Path
|
|
||||||
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
|
||||||
import org.apache.hudi.hadoop.HoodieROTablePathFilter
|
|
||||||
import org.apache.spark.rdd.RDD
|
|
||||||
import org.apache.spark.sql.{HoodieCatalystExpressionUtils, SQLContext}
|
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
|
||||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
|
||||||
import org.apache.spark.sql.execution.datasources._
|
|
||||||
import org.apache.spark.sql.sources.{BaseRelation, Filter}
|
|
||||||
import org.apache.spark.sql.types.StructType
|
|
||||||
|
|
||||||
/**
|
|
||||||
* [[BaseRelation]] implementation only reading Base files of Hudi tables, essentially supporting following querying
|
|
||||||
* modes:
|
|
||||||
* <ul>
|
|
||||||
* <li>For COW tables: Snapshot</li>
|
|
||||||
* <li>For MOR tables: Read-optimized</li>
|
|
||||||
* </ul>
|
|
||||||
*
|
|
||||||
* NOTE: The reason this Relation is used in liue of Spark's default [[HadoopFsRelation]] is primarily due to the
|
|
||||||
* fact that it injects real partition's path as the value of the partition field, which Hudi ultimately persists
|
|
||||||
* as part of the record payload. In some cases, however, partition path might not necessarily be equal to the
|
|
||||||
* verbatim value of the partition path field (when custom [[KeyGenerator]] is used) therefore leading to incorrect
|
|
||||||
* partition field values being written
|
|
||||||
*/
|
|
||||||
class BaseFileOnlyViewRelation(sqlContext: SQLContext,
|
|
||||||
metaClient: HoodieTableMetaClient,
|
|
||||||
optParams: Map[String, String],
|
|
||||||
userSchema: Option[StructType],
|
|
||||||
globPaths: Seq[Path])
|
|
||||||
extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) with SparkAdapterSupport {
|
|
||||||
|
|
||||||
private val fileIndex = HoodieFileIndex(sparkSession, metaClient, userSchema, optParams,
|
|
||||||
FileStatusCache.getOrCreate(sqlContext.sparkSession))
|
|
||||||
|
|
||||||
override def doBuildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[InternalRow] = {
|
|
||||||
// NOTE: In case list of requested columns doesn't contain the Primary Key one, we
|
|
||||||
// have to add it explicitly so that
|
|
||||||
// - Merging could be performed correctly
|
|
||||||
// - In case 0 columns are to be fetched (for ex, when doing {@code count()} on Spark's [[Dataset]],
|
|
||||||
// Spark still fetches all the rows to execute the query correctly
|
|
||||||
//
|
|
||||||
// It's okay to return columns that have not been requested by the caller, as those nevertheless will be
|
|
||||||
// filtered out upstream
|
|
||||||
val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns)
|
|
||||||
|
|
||||||
val (requiredAvroSchema, requiredStructSchema) =
|
|
||||||
HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns)
|
|
||||||
|
|
||||||
val filterExpressions = convertToExpressions(filters)
|
|
||||||
val (partitionFilters, dataFilters) = HoodieCatalystExpressionUtils.splitPartitionAndDataPredicates(
|
|
||||||
sparkSession, filterExpressions, partitionColumns)
|
|
||||||
|
|
||||||
val filePartitions = getPartitions(partitionFilters, dataFilters)
|
|
||||||
|
|
||||||
val partitionSchema = StructType(Nil)
|
|
||||||
val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString)
|
|
||||||
val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString)
|
|
||||||
|
|
||||||
val baseFileReader = createBaseFileReader(
|
|
||||||
spark = sparkSession,
|
|
||||||
partitionSchema = partitionSchema,
|
|
||||||
tableSchema = tableSchema,
|
|
||||||
requiredSchema = requiredSchema,
|
|
||||||
filters = filters,
|
|
||||||
options = optParams,
|
|
||||||
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
|
||||||
// to configure Parquet reader appropriately
|
|
||||||
hadoopConf = new Configuration(conf)
|
|
||||||
)
|
|
||||||
|
|
||||||
new HoodieFileScanRDD(sparkSession, baseFileReader, filePartitions)
|
|
||||||
}
|
|
||||||
|
|
||||||
private def getPartitions(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[FilePartition] = {
|
|
||||||
val partitionDirectories = if (globPaths.isEmpty) {
|
|
||||||
val hoodieFileIndex = HoodieFileIndex(sparkSession, metaClient, userSchema, optParams,
|
|
||||||
FileStatusCache.getOrCreate(sqlContext.sparkSession))
|
|
||||||
hoodieFileIndex.listFiles(partitionFilters, dataFilters)
|
|
||||||
} else {
|
|
||||||
sqlContext.sparkContext.hadoopConfiguration.setClass(
|
|
||||||
"mapreduce.input.pathFilter.class",
|
|
||||||
classOf[HoodieROTablePathFilter],
|
|
||||||
classOf[org.apache.hadoop.fs.PathFilter])
|
|
||||||
|
|
||||||
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sparkSession, globPaths)
|
|
||||||
inMemoryFileIndex.listFiles(partitionFilters, dataFilters)
|
|
||||||
}
|
|
||||||
|
|
||||||
val partitions = partitionDirectories.flatMap { partition =>
|
|
||||||
partition.files.flatMap { file =>
|
|
||||||
// TODO move to adapter
|
|
||||||
// TODO fix, currently assuming parquet as underlying format
|
|
||||||
HoodieDataSourceHelper.splitFiles(
|
|
||||||
sparkSession = sparkSession,
|
|
||||||
file = file,
|
|
||||||
// TODO clarify why this is required
|
|
||||||
partitionValues = InternalRow.empty
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
val maxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes
|
|
||||||
|
|
||||||
sparkAdapter.getFilePartitions(sparkSession, partitions, maxSplitBytes)
|
|
||||||
}
|
|
||||||
|
|
||||||
private def convertToExpressions(filters: Array[Filter]): Array[Expression] = {
|
|
||||||
val catalystExpressions = HoodieSparkUtils.convertToCatalystExpressions(filters, tableStructSchema)
|
|
||||||
|
|
||||||
val failedExprs = catalystExpressions.zipWithIndex.filter { case (opt, _) => opt.isEmpty }
|
|
||||||
if (failedExprs.nonEmpty) {
|
|
||||||
val failedFilters = failedExprs.map(p => filters(p._2))
|
|
||||||
logWarning(s"Failed to convert Filters into Catalyst expressions (${failedFilters.map(_.toString)})")
|
|
||||||
}
|
|
||||||
|
|
||||||
catalystExpressions.filter(_.isDefined).map(_.get).toArray
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -107,7 +107,7 @@ class DefaultSource extends RelationProvider
|
|||||||
case (COPY_ON_WRITE, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) |
|
case (COPY_ON_WRITE, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) |
|
||||||
(COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) |
|
(COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) |
|
||||||
(MERGE_ON_READ, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) =>
|
(MERGE_ON_READ, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) =>
|
||||||
new BaseFileOnlyViewRelation(sqlContext, metaClient, parameters, userSchema, globPaths)
|
new BaseFileOnlyRelation(sqlContext, metaClient, parameters, userSchema, globPaths)
|
||||||
|
|
||||||
case (COPY_ON_WRITE, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) =>
|
case (COPY_ON_WRITE, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) =>
|
||||||
new IncrementalRelation(sqlContext, parameters, userSchema, metaClient)
|
new IncrementalRelation(sqlContext, parameters, userSchema, metaClient)
|
||||||
|
|||||||
@@ -20,22 +20,28 @@ package org.apache.hudi
|
|||||||
import org.apache.avro.Schema
|
import org.apache.avro.Schema
|
||||||
import org.apache.avro.generic.GenericRecord
|
import org.apache.avro.generic.GenericRecord
|
||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.Path
|
import org.apache.hadoop.fs.{FileStatus, Path, PathFilter}
|
||||||
import org.apache.hadoop.hbase.io.hfile.CacheConfig
|
import org.apache.hadoop.hbase.io.hfile.CacheConfig
|
||||||
import org.apache.hadoop.mapred.JobConf
|
import org.apache.hadoop.mapred.JobConf
|
||||||
import org.apache.hudi.HoodieBaseRelation.isMetadataTable
|
import org.apache.hudi.HoodieBaseRelation.{getPartitionPath, isMetadataTable}
|
||||||
|
import org.apache.hudi.HoodieConversionUtils.toScalaOption
|
||||||
import org.apache.hudi.common.config.SerializableConfiguration
|
import org.apache.hudi.common.config.SerializableConfiguration
|
||||||
import org.apache.hudi.common.fs.FSUtils
|
import org.apache.hudi.common.fs.FSUtils
|
||||||
import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord}
|
import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord}
|
||||||
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
|
||||||
|
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
||||||
|
import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver}
|
||||||
import org.apache.hudi.common.util.StringUtils
|
import org.apache.hudi.common.util.StringUtils
|
||||||
|
import org.apache.hudi.hadoop.HoodieROTablePathFilter
|
||||||
import org.apache.hudi.io.storage.HoodieHFileReader
|
import org.apache.hudi.io.storage.HoodieHFileReader
|
||||||
import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadata}
|
import org.apache.hudi.metadata.{HoodieMetadataPayload, HoodieTableMetadata}
|
||||||
|
import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex
|
||||||
import org.apache.spark.internal.Logging
|
import org.apache.spark.internal.Logging
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.avro.SchemaConverters
|
import org.apache.spark.sql.avro.SchemaConverters
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression}
|
||||||
|
import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionDirectory, PartitionedFile}
|
||||||
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
|
import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
|
||||||
import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan}
|
import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan}
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.StructType
|
||||||
@@ -44,6 +50,8 @@ import org.apache.spark.sql.{Row, SQLContext, SparkSession}
|
|||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.util.Try
|
import scala.util.Try
|
||||||
|
|
||||||
|
trait HoodieFileSplit {}
|
||||||
|
|
||||||
case class HoodieTableSchema(structTypeSchema: StructType, avroSchemaStr: String)
|
case class HoodieTableSchema(structTypeSchema: StructType, avroSchemaStr: String)
|
||||||
|
|
||||||
case class HoodieTableState(recordKeyField: String,
|
case class HoodieTableState(recordKeyField: String,
|
||||||
@@ -53,36 +61,33 @@ case class HoodieTableState(recordKeyField: String,
|
|||||||
* Hoodie BaseRelation which extends [[PrunedFilteredScan]].
|
* Hoodie BaseRelation which extends [[PrunedFilteredScan]].
|
||||||
*/
|
*/
|
||||||
abstract class HoodieBaseRelation(val sqlContext: SQLContext,
|
abstract class HoodieBaseRelation(val sqlContext: SQLContext,
|
||||||
metaClient: HoodieTableMetaClient,
|
val metaClient: HoodieTableMetaClient,
|
||||||
optParams: Map[String, String],
|
val optParams: Map[String, String],
|
||||||
userSchema: Option[StructType])
|
userSchema: Option[StructType])
|
||||||
extends BaseRelation with PrunedFilteredScan with Logging {
|
extends BaseRelation with PrunedFilteredScan with Logging {
|
||||||
|
|
||||||
|
type FileSplit <: HoodieFileSplit
|
||||||
|
|
||||||
|
imbueConfigs(sqlContext)
|
||||||
|
|
||||||
protected val sparkSession: SparkSession = sqlContext.sparkSession
|
protected val sparkSession: SparkSession = sqlContext.sparkSession
|
||||||
|
|
||||||
protected lazy val conf: Configuration = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
|
protected lazy val conf: Configuration = new Configuration(sqlContext.sparkContext.hadoopConfiguration)
|
||||||
protected lazy val jobConf = new JobConf(conf)
|
protected lazy val jobConf = new JobConf(conf)
|
||||||
|
|
||||||
|
protected lazy val tableConfig: HoodieTableConfig = metaClient.getTableConfig
|
||||||
|
|
||||||
|
protected lazy val basePath: String = metaClient.getBasePath
|
||||||
|
|
||||||
// If meta fields are enabled, always prefer key from the meta field as opposed to user-specified one
|
// If meta fields are enabled, always prefer key from the meta field as opposed to user-specified one
|
||||||
// NOTE: This is historical behavior which is preserved as is
|
// NOTE: This is historical behavior which is preserved as is
|
||||||
protected lazy val recordKeyField: String =
|
protected lazy val recordKeyField: String =
|
||||||
if (metaClient.getTableConfig.populateMetaFields()) HoodieRecord.RECORD_KEY_METADATA_FIELD
|
if (tableConfig.populateMetaFields()) HoodieRecord.RECORD_KEY_METADATA_FIELD
|
||||||
else metaClient.getTableConfig.getRecordKeyFieldProp
|
else tableConfig.getRecordKeyFieldProp
|
||||||
|
|
||||||
protected lazy val preCombineFieldOpt: Option[String] = getPrecombineFieldProperty
|
protected lazy val preCombineFieldOpt: Option[String] = getPrecombineFieldProperty
|
||||||
|
|
||||||
/**
|
protected lazy val specifiedQueryTimestamp: Option[String] =
|
||||||
* @VisibleInTests
|
|
||||||
*/
|
|
||||||
lazy val mandatoryColumns: Seq[String] = {
|
|
||||||
if (isMetadataTable(metaClient)) {
|
|
||||||
Seq(HoodieMetadataPayload.KEY_FIELD_NAME, HoodieMetadataPayload.SCHEMA_FIELD_NAME_TYPE)
|
|
||||||
} else {
|
|
||||||
Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected lazy val specifiedQueryInstant: Option[String] =
|
|
||||||
optParams.get(DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key)
|
optParams.get(DataSourceReadOptions.TIME_TRAVEL_AS_OF_INSTANT.key)
|
||||||
.map(HoodieSqlCommonUtils.formatQueryInstant)
|
.map(HoodieSqlCommonUtils.formatQueryInstant)
|
||||||
|
|
||||||
@@ -100,16 +105,40 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext,
|
|||||||
|
|
||||||
protected val tableStructSchema: StructType = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
|
protected val tableStructSchema: StructType = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
|
||||||
|
|
||||||
protected val partitionColumns: Array[String] = metaClient.getTableConfig.getPartitionFields.orElse(Array.empty)
|
protected val partitionColumns: Array[String] = tableConfig.getPartitionFields.orElse(Array.empty)
|
||||||
|
|
||||||
protected def getPrecombineFieldProperty: Option[String] =
|
/**
|
||||||
Option(metaClient.getTableConfig.getPreCombineField)
|
* NOTE: PLEASE READ THIS CAREFULLY
|
||||||
.orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match {
|
*
|
||||||
// NOTE: This is required to compensate for cases when empty string is used to stub
|
* Even though [[HoodieFileIndex]] initializes eagerly listing all of the files w/in the given Hudi table,
|
||||||
// property value to avoid it being set with the default value
|
* this variable itself is _lazy_ (and have to stay that way) which guarantees that it's not initialized, until
|
||||||
// TODO(HUDI-3456) cleanup
|
* it's actually accessed
|
||||||
case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f)
|
*/
|
||||||
case _ => None
|
protected lazy val fileIndex: HoodieFileIndex =
|
||||||
|
HoodieFileIndex(sparkSession, metaClient, Some(tableStructSchema), optParams,
|
||||||
|
FileStatusCache.getOrCreate(sparkSession))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @VisibleInTests
|
||||||
|
*/
|
||||||
|
lazy val mandatoryColumns: Seq[String] = {
|
||||||
|
if (isMetadataTable(metaClient)) {
|
||||||
|
Seq(HoodieMetadataPayload.KEY_FIELD_NAME, HoodieMetadataPayload.SCHEMA_FIELD_NAME_TYPE)
|
||||||
|
} else {
|
||||||
|
// TODO this is MOR table requirement, not necessary for COW
|
||||||
|
Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def timeline: HoodieTimeline =
|
||||||
|
// NOTE: We're including compaction here since it's not considering a "commit" operation
|
||||||
|
metaClient.getCommitsAndCompactionTimeline.filterCompletedInstants
|
||||||
|
|
||||||
|
protected def latestInstant: Option[HoodieInstant] =
|
||||||
|
toScalaOption(timeline.lastInstant())
|
||||||
|
|
||||||
|
protected def queryTimestamp: Option[String] = {
|
||||||
|
specifiedQueryTimestamp.orElse(toScalaOption(timeline.lastInstant()).map(i => i.getTimestamp))
|
||||||
}
|
}
|
||||||
|
|
||||||
override def schema: StructType = tableStructSchema
|
override def schema: StructType = tableStructSchema
|
||||||
@@ -130,22 +159,129 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext,
|
|||||||
* NOTE: DO NOT OVERRIDE THIS METHOD
|
* NOTE: DO NOT OVERRIDE THIS METHOD
|
||||||
*/
|
*/
|
||||||
override final def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
|
override final def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
|
||||||
|
// NOTE: In case list of requested columns doesn't contain the Primary Key one, we
|
||||||
|
// have to add it explicitly so that
|
||||||
|
// - Merging could be performed correctly
|
||||||
|
// - In case 0 columns are to be fetched (for ex, when doing {@code count()} on Spark's [[Dataset]],
|
||||||
|
// Spark still fetches all the rows to execute the query correctly
|
||||||
|
//
|
||||||
|
// It's okay to return columns that have not been requested by the caller, as those nevertheless will be
|
||||||
|
// filtered out upstream
|
||||||
|
val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns)
|
||||||
|
|
||||||
|
val (requiredAvroSchema, requiredStructSchema) =
|
||||||
|
HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns)
|
||||||
|
|
||||||
|
val filterExpressions = convertToExpressions(filters)
|
||||||
|
val (partitionFilters, dataFilters) = filterExpressions.partition(isPartitionPredicate)
|
||||||
|
|
||||||
|
val fileSplits = collectFileSplits(partitionFilters, dataFilters)
|
||||||
|
|
||||||
|
val partitionSchema = StructType(Nil)
|
||||||
|
val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString)
|
||||||
|
val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString)
|
||||||
|
|
||||||
// Here we rely on a type erasure, to workaround inherited API restriction and pass [[RDD[InternalRow]]] back as [[RDD[Row]]]
|
// Here we rely on a type erasure, to workaround inherited API restriction and pass [[RDD[InternalRow]]] back as [[RDD[Row]]]
|
||||||
// Please check [[needConversion]] scala-doc for more details
|
// Please check [[needConversion]] scala-doc for more details
|
||||||
doBuildScan(requiredColumns, filters).asInstanceOf[RDD[Row]]
|
if (fileSplits.nonEmpty)
|
||||||
|
composeRDD(fileSplits, partitionSchema, tableSchema, requiredSchema, filters).asInstanceOf[RDD[Row]]
|
||||||
|
else
|
||||||
|
sparkSession.sparkContext.emptyRDD
|
||||||
}
|
}
|
||||||
|
|
||||||
protected def doBuildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[InternalRow]
|
/**
|
||||||
|
* Composes RDD provided file splits to read from, table and partition schemas, data filters to be applied
|
||||||
|
*
|
||||||
|
* @param fileSplits file splits to be handled by the RDD
|
||||||
|
* @param partitionSchema target table's partition schema
|
||||||
|
* @param tableSchema target table's schema
|
||||||
|
* @param requiredSchema projected schema required by the reader
|
||||||
|
* @param filters data filters to be applied
|
||||||
|
* @return instance of RDD (implementing [[HoodieUnsafeRDD]])
|
||||||
|
*/
|
||||||
|
protected def composeRDD(fileSplits: Seq[FileSplit],
|
||||||
|
partitionSchema: StructType,
|
||||||
|
tableSchema: HoodieTableSchema,
|
||||||
|
requiredSchema: HoodieTableSchema,
|
||||||
|
filters: Array[Filter]): HoodieUnsafeRDD
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provided with partition and date filters collects target file splits to read records from, while
|
||||||
|
* performing pruning if necessary
|
||||||
|
*
|
||||||
|
* @param partitionFilters partition filters to be applied
|
||||||
|
* @param dataFilters data filters to be applied
|
||||||
|
* @return list of [[FileSplit]] to fetch records from
|
||||||
|
*/
|
||||||
|
protected def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[FileSplit]
|
||||||
|
|
||||||
|
protected def listLatestBaseFiles(globbedPaths: Seq[Path], partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Map[Path, Seq[FileStatus]] = {
|
||||||
|
val partitionDirs = if (globbedPaths.isEmpty) {
|
||||||
|
fileIndex.listFiles(partitionFilters, dataFilters)
|
||||||
|
} else {
|
||||||
|
val inMemoryFileIndex = HoodieInMemoryFileIndex.create(sparkSession, globbedPaths)
|
||||||
|
inMemoryFileIndex.listFiles(partitionFilters, dataFilters)
|
||||||
|
}
|
||||||
|
|
||||||
|
val fsView = new HoodieTableFileSystemView(metaClient, timeline, partitionDirs.flatMap(_.files).toArray)
|
||||||
|
val latestBaseFiles = fsView.getLatestBaseFiles.iterator().asScala.toList.map(_.getFileStatus)
|
||||||
|
|
||||||
|
latestBaseFiles.groupBy(getPartitionPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def convertToExpressions(filters: Array[Filter]): Array[Expression] = {
|
||||||
|
val catalystExpressions = HoodieSparkUtils.convertToCatalystExpressions(filters, tableStructSchema)
|
||||||
|
|
||||||
|
val failedExprs = catalystExpressions.zipWithIndex.filter { case (opt, _) => opt.isEmpty }
|
||||||
|
if (failedExprs.nonEmpty) {
|
||||||
|
val failedFilters = failedExprs.map(p => filters(p._2))
|
||||||
|
logWarning(s"Failed to convert Filters into Catalyst expressions (${failedFilters.map(_.toString)})")
|
||||||
|
}
|
||||||
|
|
||||||
|
catalystExpressions.filter(_.isDefined).map(_.get).toArray
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether given expression only references partition columns
|
||||||
|
* (and involves no sub-query)
|
||||||
|
*/
|
||||||
|
protected def isPartitionPredicate(condition: Expression): Boolean = {
|
||||||
|
// Validates that the provided names both resolve to the same entity
|
||||||
|
val resolvedNameEquals = sparkSession.sessionState.analyzer.resolver
|
||||||
|
|
||||||
|
condition.references.forall { r => partitionColumns.exists(resolvedNameEquals(r.name, _)) } &&
|
||||||
|
!SubqueryExpression.hasSubquery(condition)
|
||||||
|
}
|
||||||
|
|
||||||
protected final def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = {
|
protected final def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = {
|
||||||
val missing = mandatoryColumns.filter(col => !requestedColumns.contains(col))
|
val missing = mandatoryColumns.filter(col => !requestedColumns.contains(col))
|
||||||
requestedColumns ++ missing
|
requestedColumns ++ missing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private def getPrecombineFieldProperty: Option[String] =
|
||||||
|
Option(tableConfig.getPreCombineField)
|
||||||
|
.orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match {
|
||||||
|
// NOTE: This is required to compensate for cases when empty string is used to stub
|
||||||
|
// property value to avoid it being set with the default value
|
||||||
|
// TODO(HUDI-3456) cleanup
|
||||||
|
case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f)
|
||||||
|
case _ => None
|
||||||
|
}
|
||||||
|
|
||||||
|
private def imbueConfigs(sqlContext: SQLContext): Unit = {
|
||||||
|
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true")
|
||||||
|
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true")
|
||||||
|
// TODO(HUDI-3639) vectorized reader has to be disabled to make sure MORIncrementalRelation is working properly
|
||||||
|
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
object HoodieBaseRelation {
|
object HoodieBaseRelation {
|
||||||
|
|
||||||
def isMetadataTable(metaClient: HoodieTableMetaClient) =
|
def getPartitionPath(fileStatus: FileStatus): Path =
|
||||||
|
fileStatus.getPath.getParent
|
||||||
|
|
||||||
|
def isMetadataTable(metaClient: HoodieTableMetaClient): Boolean =
|
||||||
HoodieTableMetadata.isMetadataTable(metaClient.getBasePath)
|
HoodieTableMetadata.isMetadataTable(metaClient.getBasePath)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import org.apache.hudi.common.model.HoodieBaseFile
|
|||||||
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
||||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
||||||
import org.apache.hudi.exception.HoodieException
|
import org.apache.hudi.exception.HoodieException
|
||||||
|
import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex
|
||||||
import org.apache.spark.internal.Logging
|
import org.apache.spark.internal.Logging
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.rdd.RDD
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
@@ -157,7 +158,7 @@ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext,
|
|||||||
logInfo("Building file index..")
|
logInfo("Building file index..")
|
||||||
val fileStatuses = if (globPaths.nonEmpty) {
|
val fileStatuses = if (globPaths.nonEmpty) {
|
||||||
// Load files from the global paths if it has defined to be compatible with the original mode
|
// Load files from the global paths if it has defined to be compatible with the original mode
|
||||||
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(_sqlContext.sparkSession, globPaths)
|
val inMemoryFileIndex = HoodieInMemoryFileIndex.create(_sqlContext.sparkSession, globPaths)
|
||||||
inMemoryFileIndex.allFiles()
|
inMemoryFileIndex.allFiles()
|
||||||
} else { // Load files by the HoodieFileIndex.
|
} else { // Load files by the HoodieFileIndex.
|
||||||
HoodieFileIndex(sqlContext.sparkSession, metaClient, Some(schema), optParams,
|
HoodieFileIndex(sqlContext.sparkSession, metaClient, Some(schema), optParams,
|
||||||
|
|||||||
@@ -65,20 +65,6 @@ object HoodieDataSourceHelper extends PredicateHelper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract the required schema from [[InternalRow]]
|
|
||||||
*/
|
|
||||||
def extractRequiredSchema(
|
|
||||||
iter: Iterator[InternalRow],
|
|
||||||
requiredSchema: StructType,
|
|
||||||
requiredFieldPos: Seq[Int]): Iterator[InternalRow] = {
|
|
||||||
val unsafeProjection = UnsafeProjection.create(requiredSchema)
|
|
||||||
val rows = iter.map { row =>
|
|
||||||
unsafeProjection(createInternalRowWithSchema(row, requiredSchema, requiredFieldPos))
|
|
||||||
}
|
|
||||||
rows
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert [[InternalRow]] to [[SpecificInternalRow]].
|
* Convert [[InternalRow]] to [[SpecificInternalRow]].
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -348,7 +348,7 @@ object HoodieFileIndex extends Logging {
|
|||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
case NonFatal(e) =>
|
case NonFatal(e) =>
|
||||||
logWarning("Fail to convert filters for TimestampBaseAvroKeyGenerator.")
|
logWarning("Fail to convert filters for TimestampBaseAvroKeyGenerator", e)
|
||||||
partitionFilters
|
partitionFilters
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,12 +24,14 @@ import org.apache.spark.sql.execution.QueryExecutionException
|
|||||||
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, SchemaColumnConvertNotSupportedException}
|
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, SchemaColumnConvertNotSupportedException}
|
||||||
import org.apache.spark.{Partition, TaskContext}
|
import org.apache.spark.{Partition, TaskContext}
|
||||||
|
|
||||||
|
case class HoodieBaseFileSplit(filePartition: FilePartition) extends HoodieFileSplit
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO eval if we actually need it
|
* TODO eval if we actually need it
|
||||||
*/
|
*/
|
||||||
class HoodieFileScanRDD(@transient private val sparkSession: SparkSession,
|
class HoodieFileScanRDD(@transient private val sparkSession: SparkSession,
|
||||||
readFunction: PartitionedFile => Iterator[InternalRow],
|
readFunction: PartitionedFile => Iterator[InternalRow],
|
||||||
@transient fileSplits: Seq[FilePartition])
|
@transient fileSplits: Seq[HoodieBaseFileSplit])
|
||||||
extends HoodieUnsafeRDD(sparkSession.sparkContext) {
|
extends HoodieUnsafeRDD(sparkSession.sparkContext) {
|
||||||
|
|
||||||
override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = {
|
override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = {
|
||||||
@@ -77,5 +79,5 @@ class HoodieFileScanRDD(@transient private val sparkSession: SparkSession,
|
|||||||
iterator.asInstanceOf[Iterator[InternalRow]]
|
iterator.asInstanceOf[Iterator[InternalRow]]
|
||||||
}
|
}
|
||||||
|
|
||||||
override protected def getPartitions: Array[Partition] = fileSplits.toArray
|
override protected def getPartitions: Array[Partition] = fileSplits.map(_.filePartition).toArray
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
|||||||
tableState: HoodieTableState,
|
tableState: HoodieTableState,
|
||||||
tableSchema: HoodieTableSchema,
|
tableSchema: HoodieTableSchema,
|
||||||
requiredSchema: HoodieTableSchema,
|
requiredSchema: HoodieTableSchema,
|
||||||
@transient fileSplits: List[HoodieMergeOnReadFileSplit])
|
@transient fileSplits: Seq[HoodieMergeOnReadFileSplit])
|
||||||
extends HoodieUnsafeRDD(sc) {
|
extends HoodieUnsafeRDD(sc) {
|
||||||
|
|
||||||
private val confBroadcast = sc.broadcast(new SerializableWritable(config))
|
private val confBroadcast = sc.broadcast(new SerializableWritable(config))
|
||||||
|
|||||||
@@ -20,96 +20,45 @@ package org.apache.hudi
|
|||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.{GlobPattern, Path}
|
import org.apache.hadoop.fs.{GlobPattern, Path}
|
||||||
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
||||||
import org.apache.hudi.common.model.HoodieRecord
|
import org.apache.hudi.HoodieConversionUtils.toScalaOption
|
||||||
|
import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath
|
||||||
|
import org.apache.hudi.common.model.{FileSlice, HoodieRecord}
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
|
import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline}
|
||||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
||||||
|
import org.apache.hudi.common.util.StringUtils
|
||||||
import org.apache.hudi.exception.HoodieException
|
import org.apache.hudi.exception.HoodieException
|
||||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.{getCommitMetadata, getWritePartitionPaths, listAffectedFilesForCommits}
|
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.{getCommitMetadata, getWritePartitionPaths, listAffectedFilesForCommits}
|
||||||
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
|
import org.apache.spark.sql.SQLContext
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||||
import org.apache.spark.sql.{Row, SQLContext}
|
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
|
||||||
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
|
||||||
import org.apache.spark.sql.sources._
|
import org.apache.spark.sql.sources._
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
import scala.collection.JavaConversions._
|
import scala.collection.JavaConverters._
|
||||||
|
import scala.collection.immutable
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Experimental.
|
* @Experimental
|
||||||
* Relation, that implements the Hoodie incremental view for Merge On Read table.
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
class MergeOnReadIncrementalRelation(sqlContext: SQLContext,
|
class MergeOnReadIncrementalRelation(sqlContext: SQLContext,
|
||||||
val optParams: Map[String, String],
|
optParams: Map[String, String],
|
||||||
val userSchema: Option[StructType],
|
userSchema: Option[StructType],
|
||||||
val metaClient: HoodieTableMetaClient)
|
metaClient: HoodieTableMetaClient)
|
||||||
extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) {
|
extends MergeOnReadSnapshotRelation(sqlContext, optParams, userSchema, Seq(), metaClient) with HoodieIncrementalRelationTrait {
|
||||||
|
|
||||||
private val commitTimeline = metaClient.getCommitsAndCompactionTimeline.filterCompletedInstants()
|
override type FileSplit = HoodieMergeOnReadFileSplit
|
||||||
if (commitTimeline.empty()) {
|
|
||||||
throw new HoodieException("No instants to incrementally pull")
|
override protected def timeline: HoodieTimeline = {
|
||||||
}
|
val startTimestamp = optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key)
|
||||||
if (!optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME.key)) {
|
val endTimestamp = optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key, super.timeline.lastInstant().get.getTimestamp)
|
||||||
throw new HoodieException(s"Specify the begin instant time to pull from using " +
|
super.timeline.findInstantsInRange(startTimestamp, endTimestamp)
|
||||||
s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME.key}")
|
|
||||||
}
|
|
||||||
if (!metaClient.getTableConfig.populateMetaFields()) {
|
|
||||||
throw new HoodieException("Incremental queries are not supported when meta fields are disabled")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private val lastInstant = commitTimeline.lastInstant().get()
|
protected override def composeRDD(fileSplits: Seq[HoodieMergeOnReadFileSplit],
|
||||||
private val mergeType = optParams.getOrElse(
|
partitionSchema: StructType,
|
||||||
DataSourceReadOptions.REALTIME_MERGE.key,
|
tableSchema: HoodieTableSchema,
|
||||||
DataSourceReadOptions.REALTIME_MERGE.defaultValue)
|
requiredSchema: HoodieTableSchema,
|
||||||
|
filters: Array[Filter]): HoodieMergeOnReadRDD = {
|
||||||
private val commitsTimelineToReturn = commitTimeline.findInstantsInRange(
|
|
||||||
optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key),
|
|
||||||
optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key, lastInstant.getTimestamp))
|
|
||||||
logDebug(s"${commitsTimelineToReturn.getInstants.iterator().toList.map(f => f.toString).mkString(",")}")
|
|
||||||
private val commitsToReturn = commitsTimelineToReturn.getInstants.iterator().toList
|
|
||||||
|
|
||||||
private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
|
|
||||||
|
|
||||||
private val fileIndex = if (commitsToReturn.isEmpty) List() else buildFileIndex()
|
|
||||||
|
|
||||||
// Record filters making sure that only records w/in the requested bounds are being fetched as part of the
|
|
||||||
// scan collected by this relation
|
|
||||||
private lazy val incrementalSpanRecordsFilters: Seq[Filter] = {
|
|
||||||
val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD)
|
|
||||||
val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp)
|
|
||||||
val lessThanFilter = LessThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp)
|
|
||||||
Seq(isNotNullFilter, largerThanFilter, lessThanFilter)
|
|
||||||
}
|
|
||||||
|
|
||||||
override lazy val mandatoryColumns: Seq[String] = {
|
|
||||||
// NOTE: This columns are required for Incremental flow to be able to handle the rows properly, even in
|
|
||||||
// cases when no columns are requested to be fetched (for ex, when using {@code count()} API)
|
|
||||||
Seq(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD) ++
|
|
||||||
preCombineFieldOpt.map(Seq(_)).getOrElse(Seq())
|
|
||||||
}
|
|
||||||
|
|
||||||
override def doBuildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[InternalRow] = {
|
|
||||||
if (fileIndex.isEmpty) {
|
|
||||||
sqlContext.sparkContext.emptyRDD[InternalRow]
|
|
||||||
} else {
|
|
||||||
logDebug(s"buildScan requiredColumns = ${requiredColumns.mkString(",")}")
|
|
||||||
logDebug(s"buildScan filters = ${filters.mkString(",")}")
|
|
||||||
|
|
||||||
// config to ensure the push down filter for parquet will be applied.
|
|
||||||
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true")
|
|
||||||
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true")
|
|
||||||
sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false")
|
|
||||||
|
|
||||||
val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns)
|
|
||||||
|
|
||||||
val (requiredAvroSchema, requiredStructSchema) =
|
|
||||||
HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns)
|
|
||||||
|
|
||||||
val partitionSchema = StructType(Nil)
|
|
||||||
val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString)
|
|
||||||
val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString)
|
|
||||||
|
|
||||||
val fullSchemaParquetReader = createBaseFileReader(
|
val fullSchemaParquetReader = createBaseFileReader(
|
||||||
spark = sqlContext.sparkSession,
|
spark = sqlContext.sparkSession,
|
||||||
partitionSchema = partitionSchema,
|
partitionSchema = partitionSchema,
|
||||||
@@ -122,18 +71,19 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext,
|
|||||||
//
|
//
|
||||||
// The only filtering applicable here is the filtering to make sure we're only fetching records that
|
// The only filtering applicable here is the filtering to make sure we're only fetching records that
|
||||||
// fall into incremental span of the timeline being queried
|
// fall into incremental span of the timeline being queried
|
||||||
filters = incrementalSpanRecordsFilters,
|
filters = incrementalSpanRecordFilters,
|
||||||
options = optParams,
|
options = optParams,
|
||||||
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
||||||
// to configure Parquet reader appropriately
|
// to configure Parquet reader appropriately
|
||||||
hadoopConf = new Configuration(conf)
|
hadoopConf = new Configuration(conf)
|
||||||
)
|
)
|
||||||
|
|
||||||
val requiredSchemaParquetReader = createBaseFileReader(
|
val requiredSchemaParquetReader = createBaseFileReader(
|
||||||
spark = sqlContext.sparkSession,
|
spark = sqlContext.sparkSession,
|
||||||
partitionSchema = partitionSchema,
|
partitionSchema = partitionSchema,
|
||||||
tableSchema = tableSchema,
|
tableSchema = tableSchema,
|
||||||
requiredSchema = requiredSchema,
|
requiredSchema = requiredSchema,
|
||||||
filters = filters ++ incrementalSpanRecordsFilters,
|
filters = filters ++ incrementalSpanRecordFilters,
|
||||||
options = optParams,
|
options = optParams,
|
||||||
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
// NOTE: We have to fork the Hadoop Config here as Spark will be modifying it
|
||||||
// to configure Parquet reader appropriately
|
// to configure Parquet reader appropriately
|
||||||
@@ -142,79 +92,89 @@ class MergeOnReadIncrementalRelation(sqlContext: SQLContext,
|
|||||||
|
|
||||||
val hoodieTableState = HoodieTableState(HoodieRecord.RECORD_KEY_METADATA_FIELD, preCombineFieldOpt)
|
val hoodieTableState = HoodieTableState(HoodieRecord.RECORD_KEY_METADATA_FIELD, preCombineFieldOpt)
|
||||||
|
|
||||||
// TODO implement incremental span record filtering w/in RDD to make sure returned iterator is appropriately
|
// TODO(HUDI-3639) implement incremental span record filtering w/in RDD to make sure returned iterator is appropriately
|
||||||
// filtered, since file-reader might not be capable to perform filtering
|
// filtered, since file-reader might not be capable to perform filtering
|
||||||
new HoodieMergeOnReadRDD(
|
new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader,
|
||||||
sqlContext.sparkContext,
|
requiredSchemaParquetReader, hoodieTableState, tableSchema, requiredSchema, fileSplits)
|
||||||
jobConf,
|
}
|
||||||
fullSchemaParquetReader,
|
|
||||||
requiredSchemaParquetReader,
|
override protected def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): List[HoodieMergeOnReadFileSplit] = {
|
||||||
hoodieTableState,
|
if (includedCommits.isEmpty) {
|
||||||
tableSchema,
|
List()
|
||||||
requiredSchema,
|
} else {
|
||||||
fileIndex
|
val latestCommit = includedCommits.last.getTimestamp
|
||||||
)
|
val commitsMetadata = includedCommits.map(getCommitMetadata(_, timeline)).asJava
|
||||||
|
|
||||||
|
val modifiedFiles = listAffectedFilesForCommits(conf, new Path(metaClient.getBasePath), commitsMetadata)
|
||||||
|
val fsView = new HoodieTableFileSystemView(metaClient, timeline, modifiedFiles)
|
||||||
|
|
||||||
|
val modifiedPartitions = getWritePartitionPaths(commitsMetadata)
|
||||||
|
|
||||||
|
val fileSlices = modifiedPartitions.asScala.flatMap { relativePartitionPath =>
|
||||||
|
fsView.getLatestMergedFileSlicesBeforeOrOn(relativePartitionPath, latestCommit).iterator().asScala
|
||||||
|
}.toSeq
|
||||||
|
|
||||||
|
buildSplits(filterFileSlices(fileSlices, globPattern))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def buildFileIndex(): List[HoodieMergeOnReadFileSplit] = {
|
private def filterFileSlices(fileSlices: Seq[FileSlice], pathGlobPattern: String): Seq[FileSlice] = {
|
||||||
val metadataList = commitsToReturn.map(instant => getCommitMetadata(instant, commitsTimelineToReturn))
|
val filteredFileSlices = if (!StringUtils.isNullOrEmpty(pathGlobPattern)) {
|
||||||
val affectedFileStatus = listAffectedFilesForCommits(conf, new Path(metaClient.getBasePath), metadataList)
|
|
||||||
val fsView = new HoodieTableFileSystemView(metaClient, commitsTimelineToReturn, affectedFileStatus)
|
|
||||||
|
|
||||||
// Iterate partitions to create splits
|
|
||||||
val fileGroups = getWritePartitionPaths(metadataList).flatMap(partitionPath =>
|
|
||||||
fsView.getAllFileGroups(partitionPath).iterator()
|
|
||||||
).toList
|
|
||||||
val latestCommit = fsView.getLastInstant.get.getTimestamp
|
|
||||||
if (log.isDebugEnabled) {
|
|
||||||
fileGroups.foreach(f => logDebug(s"current file group id: " +
|
|
||||||
s"${f.getFileGroupId} and file slices ${f.getLatestFileSlice.get.toString}"))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Filter files based on user defined glob pattern
|
|
||||||
val pathGlobPattern = optParams.getOrElse(
|
|
||||||
DataSourceReadOptions.INCR_PATH_GLOB.key,
|
|
||||||
DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)
|
|
||||||
val filteredFileGroup = if (!pathGlobPattern.equals(DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)) {
|
|
||||||
val globMatcher = new GlobPattern("*" + pathGlobPattern)
|
val globMatcher = new GlobPattern("*" + pathGlobPattern)
|
||||||
fileGroups.filter(fg => {
|
fileSlices.filter(fileSlice => {
|
||||||
val latestFileSlice = fg.getLatestFileSlice.get
|
val path = toScalaOption(fileSlice.getBaseFile).map(_.getPath)
|
||||||
if (latestFileSlice.getBaseFile.isPresent) {
|
.orElse(toScalaOption(fileSlice.getLatestLogFile).map(_.getPath.toString))
|
||||||
globMatcher.matches(latestFileSlice.getBaseFile.get.getPath)
|
.get
|
||||||
} else {
|
globMatcher.matches(path)
|
||||||
globMatcher.matches(latestFileSlice.getLatestLogFile.get.getPath.toString)
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
fileGroups
|
fileSlices
|
||||||
}
|
}
|
||||||
|
filteredFileSlices
|
||||||
// Build HoodieMergeOnReadFileSplit.
|
|
||||||
filteredFileGroup.map(f => {
|
|
||||||
// Ensure get the base file when there is a pending compaction, which means the base file
|
|
||||||
// won't be in the latest file slice.
|
|
||||||
val baseFiles = f.getAllFileSlices.iterator().filter(slice => slice.getBaseFile.isPresent).toList
|
|
||||||
val partitionedFile = if (baseFiles.nonEmpty) {
|
|
||||||
val baseFile = baseFiles.head.getBaseFile
|
|
||||||
val filePath = MergeOnReadSnapshotRelation.getFilePath(baseFile.get.getFileStatus.getPath)
|
|
||||||
Option(PartitionedFile(InternalRow.empty, filePath, 0, baseFile.get.getFileLen))
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Option.empty
|
|
||||||
}
|
|
||||||
|
|
||||||
val logPath = if (f.getLatestFileSlice.isPresent) {
|
|
||||||
// If log path doesn't exist, we still include an empty path to avoid using
|
|
||||||
// the default parquet reader to ensure the push down filter will be applied.
|
|
||||||
Option(f.getLatestFileSlice.get().getLogFiles.iterator().toList)
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
Option.empty
|
|
||||||
}
|
|
||||||
|
|
||||||
HoodieMergeOnReadFileSplit(partitionedFile, logPath,
|
|
||||||
latestCommit, metaClient.getBasePath, maxCompactionMemoryInBytes, mergeType)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trait HoodieIncrementalRelationTrait extends HoodieBaseRelation {
|
||||||
|
|
||||||
|
// Validate this Incremental implementation is properly configured
|
||||||
|
validate()
|
||||||
|
|
||||||
|
protected lazy val includedCommits: immutable.Seq[HoodieInstant] = timeline.getInstants.iterator().asScala.toList
|
||||||
|
|
||||||
|
// Record filters making sure that only records w/in the requested bounds are being fetched as part of the
|
||||||
|
// scan collected by this relation
|
||||||
|
protected lazy val incrementalSpanRecordFilters: Seq[Filter] = {
|
||||||
|
val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD)
|
||||||
|
val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, includedCommits.head.getTimestamp)
|
||||||
|
val lessThanFilter = LessThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, includedCommits.last.getTimestamp)
|
||||||
|
|
||||||
|
Seq(isNotNullFilter, largerThanFilter, lessThanFilter)
|
||||||
|
}
|
||||||
|
|
||||||
|
override lazy val mandatoryColumns: Seq[String] = {
|
||||||
|
// NOTE: This columns are required for Incremental flow to be able to handle the rows properly, even in
|
||||||
|
// cases when no columns are requested to be fetched (for ex, when using {@code count()} API)
|
||||||
|
Seq(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD) ++
|
||||||
|
preCombineFieldOpt.map(Seq(_)).getOrElse(Seq())
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def validate(): Unit = {
|
||||||
|
if (super.timeline.empty()) {
|
||||||
|
throw new HoodieException("No instants to incrementally pull")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this.optParams.contains(DataSourceReadOptions.BEGIN_INSTANTTIME.key)) {
|
||||||
|
throw new HoodieException(s"Specify the begin instant time to pull from using " +
|
||||||
|
s"option ${DataSourceReadOptions.BEGIN_INSTANTTIME.key}")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this.tableConfig.populateMetaFields()) {
|
||||||
|
throw new HoodieException("Incremental queries are not supported when meta fields are disabled")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def globPattern: String =
|
||||||
|
optParams.getOrElse(DataSourceReadOptions.INCR_PATH_GLOB.key, DataSourceReadOptions.INCR_PATH_GLOB.defaultValue)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -21,16 +21,18 @@ package org.apache.hudi
|
|||||||
import org.apache.hadoop.conf.Configuration
|
import org.apache.hadoop.conf.Configuration
|
||||||
import org.apache.hadoop.fs.Path
|
import org.apache.hadoop.fs.Path
|
||||||
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
import org.apache.hudi.HoodieBaseRelation.createBaseFileReader
|
||||||
import org.apache.hudi.common.model.{HoodieLogFile, HoodieRecord}
|
import org.apache.hudi.HoodieConversionUtils.toScalaOption
|
||||||
|
import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath
|
||||||
|
import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath
|
||||||
|
import org.apache.hudi.common.model.{FileSlice, HoodieLogFile}
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient
|
import org.apache.hudi.common.table.HoodieTableMetaClient
|
||||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
import org.apache.hudi.common.table.view.HoodieTableFileSystemView
|
||||||
import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils
|
|
||||||
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
|
import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes
|
||||||
import org.apache.spark.rdd.RDD
|
import org.apache.spark.execution.datasources.HoodieInMemoryFileIndex
|
||||||
import org.apache.spark.sql.SQLContext
|
import org.apache.spark.sql.SQLContext
|
||||||
import org.apache.spark.sql.catalyst.InternalRow
|
import org.apache.spark.sql.catalyst.InternalRow
|
||||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||||
import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionedFile}
|
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
||||||
import org.apache.spark.sql.sources.Filter
|
import org.apache.spark.sql.sources.Filter
|
||||||
import org.apache.spark.sql.types.StructType
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
@@ -41,43 +43,28 @@ case class HoodieMergeOnReadFileSplit(dataFile: Option[PartitionedFile],
|
|||||||
latestCommit: String,
|
latestCommit: String,
|
||||||
tablePath: String,
|
tablePath: String,
|
||||||
maxCompactionMemoryInBytes: Long,
|
maxCompactionMemoryInBytes: Long,
|
||||||
mergeType: String)
|
mergeType: String) extends HoodieFileSplit
|
||||||
|
|
||||||
class MergeOnReadSnapshotRelation(sqlContext: SQLContext,
|
class MergeOnReadSnapshotRelation(sqlContext: SQLContext,
|
||||||
optParams: Map[String, String],
|
optParams: Map[String, String],
|
||||||
val userSchema: Option[StructType],
|
userSchema: Option[StructType],
|
||||||
val globPaths: Seq[Path],
|
globPaths: Seq[Path],
|
||||||
val metaClient: HoodieTableMetaClient)
|
metaClient: HoodieTableMetaClient)
|
||||||
extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) {
|
extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) {
|
||||||
|
|
||||||
|
override type FileSplit = HoodieMergeOnReadFileSplit
|
||||||
|
|
||||||
private val mergeType = optParams.getOrElse(
|
private val mergeType = optParams.getOrElse(
|
||||||
DataSourceReadOptions.REALTIME_MERGE.key,
|
DataSourceReadOptions.REALTIME_MERGE.key,
|
||||||
DataSourceReadOptions.REALTIME_MERGE.defaultValue)
|
DataSourceReadOptions.REALTIME_MERGE.defaultValue)
|
||||||
|
|
||||||
private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
|
private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
|
||||||
|
|
||||||
override def doBuildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[InternalRow] = {
|
protected override def composeRDD(fileIndex: Seq[HoodieMergeOnReadFileSplit],
|
||||||
log.debug(s" buildScan requiredColumns = ${requiredColumns.mkString(",")}")
|
partitionSchema: StructType,
|
||||||
log.debug(s" buildScan filters = ${filters.mkString(",")}")
|
tableSchema: HoodieTableSchema,
|
||||||
|
requiredSchema: HoodieTableSchema,
|
||||||
// NOTE: In case list of requested columns doesn't contain the Primary Key one, we
|
filters: Array[Filter]): HoodieMergeOnReadRDD = {
|
||||||
// have to add it explicitly so that
|
|
||||||
// - Merging could be performed correctly
|
|
||||||
// - In case 0 columns are to be fetched (for ex, when doing {@code count()} on Spark's [[Dataset]],
|
|
||||||
// Spark still fetches all the rows to execute the query correctly
|
|
||||||
//
|
|
||||||
// It's okay to return columns that have not been requested by the caller, as those nevertheless will be
|
|
||||||
// filtered out upstream
|
|
||||||
val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns)
|
|
||||||
|
|
||||||
val (requiredAvroSchema, requiredStructSchema) =
|
|
||||||
HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns)
|
|
||||||
val fileIndex = buildFileIndex(filters)
|
|
||||||
|
|
||||||
val partitionSchema = StructType(Nil)
|
|
||||||
val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString)
|
|
||||||
val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString)
|
|
||||||
|
|
||||||
val fullSchemaParquetReader = createBaseFileReader(
|
val fullSchemaParquetReader = createBaseFileReader(
|
||||||
spark = sqlContext.sparkSession,
|
spark = sqlContext.sparkSession,
|
||||||
partitionSchema = partitionSchema,
|
partitionSchema = partitionSchema,
|
||||||
@@ -93,6 +80,7 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext,
|
|||||||
// to configure Parquet reader appropriately
|
// to configure Parquet reader appropriately
|
||||||
hadoopConf = new Configuration(conf)
|
hadoopConf = new Configuration(conf)
|
||||||
)
|
)
|
||||||
|
|
||||||
val requiredSchemaParquetReader = createBaseFileReader(
|
val requiredSchemaParquetReader = createBaseFileReader(
|
||||||
spark = sqlContext.sparkSession,
|
spark = sqlContext.sparkSession,
|
||||||
partitionSchema = partitionSchema,
|
partitionSchema = partitionSchema,
|
||||||
@@ -111,90 +99,53 @@ class MergeOnReadSnapshotRelation(sqlContext: SQLContext,
|
|||||||
requiredSchemaParquetReader, tableState, tableSchema, requiredSchema, fileIndex)
|
requiredSchemaParquetReader, tableState, tableSchema, requiredSchema, fileIndex)
|
||||||
}
|
}
|
||||||
|
|
||||||
def buildFileIndex(filters: Array[Filter]): List[HoodieMergeOnReadFileSplit] = {
|
protected override def collectFileSplits(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): List[HoodieMergeOnReadFileSplit] = {
|
||||||
if (globPaths.nonEmpty) {
|
val convertedPartitionFilters =
|
||||||
// Load files from the global paths if it has defined to be compatible with the original mode
|
HoodieFileIndex.convertFilterForTimestampKeyGenerator(metaClient, partitionFilters)
|
||||||
val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths)
|
|
||||||
val fsView = new HoodieTableFileSystemView(metaClient,
|
|
||||||
// file-slice after pending compaction-requested instant-time is also considered valid
|
|
||||||
metaClient.getCommitsAndCompactionTimeline.filterCompletedAndCompactionInstants,
|
|
||||||
inMemoryFileIndex.allFiles().toArray)
|
|
||||||
val partitionPaths = fsView.getLatestBaseFiles.iterator().asScala.toList.map(_.getFileStatus.getPath.getParent)
|
|
||||||
|
|
||||||
|
if (globPaths.isEmpty) {
|
||||||
if (partitionPaths.isEmpty) { // If this an empty table, return an empty split list.
|
val fileSlices = fileIndex.listFileSlices(convertedPartitionFilters)
|
||||||
|
buildSplits(fileSlices.values.flatten.toSeq)
|
||||||
|
} else {
|
||||||
|
// TODO refactor to avoid iterating over listed files multiple times
|
||||||
|
val partitions = listLatestBaseFiles(globPaths, convertedPartitionFilters, dataFilters)
|
||||||
|
val partitionPaths = partitions.keys.toSeq
|
||||||
|
if (partitionPaths.isEmpty || latestInstant.isEmpty) {
|
||||||
|
// If this an empty table OR it has no completed commits yet, return
|
||||||
List.empty[HoodieMergeOnReadFileSplit]
|
List.empty[HoodieMergeOnReadFileSplit]
|
||||||
} else {
|
} else {
|
||||||
val lastInstant = metaClient.getActiveTimeline.getCommitsTimeline.filterCompletedInstants.lastInstant()
|
val fileSlices = listFileSlices(partitionPaths)
|
||||||
if (!lastInstant.isPresent) { // Return empty list if the table has no commit
|
buildSplits(fileSlices)
|
||||||
List.empty
|
|
||||||
} else {
|
|
||||||
val queryInstant = specifiedQueryInstant.getOrElse(lastInstant.get().getTimestamp)
|
|
||||||
val baseAndLogsList = HoodieRealtimeInputFormatUtils.groupLogsByBaseFile(conf, partitionPaths.asJava).asScala
|
|
||||||
val fileSplits = baseAndLogsList.map(kv => {
|
|
||||||
val baseFile = kv.getLeft
|
|
||||||
val logPaths = if (kv.getRight.isEmpty) Option.empty else Option(kv.getRight.asScala.toList)
|
|
||||||
|
|
||||||
val baseDataPath = if (baseFile.isPresent) {
|
|
||||||
Some(PartitionedFile(
|
|
||||||
InternalRow.empty,
|
|
||||||
MergeOnReadSnapshotRelation.getFilePath(baseFile.get.getFileStatus.getPath),
|
|
||||||
0, baseFile.get.getFileLen)
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
HoodieMergeOnReadFileSplit(baseDataPath, logPaths, queryInstant,
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected def buildSplits(fileSlices: Seq[FileSlice]): List[HoodieMergeOnReadFileSplit] = {
|
||||||
|
fileSlices.map { fileSlice =>
|
||||||
|
val baseFile = toScalaOption(fileSlice.getBaseFile)
|
||||||
|
val logFiles = Option(fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala.toList)
|
||||||
|
|
||||||
|
val partitionedBaseFile = baseFile.map { file =>
|
||||||
|
val filePath = getFilePath(file.getFileStatus.getPath)
|
||||||
|
PartitionedFile(InternalRow.empty, filePath, 0, file.getFileLen)
|
||||||
|
}
|
||||||
|
|
||||||
|
HoodieMergeOnReadFileSplit(partitionedBaseFile, logFiles, queryTimestamp.get,
|
||||||
metaClient.getBasePath, maxCompactionMemoryInBytes, mergeType)
|
metaClient.getBasePath, maxCompactionMemoryInBytes, mergeType)
|
||||||
}).toList
|
}.toList
|
||||||
fileSplits
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Load files by the HoodieFileIndex.
|
|
||||||
val hoodieFileIndex = HoodieFileIndex(sqlContext.sparkSession, metaClient,
|
|
||||||
Some(tableStructSchema), optParams, FileStatusCache.getOrCreate(sqlContext.sparkSession))
|
|
||||||
|
|
||||||
// Get partition filter and convert to catalyst expression
|
|
||||||
val partitionColumns = hoodieFileIndex.partitionSchema.fieldNames.toSet
|
|
||||||
val partitionFilters = filters.filter(f => f.references.forall(p => partitionColumns.contains(p)))
|
|
||||||
val partitionFilterExpression =
|
|
||||||
HoodieSparkUtils.convertToCatalystExpression(partitionFilters, tableStructSchema)
|
|
||||||
val convertedPartitionFilterExpression =
|
|
||||||
HoodieFileIndex.convertFilterForTimestampKeyGenerator(metaClient, partitionFilterExpression.toSeq)
|
|
||||||
|
|
||||||
// If convert success to catalyst expression, use the partition prune
|
|
||||||
val fileSlices = if (convertedPartitionFilterExpression.nonEmpty) {
|
|
||||||
hoodieFileIndex.listFileSlices(convertedPartitionFilterExpression)
|
|
||||||
} else {
|
|
||||||
hoodieFileIndex.listFileSlices(Seq.empty[Expression])
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fileSlices.isEmpty) {
|
private def listFileSlices(partitionPaths: Seq[Path]): Seq[FileSlice] = {
|
||||||
// If this an empty table, return an empty split list.
|
// NOTE: It's critical for us to re-use [[InMemoryFileIndex]] to make sure we're leveraging
|
||||||
List.empty[HoodieMergeOnReadFileSplit]
|
// [[FileStatusCache]] and avoid listing the whole table again
|
||||||
} else {
|
val inMemoryFileIndex = HoodieInMemoryFileIndex.create(sparkSession, partitionPaths)
|
||||||
val fileSplits = fileSlices.values.flatten.map(fileSlice => {
|
val fsView = new HoodieTableFileSystemView(metaClient, timeline, inMemoryFileIndex.allFiles.toArray)
|
||||||
val latestInstant = metaClient.getActiveTimeline.getCommitsTimeline
|
|
||||||
.filterCompletedInstants.lastInstant().get().getTimestamp
|
|
||||||
val queryInstant = specifiedQueryInstant.getOrElse(latestInstant)
|
|
||||||
|
|
||||||
val partitionedFile = if (fileSlice.getBaseFile.isPresent) {
|
val queryTimestamp = this.queryTimestamp.get
|
||||||
val baseFile = fileSlice.getBaseFile.get()
|
|
||||||
val baseFilePath = MergeOnReadSnapshotRelation.getFilePath(baseFile.getFileStatus.getPath)
|
|
||||||
Option(PartitionedFile(InternalRow.empty, baseFilePath, 0, baseFile.getFileLen))
|
|
||||||
} else {
|
|
||||||
Option.empty
|
|
||||||
}
|
|
||||||
|
|
||||||
val logPaths = fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala.toList
|
partitionPaths.flatMap { partitionPath =>
|
||||||
val logPathsOptional = if (logPaths.isEmpty) Option.empty else Option(logPaths)
|
val relativePath = getRelativePartitionPath(new Path(basePath), partitionPath)
|
||||||
|
fsView.getLatestMergedFileSlicesBeforeOrOn(relativePath, queryTimestamp).iterator().asScala.toSeq
|
||||||
HoodieMergeOnReadFileSplit(partitionedFile, logPathsOptional, queryInstant, metaClient.getBasePath,
|
|
||||||
maxCompactionMemoryInBytes, mergeType)
|
|
||||||
}).toList
|
|
||||||
fileSplits
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
|
|||||||
import org.apache.spark.unsafe.types.UTF8String
|
import org.apache.spark.unsafe.types.UTF8String
|
||||||
|
|
||||||
import scala.collection.JavaConverters._
|
import scala.collection.JavaConverters._
|
||||||
import scala.language.implicitConversions
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implementation of the [[BaseHoodieTableFileIndex]] for Spark
|
* Implementation of the [[BaseHoodieTableFileIndex]] for Spark
|
||||||
@@ -135,12 +134,14 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
|
|||||||
* Fetch list of latest base files w/ corresponding log files, after performing
|
* Fetch list of latest base files w/ corresponding log files, after performing
|
||||||
* partition pruning
|
* partition pruning
|
||||||
*
|
*
|
||||||
|
* TODO unify w/ HoodieFileIndex#listFiles
|
||||||
|
*
|
||||||
* @param partitionFilters partition column filters
|
* @param partitionFilters partition column filters
|
||||||
* @return mapping from string partition paths to its base/log files
|
* @return mapping from string partition paths to its base/log files
|
||||||
*/
|
*/
|
||||||
def listFileSlices(partitionFilters: Seq[Expression]): Map[String, Seq[FileSlice]] = {
|
def listFileSlices(partitionFilters: Seq[Expression]): Map[String, Seq[FileSlice]] = {
|
||||||
// Prune the partition path by the partition filters
|
// Prune the partition path by the partition filters
|
||||||
val prunedPartitions = prunePartition(cachedAllInputFileSlices.asScala.keys.toSeq, partitionFilters)
|
val prunedPartitions = prunePartition(cachedAllInputFileSlices.keySet().asScala.toSeq, partitionFilters)
|
||||||
prunedPartitions.map(partition => {
|
prunedPartitions.map(partition => {
|
||||||
(partition.path, cachedAllInputFileSlices.get(partition).asScala)
|
(partition.path, cachedAllInputFileSlices.get(partition).asScala)
|
||||||
}).toMap
|
}).toMap
|
||||||
|
|||||||
@@ -0,0 +1,370 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration
|
||||||
|
import org.apache.hadoop.fs.viewfs.ViewFileSystem
|
||||||
|
import org.apache.hadoop.fs._
|
||||||
|
import org.apache.hadoop.hdfs.DistributedFileSystem
|
||||||
|
import org.apache.spark.internal.Logging
|
||||||
|
import org.apache.spark.metrics.source.HiveCatalogMetrics
|
||||||
|
import org.apache.spark.util.SerializableConfiguration
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException
|
||||||
|
import scala.collection.mutable
|
||||||
|
|
||||||
|
/**
|
||||||
|
* NOTE: This method class is replica of HadoopFSUtils from Spark 3.2.1, with the following adjustments
|
||||||
|
*
|
||||||
|
* - Filtering out of the listed files is adjusted to include files starting w/ "." (to include Hoodie Delta Log
|
||||||
|
* files)
|
||||||
|
*/
|
||||||
|
object HoodieHadoopFSUtils extends Logging {
|
||||||
|
/**
|
||||||
|
* Lists a collection of paths recursively. Picks the listing strategy adaptively depending
|
||||||
|
* on the number of paths to list.
|
||||||
|
*
|
||||||
|
* This may only be called on the driver.
|
||||||
|
*
|
||||||
|
* @param sc Spark context used to run parallel listing.
|
||||||
|
* @param paths Input paths to list
|
||||||
|
* @param hadoopConf Hadoop configuration
|
||||||
|
* @param filter Path filter used to exclude leaf files from result
|
||||||
|
* @param ignoreMissingFiles Ignore missing files that happen during recursive listing
|
||||||
|
* (e.g., due to race conditions)
|
||||||
|
* @param ignoreLocality Whether to fetch data locality info when listing leaf files. If false,
|
||||||
|
* this will return `FileStatus` without `BlockLocation` info.
|
||||||
|
* @param parallelismThreshold The threshold to enable parallelism. If the number of input paths
|
||||||
|
* is smaller than this value, this will fallback to use
|
||||||
|
* sequential listing.
|
||||||
|
* @param parallelismMax The maximum parallelism for listing. If the number of input paths is
|
||||||
|
* larger than this value, parallelism will be throttled to this value
|
||||||
|
* to avoid generating too many tasks.
|
||||||
|
* @return for each input path, the set of discovered files for the path
|
||||||
|
*/
|
||||||
|
def parallelListLeafFiles(sc: SparkContext,
|
||||||
|
paths: Seq[Path],
|
||||||
|
hadoopConf: Configuration,
|
||||||
|
filter: PathFilter,
|
||||||
|
ignoreMissingFiles: Boolean,
|
||||||
|
ignoreLocality: Boolean,
|
||||||
|
parallelismThreshold: Int,
|
||||||
|
parallelismMax: Int): Seq[(Path, Seq[FileStatus])] = {
|
||||||
|
parallelListLeafFilesInternal(sc, paths, hadoopConf, filter, isRootLevel = true,
|
||||||
|
ignoreMissingFiles, ignoreLocality, parallelismThreshold, parallelismMax)
|
||||||
|
}
|
||||||
|
|
||||||
|
// scalastyle:off parameter.number
|
||||||
|
private def parallelListLeafFilesInternal(sc: SparkContext,
|
||||||
|
paths: Seq[Path],
|
||||||
|
hadoopConf: Configuration,
|
||||||
|
filter: PathFilter,
|
||||||
|
isRootLevel: Boolean,
|
||||||
|
ignoreMissingFiles: Boolean,
|
||||||
|
ignoreLocality: Boolean,
|
||||||
|
parallelismThreshold: Int,
|
||||||
|
parallelismMax: Int): Seq[(Path, Seq[FileStatus])] = {
|
||||||
|
|
||||||
|
// Short-circuits parallel listing when serial listing is likely to be faster.
|
||||||
|
if (paths.size <= parallelismThreshold) {
|
||||||
|
// scalastyle:off return
|
||||||
|
return paths.map { path =>
|
||||||
|
val leafFiles = listLeafFiles(
|
||||||
|
path,
|
||||||
|
hadoopConf,
|
||||||
|
filter,
|
||||||
|
Some(sc),
|
||||||
|
ignoreMissingFiles = ignoreMissingFiles,
|
||||||
|
ignoreLocality = ignoreLocality,
|
||||||
|
isRootPath = isRootLevel,
|
||||||
|
parallelismThreshold = parallelismThreshold,
|
||||||
|
parallelismMax = parallelismMax)
|
||||||
|
(path, leafFiles)
|
||||||
|
}
|
||||||
|
// scalastyle:on return
|
||||||
|
}
|
||||||
|
|
||||||
|
logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." +
|
||||||
|
s" The first several paths are: ${paths.take(10).mkString(", ")}.")
|
||||||
|
HiveCatalogMetrics.incrementParallelListingJobCount(1)
|
||||||
|
|
||||||
|
val serializableConfiguration = new SerializableConfiguration(hadoopConf)
|
||||||
|
val serializedPaths = paths.map(_.toString)
|
||||||
|
|
||||||
|
// Set the number of parallelism to prevent following file listing from generating many tasks
|
||||||
|
// in case of large #defaultParallelism.
|
||||||
|
val numParallelism = Math.min(paths.size, parallelismMax)
|
||||||
|
|
||||||
|
val previousJobDescription = sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION)
|
||||||
|
val statusMap = try {
|
||||||
|
val description = paths.size match {
|
||||||
|
case 0 =>
|
||||||
|
"Listing leaf files and directories 0 paths"
|
||||||
|
case 1 =>
|
||||||
|
s"Listing leaf files and directories for 1 path:<br/>${paths(0)}"
|
||||||
|
case s =>
|
||||||
|
s"Listing leaf files and directories for $s paths:<br/>${paths(0)}, ..."
|
||||||
|
}
|
||||||
|
sc.setJobDescription(description)
|
||||||
|
sc
|
||||||
|
.parallelize(serializedPaths, numParallelism)
|
||||||
|
.mapPartitions { pathStrings =>
|
||||||
|
val hadoopConf = serializableConfiguration.value
|
||||||
|
pathStrings.map(new Path(_)).toSeq.map { path =>
|
||||||
|
val leafFiles = listLeafFiles(
|
||||||
|
path = path,
|
||||||
|
hadoopConf = hadoopConf,
|
||||||
|
filter = filter,
|
||||||
|
contextOpt = None, // Can't execute parallel scans on workers
|
||||||
|
ignoreMissingFiles = ignoreMissingFiles,
|
||||||
|
ignoreLocality = ignoreLocality,
|
||||||
|
isRootPath = isRootLevel,
|
||||||
|
parallelismThreshold = Int.MaxValue,
|
||||||
|
parallelismMax = 0)
|
||||||
|
(path, leafFiles)
|
||||||
|
}.iterator
|
||||||
|
}.map { case (path, statuses) =>
|
||||||
|
val serializableStatuses = statuses.map { status =>
|
||||||
|
// Turn FileStatus into SerializableFileStatus so we can send it back to the driver
|
||||||
|
val blockLocations = status match {
|
||||||
|
case f: LocatedFileStatus =>
|
||||||
|
f.getBlockLocations.map { loc =>
|
||||||
|
SerializableBlockLocation(
|
||||||
|
loc.getNames,
|
||||||
|
loc.getHosts,
|
||||||
|
loc.getOffset,
|
||||||
|
loc.getLength)
|
||||||
|
}
|
||||||
|
|
||||||
|
case _ =>
|
||||||
|
Array.empty[SerializableBlockLocation]
|
||||||
|
}
|
||||||
|
|
||||||
|
SerializableFileStatus(
|
||||||
|
status.getPath.toString,
|
||||||
|
status.getLen,
|
||||||
|
status.isDirectory,
|
||||||
|
status.getReplication,
|
||||||
|
status.getBlockSize,
|
||||||
|
status.getModificationTime,
|
||||||
|
status.getAccessTime,
|
||||||
|
blockLocations)
|
||||||
|
}
|
||||||
|
(path.toString, serializableStatuses)
|
||||||
|
}.collect()
|
||||||
|
} finally {
|
||||||
|
sc.setJobDescription(previousJobDescription)
|
||||||
|
}
|
||||||
|
|
||||||
|
// turn SerializableFileStatus back to Status
|
||||||
|
statusMap.map { case (path, serializableStatuses) =>
|
||||||
|
val statuses = serializableStatuses.map { f =>
|
||||||
|
val blockLocations = f.blockLocations.map { loc =>
|
||||||
|
new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length)
|
||||||
|
}
|
||||||
|
new LocatedFileStatus(
|
||||||
|
new FileStatus(
|
||||||
|
f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime,
|
||||||
|
new Path(f.path)),
|
||||||
|
blockLocations)
|
||||||
|
}
|
||||||
|
(new Path(path), statuses)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// scalastyle:on parameter.number
|
||||||
|
|
||||||
|
// scalastyle:off parameter.number
|
||||||
|
/**
|
||||||
|
* Lists a single filesystem path recursively. If a `SparkContext` object is specified, this
|
||||||
|
* function may launch Spark jobs to parallelize listing based on `parallelismThreshold`.
|
||||||
|
*
|
||||||
|
* If sessionOpt is None, this may be called on executors.
|
||||||
|
*
|
||||||
|
* @return all children of path that match the specified filter.
|
||||||
|
*/
|
||||||
|
private def listLeafFiles(path: Path,
|
||||||
|
hadoopConf: Configuration,
|
||||||
|
filter: PathFilter,
|
||||||
|
contextOpt: Option[SparkContext],
|
||||||
|
ignoreMissingFiles: Boolean,
|
||||||
|
ignoreLocality: Boolean,
|
||||||
|
isRootPath: Boolean,
|
||||||
|
parallelismThreshold: Int,
|
||||||
|
parallelismMax: Int): Seq[FileStatus] = {
|
||||||
|
|
||||||
|
logTrace(s"Listing $path")
|
||||||
|
val fs = path.getFileSystem(hadoopConf)
|
||||||
|
|
||||||
|
// Note that statuses only include FileStatus for the files and dirs directly under path,
|
||||||
|
// and does not include anything else recursively.
|
||||||
|
val statuses: Array[FileStatus] = try {
|
||||||
|
fs match {
|
||||||
|
// DistributedFileSystem overrides listLocatedStatus to make 1 single call to namenode
|
||||||
|
// to retrieve the file status with the file block location. The reason to still fallback
|
||||||
|
// to listStatus is because the default implementation would potentially throw a
|
||||||
|
// FileNotFoundException which is better handled by doing the lookups manually below.
|
||||||
|
case (_: DistributedFileSystem | _: ViewFileSystem) if !ignoreLocality =>
|
||||||
|
val remoteIter = fs.listLocatedStatus(path)
|
||||||
|
new Iterator[LocatedFileStatus]() {
|
||||||
|
def next(): LocatedFileStatus = remoteIter.next
|
||||||
|
|
||||||
|
def hasNext(): Boolean = remoteIter.hasNext
|
||||||
|
}.toArray
|
||||||
|
case _ => fs.listStatus(path)
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// If we are listing a root path for SQL (e.g. a top level directory of a table), we need to
|
||||||
|
// ignore FileNotFoundExceptions during this root level of the listing because
|
||||||
|
//
|
||||||
|
// (a) certain code paths might construct an InMemoryFileIndex with root paths that
|
||||||
|
// might not exist (i.e. not all callers are guaranteed to have checked
|
||||||
|
// path existence prior to constructing InMemoryFileIndex) and,
|
||||||
|
// (b) we need to ignore deleted root paths during REFRESH TABLE, otherwise we break
|
||||||
|
// existing behavior and break the ability drop SessionCatalog tables when tables'
|
||||||
|
// root directories have been deleted (which breaks a number of Spark's own tests).
|
||||||
|
//
|
||||||
|
// If we are NOT listing a root path then a FileNotFoundException here means that the
|
||||||
|
// directory was present in a previous level of file listing but is absent in this
|
||||||
|
// listing, likely indicating a race condition (e.g. concurrent table overwrite or S3
|
||||||
|
// list inconsistency).
|
||||||
|
//
|
||||||
|
// The trade-off in supporting existing behaviors / use-cases is that we won't be
|
||||||
|
// able to detect race conditions involving root paths being deleted during
|
||||||
|
// InMemoryFileIndex construction. However, it's still a net improvement to detect and
|
||||||
|
// fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion.
|
||||||
|
case _: FileNotFoundException if isRootPath || ignoreMissingFiles =>
|
||||||
|
logWarning(s"The directory $path was not found. Was it deleted very recently?")
|
||||||
|
Array.empty[FileStatus]
|
||||||
|
}
|
||||||
|
|
||||||
|
val filteredStatuses =
|
||||||
|
statuses.filterNot(status => shouldFilterOutPathName(status.getPath.getName))
|
||||||
|
|
||||||
|
val allLeafStatuses = {
|
||||||
|
val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory)
|
||||||
|
val nestedFiles: Seq[FileStatus] = contextOpt match {
|
||||||
|
case Some(context) if dirs.size > parallelismThreshold =>
|
||||||
|
parallelListLeafFilesInternal(
|
||||||
|
context,
|
||||||
|
dirs.map(_.getPath),
|
||||||
|
hadoopConf = hadoopConf,
|
||||||
|
filter = filter,
|
||||||
|
isRootLevel = false,
|
||||||
|
ignoreMissingFiles = ignoreMissingFiles,
|
||||||
|
ignoreLocality = ignoreLocality,
|
||||||
|
parallelismThreshold = parallelismThreshold,
|
||||||
|
parallelismMax = parallelismMax
|
||||||
|
).flatMap(_._2)
|
||||||
|
case _ =>
|
||||||
|
dirs.flatMap { dir =>
|
||||||
|
listLeafFiles(
|
||||||
|
path = dir.getPath,
|
||||||
|
hadoopConf = hadoopConf,
|
||||||
|
filter = filter,
|
||||||
|
contextOpt = contextOpt,
|
||||||
|
ignoreMissingFiles = ignoreMissingFiles,
|
||||||
|
ignoreLocality = ignoreLocality,
|
||||||
|
isRootPath = false,
|
||||||
|
parallelismThreshold = parallelismThreshold,
|
||||||
|
parallelismMax = parallelismMax)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val allFiles = topLevelFiles ++ nestedFiles
|
||||||
|
if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles
|
||||||
|
}
|
||||||
|
|
||||||
|
val missingFiles = mutable.ArrayBuffer.empty[String]
|
||||||
|
val resolvedLeafStatuses = allLeafStatuses.flatMap {
|
||||||
|
case f: LocatedFileStatus =>
|
||||||
|
Some(f)
|
||||||
|
|
||||||
|
// NOTE:
|
||||||
|
//
|
||||||
|
// - Although S3/S3A/S3N file system can be quite slow for remote file metadata
|
||||||
|
// operations, calling `getFileBlockLocations` does no harm here since these file system
|
||||||
|
// implementations don't actually issue RPC for this method.
|
||||||
|
//
|
||||||
|
// - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not
|
||||||
|
// be a big deal since we always use to `parallelListLeafFiles` when the number of
|
||||||
|
// paths exceeds threshold.
|
||||||
|
case f if !ignoreLocality =>
|
||||||
|
// The other constructor of LocatedFileStatus will call FileStatus.getPermission(),
|
||||||
|
// which is very slow on some file system (RawLocalFileSystem, which is launch a
|
||||||
|
// subprocess and parse the stdout).
|
||||||
|
try {
|
||||||
|
val locations = fs.getFileBlockLocations(f, 0, f.getLen).map { loc =>
|
||||||
|
// Store BlockLocation objects to consume less memory
|
||||||
|
if (loc.getClass == classOf[BlockLocation]) {
|
||||||
|
loc
|
||||||
|
} else {
|
||||||
|
new BlockLocation(loc.getNames, loc.getHosts, loc.getOffset, loc.getLength)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, f.getReplication, f.getBlockSize,
|
||||||
|
f.getModificationTime, 0, null, null, null, null, f.getPath, locations)
|
||||||
|
if (f.isSymlink) {
|
||||||
|
lfs.setSymlink(f.getSymlink)
|
||||||
|
}
|
||||||
|
Some(lfs)
|
||||||
|
} catch {
|
||||||
|
case _: FileNotFoundException if ignoreMissingFiles =>
|
||||||
|
missingFiles += f.getPath.toString
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
case f => Some(f)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (missingFiles.nonEmpty) {
|
||||||
|
logWarning(
|
||||||
|
s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}")
|
||||||
|
}
|
||||||
|
|
||||||
|
resolvedLeafStatuses
|
||||||
|
}
|
||||||
|
// scalastyle:on parameter.number
|
||||||
|
|
||||||
|
/** A serializable variant of HDFS's BlockLocation. This is required by Hadoop 2.7. */
|
||||||
|
private case class SerializableBlockLocation(names: Array[String],
|
||||||
|
hosts: Array[String],
|
||||||
|
offset: Long,
|
||||||
|
length: Long)
|
||||||
|
|
||||||
|
/** A serializable variant of HDFS's FileStatus. This is required by Hadoop 2.7. */
|
||||||
|
private case class SerializableFileStatus(path: String,
|
||||||
|
length: Long,
|
||||||
|
isDir: Boolean,
|
||||||
|
blockReplication: Short,
|
||||||
|
blockSize: Long,
|
||||||
|
modificationTime: Long,
|
||||||
|
accessTime: Long,
|
||||||
|
blockLocations: Array[SerializableBlockLocation])
|
||||||
|
|
||||||
|
/** Checks if we should filter out this path name. */
|
||||||
|
def shouldFilterOutPathName(pathName: String): Boolean = {
|
||||||
|
// We filter follow paths:
|
||||||
|
// 1. everything that starts with _ and ., except _common_metadata and _metadata
|
||||||
|
// because Parquet needs to find those metadata files from leaf files returned by this method.
|
||||||
|
// We should refactor this logic to not mix metadata files with data files.
|
||||||
|
// 2. everything that ends with `._COPYING_`, because this is a intermediate state of file. we
|
||||||
|
// should skip this file in case of double reading.
|
||||||
|
val exclude = (pathName.startsWith("_") && !pathName.contains("=")) || pathName.endsWith("._COPYING_")
|
||||||
|
val include = pathName.startsWith("_common_metadata") || pathName.startsWith("_metadata")
|
||||||
|
exclude && !include
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,102 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.execution.datasources
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration
|
||||||
|
import org.apache.hadoop.fs.{FileStatus, Path, PathFilter}
|
||||||
|
import org.apache.hadoop.mapred.{FileInputFormat, JobConf}
|
||||||
|
import org.apache.spark.HoodieHadoopFSUtils
|
||||||
|
import org.apache.spark.metrics.source.HiveCatalogMetrics
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.apache.spark.sql.execution.datasources._
|
||||||
|
import org.apache.spark.sql.types.StructType
|
||||||
|
|
||||||
|
import scala.collection.mutable
|
||||||
|
import scala.collection.mutable.ArrayBuffer
|
||||||
|
|
||||||
|
/**
 * Hudi-specific [[InMemoryFileIndex]] that replicates Spark's leaf-file listing but routes
 * the parallel listing through [[HoodieHadoopFSUtils.parallelListLeafFiles]], whose path
 * filter accepts file names starting with "." (Spark's stock listing would drop them).
 *
 * @param sparkSession        active Spark session
 * @param rootPathsSpecified  root paths (already resolved/globbed) to index
 * @param parameters          datasource options forwarded to the parent index
 * @param userSpecifiedSchema optional user-provided partition schema
 * @param fileStatusCache     cache of previously listed leaf files (defaults to no caching)
 */
class HoodieInMemoryFileIndex(sparkSession: SparkSession,
                              rootPathsSpecified: Seq[Path],
                              parameters: Map[String, String],
                              userSpecifiedSchema: Option[StructType],
                              fileStatusCache: FileStatusCache = NoopCache)
  extends InMemoryFileIndex(sparkSession, rootPathsSpecified, parameters, userSpecifiedSchema, fileStatusCache) {

  /**
   * List leaf files of given paths. This method will submit a Spark job to do parallel
   * listing whenever there is a path having more files than the parallel partition discovery threshold.
   *
   * This is publicly visible for testing.
   *
   * NOTE: This method replicates the one it overrides, however it uses custom method to run parallel
   * listing that accepts files starting with "."
   */
  override def listLeafFiles(paths: Seq[Path]): mutable.LinkedHashSet[FileStatus] = {
    val startTime = System.nanoTime()
    val output = mutable.LinkedHashSet[FileStatus]()
    val pathsToFetch = mutable.ArrayBuffer[Path]()
    // Serve what we can from the cache; everything else is listed below in bulk
    for (path <- paths) {
      fileStatusCache.getLeafFiles(path) match {
        case Some(files) =>
          HiveCatalogMetrics.incrementFileCacheHits(files.length)
          output ++= files
        case None =>
          pathsToFetch += path
      }
      () // for some reasons scalac 2.12 needs this; return type doesn't matter
    }
    // Honor any input-path filter configured via mapred.input.pathFilter.class
    val filter = FileInputFormat.getInputPathFilter(new JobConf(hadoopConf, this.getClass))
    val discovered = bulkListLeafFiles(sparkSession, pathsToFetch, filter, hadoopConf)

    // Record metrics and back-fill the cache with freshly discovered listings
    discovered.foreach { case (path, leafFiles) =>
      HiveCatalogMetrics.incrementFilesDiscovered(leafFiles.size)
      fileStatusCache.putLeafFiles(path, leafFiles.toArray)
      output ++= leafFiles
    }

    logInfo(s"It took ${(System.nanoTime() - startTime) / (1000 * 1000)} ms to list leaf files" +
      s" for ${paths.length} paths.")

    output
  }

  /**
   * Lists leaf files under [[paths]] in parallel via [[HoodieHadoopFSUtils]], wrapping
   * [[filter]] so that Hudi's name-based exclusions also apply on the executors.
   */
  protected def bulkListLeafFiles(sparkSession: SparkSession, paths: ArrayBuffer[Path], filter: PathFilter, hadoopConf: Configuration): Seq[(Path, Seq[FileStatus])] = {
    HoodieHadoopFSUtils.parallelListLeafFiles(
      sc = sparkSession.sparkContext,
      paths = paths,
      hadoopConf = hadoopConf,
      filter = new PathFilterWrapper(filter),
      ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles,
      // NOTE: We're disabling fetching Block Info to speed up file listing
      ignoreLocality = true,
      parallelismThreshold = sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold,
      parallelismMax = sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism)
  }
}
|
||||||
|
|
||||||
|
object HoodieInMemoryFileIndex {

  /**
   * Creates a [[HoodieInMemoryFileIndex]] over the given (already globbed) paths,
   * backed by the session-scoped [[FileStatusCache]].
   *
   * @param sparkSession active Spark session
   * @param globbedPaths resolved root paths to index
   * @return a new file index over the provided paths
   */
  def create(sparkSession: SparkSession, globbedPaths: Seq[Path]): HoodieInMemoryFileIndex =
    new HoodieInMemoryFileIndex(
      sparkSession,
      globbedPaths,
      Map.empty,
      Option.empty,
      FileStatusCache.getOrCreate(sparkSession))
}
|
||||||
|
|
||||||
|
/**
 * Serializable [[PathFilter]] decorator combining an optional user-provided filter with
 * Hudi's name-based exclusions ([[HoodieHadoopFSUtils.shouldFilterOutPathName]]).
 * Serializable so it can be shipped to executors during parallel listing.
 */
private class PathFilterWrapper(val filter: PathFilter) extends PathFilter with Serializable {
  override def accept(path: Path): Boolean =
    // A null wrapped filter means "accept everything"; Hudi's exclusions always apply
    Option(filter).forall(_.accept(path)) && !HoodieHadoopFSUtils.shouldFilterOutPathName(path.getName)
}
|
||||||
@@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.spark.execution.datasources
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.Path
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.junit.jupiter.api.Assertions.assertEquals
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
import org.junit.jupiter.api.io.TempDir
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import java.nio.file.Paths
|
||||||
|
|
||||||
|
class TestHoodieInMemoryFileIndex {

  /**
   * Verifies that [[HoodieInMemoryFileIndex]] lists exactly the leaf files located under
   * the root folders it is created over.
   *
   * Fix: `spark.stop()` is now in a `finally` block so the local SparkSession does not
   * leak when the assertion (or any earlier step) fails.
   */
  @Test
  def testCreateInMemoryIndex(@TempDir tempDir: File): Unit = {
    val spark = SparkSession.builder
      .appName("Hoodie Datasource test")
      .master("local[2]")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate
    try {
      // Two folders, each containing two plain files: these four files are the expected leaves
      val folders: Seq[Path] = Seq(
        new Path(Paths.get(tempDir.getAbsolutePath, "folder1").toUri),
        new Path(Paths.get(tempDir.getAbsolutePath, "folder2").toUri)
      )

      val files: Seq[Path] = Seq(
        new Path(Paths.get(tempDir.getAbsolutePath, "folder1", "file1").toUri),
        new Path(Paths.get(tempDir.getAbsolutePath, "folder1", "file2").toUri),
        new Path(Paths.get(tempDir.getAbsolutePath, "folder2", "file3").toUri),
        new Path(Paths.get(tempDir.getAbsolutePath, "folder2", "file4").toUri)
      )

      folders.foreach(folder => new File(folder.toUri).mkdir())
      files.foreach(file => new File(file.toUri).createNewFile())

      val index = HoodieInMemoryFileIndex.create(spark, Seq(folders(0), folders(1)))
      val indexedFilePaths = index.allFiles().map(fs => fs.getPath)
      // Sort both sides so the comparison is order-independent
      assertEquals(files.sortWith(_.toString < _.toString), indexedFilePaths.sortWith(_.toString < _.toString))
    } finally {
      spark.stop()
    }
  }
}
|
||||||
@@ -88,35 +88,6 @@ class TestHoodieSparkUtils {
|
|||||||
.sortWith(_.toString < _.toString), globbedPaths.sortWith(_.toString < _.toString))
|
.sortWith(_.toString < _.toString), globbedPaths.sortWith(_.toString < _.toString))
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
def testCreateInMemoryIndex(@TempDir tempDir: File): Unit = {
|
|
||||||
val spark = SparkSession.builder
|
|
||||||
.appName("Hoodie Datasource test")
|
|
||||||
.master("local[2]")
|
|
||||||
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
|
|
||||||
.getOrCreate
|
|
||||||
|
|
||||||
val folders: Seq[Path] = Seq(
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder1").toUri),
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder2").toUri)
|
|
||||||
)
|
|
||||||
|
|
||||||
val files: Seq[Path] = Seq(
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder1", "file1").toUri),
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder1", "file2").toUri),
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder2", "file3").toUri),
|
|
||||||
new Path(Paths.get(tempDir.getAbsolutePath, "folder2", "file4").toUri)
|
|
||||||
)
|
|
||||||
|
|
||||||
folders.foreach(folder => new File(folder.toUri).mkdir())
|
|
||||||
files.foreach(file => new File(file.toUri).createNewFile())
|
|
||||||
|
|
||||||
val index = HoodieSparkUtils.createInMemoryFileIndex(spark, Seq(folders(0), folders(1)))
|
|
||||||
val indexedFilePaths = index.allFiles().map(fs => fs.getPath)
|
|
||||||
assertEquals(files.sortWith(_.toString < _.toString), indexedFilePaths.sortWith(_.toString < _.toString))
|
|
||||||
spark.stop()
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
def testCreateRddSchemaEvol(): Unit = {
|
def testCreateRddSchemaEvol(): Unit = {
|
||||||
val spark = SparkSession.builder
|
val spark = SparkSession.builder
|
||||||
|
|||||||
@@ -112,9 +112,9 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
|
|||||||
val fullColumnsReadStats: Array[(String, Long)] =
|
val fullColumnsReadStats: Array[(String, Long)] =
|
||||||
if (HoodieSparkUtils.isSpark3)
|
if (HoodieSparkUtils.isSpark3)
|
||||||
Array(
|
Array(
|
||||||
("rider", 14665),
|
("rider", 14166),
|
||||||
("rider,driver", 14665),
|
("rider,driver", 14166),
|
||||||
("rider,driver,tip_history", 14665))
|
("rider,driver,tip_history", 14166))
|
||||||
else if (HoodieSparkUtils.isSpark2)
|
else if (HoodieSparkUtils.isSpark2)
|
||||||
// TODO re-enable tests (these tests are very unstable currently)
|
// TODO re-enable tests (these tests are very unstable currently)
|
||||||
Array(
|
Array(
|
||||||
@@ -163,11 +163,29 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
|
|||||||
else
|
else
|
||||||
fail("Only Spark 3 and Spark 2 are currently supported")
|
fail("Only Spark 3 and Spark 2 are currently supported")
|
||||||
|
|
||||||
|
// Stats for the reads fetching _all_ columns (currently for MOR to be able to merge
|
||||||
|
// records properly full row has to be fetched; note, how amount of bytes read
|
||||||
|
// is invariant of the # of columns)
|
||||||
|
val fullColumnsReadStats: Array[(String, Long)] =
|
||||||
|
if (HoodieSparkUtils.isSpark3)
|
||||||
|
Array(
|
||||||
|
("rider", 14166),
|
||||||
|
("rider,driver", 14166),
|
||||||
|
("rider,driver,tip_history", 14166))
|
||||||
|
else if (HoodieSparkUtils.isSpark2)
|
||||||
|
// TODO re-enable tests (these tests are very unstable currently)
|
||||||
|
Array(
|
||||||
|
("rider", -1),
|
||||||
|
("rider,driver", -1),
|
||||||
|
("rider,driver,tip_history", -1))
|
||||||
|
else
|
||||||
|
fail("Only Spark 3 and Spark 2 are currently supported")
|
||||||
|
|
||||||
// Test MOR / Snapshot / Skip-merge
|
// Test MOR / Snapshot / Skip-merge
|
||||||
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats)
|
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL, projectedColumnsReadStats)
|
||||||
|
|
||||||
// Test MOR / Snapshot / Payload-combine
|
// Test MOR / Snapshot / Payload-combine
|
||||||
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, projectedColumnsReadStats)
|
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL, DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL, fullColumnsReadStats)
|
||||||
|
|
||||||
// Test MOR / Read Optimized
|
// Test MOR / Read Optimized
|
||||||
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStats)
|
runTest(tableState, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, "null", projectedColumnsReadStats)
|
||||||
@@ -209,9 +227,9 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
|
|||||||
val fullColumnsReadStats: Array[(String, Long)] =
|
val fullColumnsReadStats: Array[(String, Long)] =
|
||||||
if (HoodieSparkUtils.isSpark3)
|
if (HoodieSparkUtils.isSpark3)
|
||||||
Array(
|
Array(
|
||||||
("rider", 19683),
|
("rider", 19684),
|
||||||
("rider,driver", 19683),
|
("rider,driver", 19684),
|
||||||
("rider,driver,tip_history", 19683))
|
("rider,driver,tip_history", 19684))
|
||||||
else if (HoodieSparkUtils.isSpark2)
|
else if (HoodieSparkUtils.isSpark2)
|
||||||
// TODO re-enable tests (these tests are very unstable currently)
|
// TODO re-enable tests (these tests are very unstable currently)
|
||||||
Array(
|
Array(
|
||||||
|
|||||||
Reference in New Issue
Block a user