
[HUDI-2449] Incremental read for Flink (#3686)

Author: Danny Chan
Date: 2021-09-19 09:06:46 +08:00
Committed by: GitHub
Parent: c7a5c8273b
Commit: 3354fac42f

16 changed files with 689 additions and 365 deletions

FlinkOptions.java

@@ -197,12 +197,18 @@ public class FlinkOptions extends HoodieConfig {
.withDescription("Check interval for streaming read of SECOND, default 1 minute"); .withDescription("Check interval for streaming read of SECOND, default 1 minute");
public static final String START_COMMIT_EARLIEST = "earliest"; public static final String START_COMMIT_EARLIEST = "earliest";
public static final ConfigOption<String> READ_STREAMING_START_COMMIT = ConfigOptions public static final ConfigOption<String> READ_START_COMMIT = ConfigOptions
.key("read.streaming.start-commit") .key("read.start-commit")
.stringType() .stringType()
.noDefaultValue() .noDefaultValue()
.withDescription("Start commit instant for streaming read, the commit time format should be 'yyyyMMddHHmmss', " .withDescription("Start commit instant for reading, the commit time format should be 'yyyyMMddHHmmss', "
+ "by default reading from the latest instant"); + "by default reading from the latest instant for streaming read");
public static final ConfigOption<String> READ_END_COMMIT = ConfigOptions
.key("read.end-commit")
.stringType()
.noDefaultValue()
.withDescription("End commit instant for reading, the commit time format should be 'yyyyMMddHHmmss'");
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Write Options // Write Options
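For illustration, a minimal sketch of setting the renamed start option and the new end option through the Flink Configuration API; only the option names come from this diff, the instant values are made up:

// Sketch only: bound a read between two commit instants.
Configuration conf = new Configuration();
conf.setString(FlinkOptions.READ_START_COMMIT, "20210918000000"); // made-up instant
conf.setString(FlinkOptions.READ_END_COMMIT, "20210919000000");   // made-up instant
// Streaming reads typically set only READ_START_COMMIT; the special value
// FlinkOptions.START_COMMIT_EARLIEST consumes from the first available instant.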

FileIndex.java

@@ -28,6 +28,8 @@ import org.apache.flink.configuration.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 
+import javax.annotation.Nullable;
+
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -36,6 +38,7 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 
 /**
  * A file index which supports listing files efficiently through metadata table.
@@ -137,11 +140,29 @@ public class FileIndex {
     this.partitionPaths = null;
   }
 
+  // -------------------------------------------------------------------------
+  //  Getter/Setter
+  // -------------------------------------------------------------------------
+
+  /**
+   * Sets up explicit partition paths for pruning.
+   */
+  public void setPartitionPaths(@Nullable Set<String> partitionPaths) {
+    if (partitionPaths != null) {
+      this.partitionPaths = new ArrayList<>(partitionPaths);
+    }
+  }
+
   // -------------------------------------------------------------------------
   //  Utilities
   // -------------------------------------------------------------------------
 
-  private List<String> getOrBuildPartitionPaths() {
+  /**
+   * Returns all the relative partition paths.
+   *
+   * <p>The partition paths are cached once invoked.
+   */
+  public List<String> getOrBuildPartitionPaths() {
     if (this.partitionPaths != null) {
       return this.partitionPaths;
     }
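The new setter and the now-public getOrBuildPartitionPaths() are meant to be used together for partition pruning; a short sketch of the intended call sequence, mirroring the HoodieTableSource#buildFileIndex change further below:

// Sketch: push the pruned partition set into the file index, then list files
// for exactly those partitions. A null set means no pruning is applied.
fileIndex.setPartitionPaths(requiredPartitionPaths);
List<String> relPartitionPaths = fileIndex.getOrBuildPartitionPaths(); // cached after the first call
FileStatus[] fileStatuses = fileIndex.getFilesInPartitions();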

IncrementalInputSplits.java (new file)

@@ -0,0 +1,365 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.source;
import org.apache.hudi.common.model.BaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.log.InstantRange;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.sink.partitioner.profile.WriteProfiles;
import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
/**
* Utilities to generate incremental input splits {@link MergeOnReadInputSplit}.
* The input splits are used for streaming and incremental read.
*
* <p>How to generate the input splits:
* <ol>
* <li>first fetch all the commit metadata for the incremental instants;</li>
* <li>resolve the incremental commit file paths;</li>
* <li>filter the full file paths by required partitions;</li>
* <li>use the file paths from #step 3 as the back-up of the filesystem view.</li>
* </ol>
*/
public class IncrementalInputSplits {
private static final Logger LOG = LoggerFactory.getLogger(IncrementalInputSplits.class);
private final Configuration conf;
private final Path path;
private final long maxCompactionMemoryInBytes;
// for partition pruning
private final Set<String> requiredPartitions;
private IncrementalInputSplits(
Configuration conf,
Path path,
long maxCompactionMemoryInBytes,
@Nullable Set<String> requiredPartitions) {
this.conf = conf;
this.path = path;
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
this.requiredPartitions = requiredPartitions;
}
/**
* Returns the builder.
*/
public static Builder builder() {
return new Builder();
}
/**
* Returns the incremental input splits.
*
* @param metaClient The meta client
* @param hadoopConf The hadoop configuration
* @return The list of incremental input splits or empty if there are no new instants
*/
public Result inputSplits(
HoodieTableMetaClient metaClient,
org.apache.hadoop.conf.Configuration hadoopConf) {
return inputSplits(metaClient, hadoopConf, null);
}
/**
* Returns the incremental input splits.
*
* @param metaClient The meta client
* @param hadoopConf The hadoop configuration
* @param issuedInstant The last issued instant, only valid in streaming read
* @return The list of incremental input splits or empty if there are no new instants
*/
public Result inputSplits(
HoodieTableMetaClient metaClient,
org.apache.hadoop.conf.Configuration hadoopConf,
String issuedInstant) {
metaClient.reloadActiveTimeline();
HoodieTimeline commitTimeline = metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants();
if (commitTimeline.empty()) {
LOG.warn("No splits found for the table under path " + path);
return Result.EMPTY;
}
List<HoodieInstant> instants = filterInstantsWithRange(commitTimeline, issuedInstant);
// get the latest instant that satisfies condition
final HoodieInstant instantToIssue = instants.size() == 0 ? null : instants.get(instants.size() - 1);
final InstantRange instantRange;
if (instantToIssue != null) {
if (issuedInstant != null) {
// the streaming reader may record the last issued instant, if the issued instant is present,
// the instant range should be: (issued instant, the latest instant].
instantRange = InstantRange.getInstance(issuedInstant, instantToIssue.getTimestamp(),
InstantRange.RangeType.OPEN_CLOSE);
} else if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()) {
// first time consume and has a start commit
final String startCommit = this.conf.getString(FlinkOptions.READ_START_COMMIT);
instantRange = startCommit.equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)
? null
: InstantRange.getInstance(startCommit, instantToIssue.getTimestamp(), InstantRange.RangeType.CLOSE_CLOSE);
} else {
// first time consume and no start commit, consumes the latest incremental data set.
instantRange = InstantRange.getInstance(instantToIssue.getTimestamp(), instantToIssue.getTimestamp(),
InstantRange.RangeType.CLOSE_CLOSE);
}
} else {
LOG.info("No new instant found for the table under path " + path + ", skip reading");
return Result.EMPTY;
}
String tableName = conf.getString(FlinkOptions.TABLE_NAME);
List<HoodieCommitMetadata> activeMetadataList = instants.stream()
.map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, commitTimeline)).collect(Collectors.toList());
List<HoodieCommitMetadata> archivedMetadataList = getArchivedMetadata(metaClient, instantRange, commitTimeline, tableName);
if (archivedMetadataList.size() > 0) {
LOG.warn(""
+ "--------------------------------------------------------------------------------\n"
+ "---------- caution: the reader has fall behind too much from the writer,\n"
+ "---------- tweak 'read.tasks' option to add parallelism of read tasks.\n"
+ "--------------------------------------------------------------------------------");
}
List<HoodieCommitMetadata> metadataList = archivedMetadataList.size() > 0
? mergeList(activeMetadataList, archivedMetadataList)
: activeMetadataList;
Set<String> writePartitions = getWritePartitionPaths(metadataList);
// apply partition push down
if (this.requiredPartitions != null) {
writePartitions = writePartitions.stream()
.filter(this.requiredPartitions::contains).collect(Collectors.toSet());
}
FileStatus[] fileStatuses = WriteProfiles.getWritePathsOfInstants(path, hadoopConf, metadataList);
if (fileStatuses.length == 0) {
LOG.warn("No files found for reading in user provided path.");
return Result.EMPTY;
}
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, commitTimeline, fileStatuses);
final String endInstant = instantToIssue.getTimestamp();
final AtomicInteger cnt = new AtomicInteger(0);
final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
List<MergeOnReadInputSplit> inputSplits = writePartitions.stream()
.map(relPartitionPath -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, endInstant)
.map(fileSlice -> {
Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles()
.sorted(HoodieLogFile.getLogFileComparator())
.map(logFile -> logFile.getPath().toString())
.collect(Collectors.toList()));
String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
return new MergeOnReadInputSplit(cnt.getAndAdd(1),
basePath, logPaths, endInstant,
metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, instantRange);
}).collect(Collectors.toList()))
.flatMap(Collection::stream)
.collect(Collectors.toList());
return Result.instance(inputSplits, endInstant);
}
/**
* Returns the archived metadata in case the reader has fallen behind the active timeline
* or it wants to read from the earliest.
*
* <p>Note: should improve it with metadata table when the metadata table is stable enough.
*
* @param metaClient The meta client
* @param instantRange The instant range to filter the timeline instants
* @param commitTimeline The commit timeline
* @param tableName The table name
* @return the list of archived metadata, or empty if there is no need to read the archived timeline
*/
private List<HoodieCommitMetadata> getArchivedMetadata(
HoodieTableMetaClient metaClient,
InstantRange instantRange,
HoodieTimeline commitTimeline,
String tableName) {
if (instantRange == null || commitTimeline.isBeforeTimelineStarts(instantRange.getStartInstant())) {
// read the archived metadata if:
// 1. the start commit is 'earliest';
// 2. the start instant is archived.
HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
HoodieTimeline archivedCompleteTimeline = archivedTimeline.getCommitsTimeline().filterCompletedInstants();
if (!archivedCompleteTimeline.empty()) {
final String endTs = archivedCompleteTimeline.lastInstant().get().getTimestamp();
Stream<HoodieInstant> instantStream = archivedCompleteTimeline.getInstants();
if (instantRange != null) {
archivedTimeline.loadInstantDetailsInMemory(instantRange.getStartInstant(), endTs);
instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, instantRange.getStartInstant()));
} else {
final String startTs = archivedCompleteTimeline.firstInstant().get().getTimestamp();
archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
}
return instantStream
.map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, archivedTimeline)).collect(Collectors.toList());
}
}
return Collections.emptyList();
}
/**
* Returns the instants with a given issuedInstant to start from.
*
* @param commitTimeline The completed commits timeline
* @param issuedInstant The last issued instant that has already been delivered to downstream
* @return the filtered hoodie instants
*/
private List<HoodieInstant> filterInstantsWithRange(
HoodieTimeline commitTimeline,
final String issuedInstant) {
HoodieTimeline completedTimeline = commitTimeline.filterCompletedInstants();
if (issuedInstant != null) {
// returns early for streaming mode
return completedTimeline.getInstants()
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN, issuedInstant))
.collect(Collectors.toList());
}
Stream<HoodieInstant> instantStream = completedTimeline.getInstants();
if (this.conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent()
&& !this.conf.get(FlinkOptions.READ_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)) {
final String startCommit = this.conf.get(FlinkOptions.READ_START_COMMIT);
instantStream = instantStream
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, startCommit));
}
if (this.conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent()) {
final String endCommit = this.conf.get(FlinkOptions.READ_END_COMMIT);
instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN_OR_EQUALS, endCommit));
}
return instantStream.collect(Collectors.toList());
}
/**
* Returns all the incremental write partition paths as a set with the given commits metadata.
*
* @param metadataList The commits metadata
* @return the partition path set
*/
private Set<String> getWritePartitionPaths(List<HoodieCommitMetadata> metadataList) {
return metadataList.stream()
.map(HoodieCommitMetadata::getWritePartitionPaths)
.flatMap(Collection::stream)
.collect(Collectors.toSet());
}
private static <T> List<T> mergeList(List<T> list1, List<T> list2) {
List<T> merged = new ArrayList<>(list1);
merged.addAll(list2);
return merged;
}
// -------------------------------------------------------------------------
// Inner Class
// -------------------------------------------------------------------------
/**
* Represents a result of calling {@link #inputSplits}.
*/
public static class Result {
private final List<MergeOnReadInputSplit> inputSplits; // input splits
private final String endInstant; // end instant to consume to
public static final Result EMPTY = instance(Collections.emptyList(), "");
public boolean isEmpty() {
return this.inputSplits.size() == 0;
}
public List<MergeOnReadInputSplit> getInputSplits() {
return this.inputSplits;
}
public String getEndInstant() {
return this.endInstant;
}
private Result(List<MergeOnReadInputSplit> inputSplits, String endInstant) {
this.inputSplits = inputSplits;
this.endInstant = endInstant;
}
public static Result instance(List<MergeOnReadInputSplit> inputSplits, String endInstant) {
return new Result(inputSplits, endInstant);
}
}
/**
* Builder for {@link IncrementalInputSplits}.
*/
public static class Builder {
private Configuration conf;
private Path path;
private long maxCompactionMemoryInBytes;
// for partition pruning
private Set<String> requiredPartitions;
public Builder() {
}
public Builder conf(Configuration conf) {
this.conf = conf;
return this;
}
public Builder path(Path path) {
this.path = path;
return this;
}
public Builder maxCompactionMemoryInBytes(long maxCompactionMemoryInBytes) {
this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
return this;
}
public Builder requiredPartitions(@Nullable Set<String> requiredPartitions) {
this.requiredPartitions = requiredPartitions;
return this;
}
public IncrementalInputSplits build() {
return new IncrementalInputSplits(Objects.requireNonNull(this.conf), Objects.requireNonNull(this.path),
this.maxCompactionMemoryInBytes, this.requiredPartitions);
}
}
}
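For orientation, a minimal usage sketch of the builder and inputSplits() API defined above; this is how StreamReadMonitoringFunction wires it up further below, and conf, path, metaClient and hadoopConf are assumed to be available:

// Sketch: build the splits generator once, then ask for the next batch of
// incremental splits relative to the last issued instant (null on the first call).
IncrementalInputSplits incrementalInputSplits = IncrementalInputSplits.builder()
    .conf(conf)
    .path(path)
    .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
    .requiredPartitions(requiredPartitionPaths) // nullable: null disables partition pruning
    .build();
IncrementalInputSplits.Result result = incrementalInputSplits.inputSplits(metaClient, hadoopConf, issuedInstant);
if (!result.isEmpty()) {
  // emit result.getInputSplits() downstream and remember result.getEndInstant()
}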

StreamReadMonitoringFunction.java

@@ -18,19 +18,9 @@
 package org.apache.hudi.source;
 
-import org.apache.hudi.common.model.BaseFile;
-import org.apache.hudi.common.model.HoodieCommitMetadata;
-import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
-import org.apache.hudi.common.table.log.InstantRange;
-import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
-import org.apache.hudi.common.table.timeline.HoodieInstant;
-import org.apache.hudi.common.table.timeline.HoodieTimeline;
-import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
-import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.configuration.FlinkOptions;
-import org.apache.hudi.sink.partitioner.profile.WriteProfiles;
 import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
 import org.apache.hudi.util.StreamerUtil;
@@ -45,24 +35,15 @@ import org.apache.flink.runtime.state.FunctionSnapshotContext;
 import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
 import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
 import org.apache.flink.streaming.api.functions.source.SourceFunction;
-import org.apache.hadoop.fs.FileStatus;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import javax.annotation.Nullable;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
-import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN_OR_EQUALS;
 
 /**
  * This is the single (non-parallel) monitoring task which takes a {@link MergeOnReadInputSplit}
@@ -112,21 +93,21 @@ public class StreamReadMonitoringFunction
   private HoodieTableMetaClient metaClient;
 
-  private final long maxCompactionMemoryInBytes;
-
-  // for partition pruning
-  private final Set<String> requiredPartitionPaths;
+  private final IncrementalInputSplits incrementalInputSplits;
 
   public StreamReadMonitoringFunction(
       Configuration conf,
       Path path,
       long maxCompactionMemoryInBytes,
-      Set<String> requiredPartitionPaths) {
+      @Nullable Set<String> requiredPartitionPaths) {
     this.conf = conf;
     this.path = path;
     this.interval = conf.getInteger(FlinkOptions.READ_STREAMING_CHECK_INTERVAL);
-    this.maxCompactionMemoryInBytes = maxCompactionMemoryInBytes;
-    this.requiredPartitionPaths = requiredPartitionPaths;
+    this.incrementalInputSplits = IncrementalInputSplits.builder()
+        .conf(conf)
+        .path(path)
+        .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
+        .requiredPartitions(requiredPartitionPaths).build();
   }
@Override @Override
@@ -208,98 +189,23 @@ public class StreamReadMonitoringFunction
       // table does not exist
       return;
     }
-    metaClient.reloadActiveTimeline();
-    HoodieTimeline commitTimeline = metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants();
-    if (commitTimeline.empty()) {
-      LOG.warn("No splits found for the table under path " + path);
-      return;
-    }
-    List<HoodieInstant> instants = filterInstantsWithStart(commitTimeline, this.issuedInstant);
-    // get the latest instant that satisfies condition
-    final HoodieInstant instantToIssue = instants.size() == 0 ? null : instants.get(instants.size() - 1);
-    final InstantRange instantRange;
-    if (instantToIssue != null) {
-      if (this.issuedInstant != null) {
-        // had already consumed an instant
-        instantRange = InstantRange.getInstance(this.issuedInstant, instantToIssue.getTimestamp(),
-            InstantRange.RangeType.OPEN_CLOSE);
-      } else if (this.conf.getOptional(FlinkOptions.READ_STREAMING_START_COMMIT).isPresent()) {
-        // first time consume and has a start commit
-        final String specifiedStart = this.conf.getString(FlinkOptions.READ_STREAMING_START_COMMIT);
-        instantRange = specifiedStart.equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)
-            ? null
-            : InstantRange.getInstance(specifiedStart, instantToIssue.getTimestamp(), InstantRange.RangeType.CLOSE_CLOSE);
-      } else {
-        // first time consume and no start commit, consumes the latest incremental data set.
-        instantRange = InstantRange.getInstance(instantToIssue.getTimestamp(), instantToIssue.getTimestamp(),
-            InstantRange.RangeType.CLOSE_CLOSE);
-      }
-    } else {
-      LOG.info("No new instant found for the table under path " + path + ", skip reading");
-      return;
-    }
-    // generate input split:
-    // 1. first fetch all the commit metadata for the incremental instants;
-    // 2. filter the relative partition paths
-    // 3. filter the full file paths
-    // 4. use the file paths from #step 3 as the back-up of the filesystem view
-    String tableName = conf.getString(FlinkOptions.TABLE_NAME);
-    List<HoodieCommitMetadata> activeMetadataList = instants.stream()
-        .map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, commitTimeline)).collect(Collectors.toList());
-    List<HoodieCommitMetadata> archivedMetadataList = getArchivedMetadata(instantRange, commitTimeline, tableName);
-    if (archivedMetadataList.size() > 0) {
-      LOG.warn(""
-          + "--------------------------------------------------------------------------------\n"
-          + "---------- caution: the reader has fall behind too much from the writer,\n"
-          + "---------- tweak 'read.tasks' option to add parallelism of read tasks.\n"
-          + "--------------------------------------------------------------------------------");
-    }
-    List<HoodieCommitMetadata> metadataList = archivedMetadataList.size() > 0
-        ? mergeList(activeMetadataList, archivedMetadataList)
-        : activeMetadataList;
-    Set<String> writePartitions = getWritePartitionPaths(metadataList);
-    // apply partition push down
-    if (this.requiredPartitionPaths.size() > 0) {
-      writePartitions = writePartitions.stream()
-          .filter(this.requiredPartitionPaths::contains).collect(Collectors.toSet());
-    }
-    FileStatus[] fileStatuses = WriteProfiles.getWritePathsOfInstants(path, hadoopConf, metadataList);
-    if (fileStatuses.length == 0) {
-      LOG.warn("No files found for reading in user provided path.");
-      return;
-    }
-    HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, commitTimeline, fileStatuses);
-    final String commitToIssue = instantToIssue.getTimestamp();
-    final AtomicInteger cnt = new AtomicInteger(0);
-    final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
-    List<MergeOnReadInputSplit> inputSplits = writePartitions.stream()
-        .map(relPartitionPath -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, commitToIssue)
-            .map(fileSlice -> {
-              Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles()
-                  .sorted(HoodieLogFile.getLogFileComparator())
-                  .map(logFile -> logFile.getPath().toString())
-                  .collect(Collectors.toList()));
-              String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
-              return new MergeOnReadInputSplit(cnt.getAndAdd(1),
-                  basePath, logPaths, commitToIssue,
-                  metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, instantRange);
-            }).collect(Collectors.toList()))
-        .flatMap(Collection::stream)
-        .collect(Collectors.toList());
-    for (MergeOnReadInputSplit split : inputSplits) {
+    IncrementalInputSplits.Result result =
+        incrementalInputSplits.inputSplits(metaClient, this.hadoopConf, this.issuedInstant);
+    if (result.isEmpty()) {
+      // no new instants, returns early
+      return;
+    }
+    for (MergeOnReadInputSplit split : result.getInputSplits()) {
       context.collect(split);
     }
     // update the issues instant time
-    this.issuedInstant = commitToIssue;
+    this.issuedInstant = result.getEndInstant();
     LOG.info(""
         + "------------------------------------------------------------\n"
         + "---------- consumed to instant: {}\n"
         + "------------------------------------------------------------",
-        commitToIssue);
+        this.issuedInstant);
   }
@Override @Override
@@ -343,87 +249,4 @@ public class StreamReadMonitoringFunction
       this.instantState.add(this.issuedInstant);
     }
   }
-
-  /**
-   * Returns the archived metadata in case the reader consumes untimely or it wants
-   * to read from the earliest.
-   *
-   * <p>Note: should improve it with metadata table when the metadata table is stable enough.
-   *
-   * @param instantRange   The instant range to filter the timeline instants
-   * @param commitTimeline The commit timeline
-   * @param tableName      The table name
-   * @return the list of archived metadata, or empty if there is no need to read the archived timeline
-   */
-  private List<HoodieCommitMetadata> getArchivedMetadata(
-      InstantRange instantRange,
-      HoodieTimeline commitTimeline,
-      String tableName) {
-    if (instantRange == null || commitTimeline.isBeforeTimelineStarts(instantRange.getStartInstant())) {
-      // read the archived metadata if:
-      // 1. the start commit is 'earliest';
-      // 2. the start instant is archived.
-      HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline();
-      HoodieTimeline archivedCompleteTimeline = archivedTimeline.getCommitsTimeline().filterCompletedInstants();
-      if (!archivedCompleteTimeline.empty()) {
-        final String endTs = archivedCompleteTimeline.lastInstant().get().getTimestamp();
-        Stream<HoodieInstant> instantStream = archivedCompleteTimeline.getInstants();
-        if (instantRange != null) {
-          archivedTimeline.loadInstantDetailsInMemory(instantRange.getStartInstant(), endTs);
-          instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, instantRange.getStartInstant()));
-        } else {
-          final String startTs = archivedCompleteTimeline.firstInstant().get().getTimestamp();
-          archivedTimeline.loadInstantDetailsInMemory(startTs, endTs);
-        }
-        return instantStream
-            .map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, archivedTimeline)).collect(Collectors.toList());
-      }
-    }
-    return Collections.emptyList();
-  }
-
-  /**
-   * Returns the instants with a given issuedInstant to start from.
-   *
-   * @param commitTimeline The completed commits timeline
-   * @param issuedInstant  The last issued instant that has already been delivered to downstream
-   * @return the filtered hoodie instants
-   */
-  private List<HoodieInstant> filterInstantsWithStart(
-      HoodieTimeline commitTimeline,
-      final String issuedInstant) {
-    HoodieTimeline completedTimeline = commitTimeline.filterCompletedInstants();
-    if (issuedInstant != null) {
-      return completedTimeline.getInstants()
-          .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN, issuedInstant))
-          .collect(Collectors.toList());
-    } else if (this.conf.getOptional(FlinkOptions.READ_STREAMING_START_COMMIT).isPresent()
-        && !this.conf.get(FlinkOptions.READ_STREAMING_START_COMMIT).equalsIgnoreCase(FlinkOptions.START_COMMIT_EARLIEST)) {
-      String definedStartCommit = this.conf.get(FlinkOptions.READ_STREAMING_START_COMMIT);
-      return completedTimeline.getInstants()
-          .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, definedStartCommit))
-          .collect(Collectors.toList());
-    } else {
-      return completedTimeline.getInstants().collect(Collectors.toList());
-    }
-  }
-
-  /**
-   * Returns all the incremental write partition paths as a set with the given commits metadata.
-   *
-   * @param metadataList The commits metadata
-   * @return the partition path set
-   */
-  private Set<String> getWritePartitionPaths(List<HoodieCommitMetadata> metadataList) {
-    return metadataList.stream()
-        .map(HoodieCommitMetadata::getWritePartitionPaths)
-        .flatMap(Collection::stream)
-        .collect(Collectors.toSet());
-  }
-
-  private static <T> List<T> mergeList(List<T> list1, List<T> list2) {
-    List<T> merged = new ArrayList<>(list1);
-    merged.addAll(list2);
-    return merged;
-  }
 }

HoodieTableFactory.java

@@ -155,6 +155,8 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
     setupCompactionOptions(conf);
     // hive options
     setupHiveOptions(conf);
+    // read options
+    setupReadOptions(conf);
     // infer avro schema from physical DDL schema
     inferAvroSchema(conf, schema.toPhysicalRowDataType().notNull().getLogicalType());
   }
@@ -270,6 +272,16 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
     }
   }
 
+  /**
+   * Sets up the read options from the table definition.
+   */
+  private static void setupReadOptions(Configuration conf) {
+    if (!conf.getBoolean(FlinkOptions.READ_AS_STREAMING)
+        && (conf.getOptional(FlinkOptions.READ_START_COMMIT).isPresent() || conf.getOptional(FlinkOptions.READ_END_COMMIT).isPresent())) {
+      conf.setString(FlinkOptions.QUERY_TYPE, FlinkOptions.QUERY_TYPE_INCREMENTAL);
+    }
+  }
+
   /**
    * Inferences the deserialization Avro schema from the table schema (e.g. the DDL)
    * if both options {@link FlinkOptions#SOURCE_AVRO_SCHEMA_PATH} and
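The setupReadOptions hook above means a batch (non-streaming) table that declares either commit bound is promoted to an incremental query; a hypothetical sketch of the effect, with made-up option values:

// Sketch: streaming disabled (the default) plus a commit bound ...
Configuration conf = new Configuration();
conf.setBoolean(FlinkOptions.READ_AS_STREAMING, false);
conf.setString(FlinkOptions.READ_START_COMMIT, "20210918000000"); // made-up instant
// ... makes setupReadOptions(conf) switch the query type:
// conf.getString(FlinkOptions.QUERY_TYPE) now returns FlinkOptions.QUERY_TYPE_INCREMENTAL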

HoodieTableSource.java

@@ -19,7 +19,6 @@
 package org.apache.hudi.table;
 
 import org.apache.hudi.avro.HoodieAvroUtils;
-import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.BaseFile;
 import org.apache.hudi.common.model.HoodieLogFile;
 import org.apache.hudi.common.model.HoodieTableType;
@@ -31,6 +30,7 @@ import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.hadoop.HoodieROTablePathFilter;
 import org.apache.hudi.source.FileIndex;
+import org.apache.hudi.source.IncrementalInputSplits;
 import org.apache.hudi.source.StreamReadMonitoringFunction;
 import org.apache.hudi.source.StreamReadOperator;
 import org.apache.hudi.table.format.FilePathUtils;
@@ -108,6 +108,9 @@ public class HoodieTableSource implements
   private static final int NO_LIMIT_CONSTANT = -1;
 
+  private static final InputFormat<RowData, ?> EMPTY_INPUT_FORMAT =
+      new CollectionInputFormat<>(Collections.emptyList(), null);
+
   private final transient org.apache.hadoop.conf.Configuration hadoopConf;
   private final transient HoodieTableMetaClient metaClient;
   private final long maxCompactionMemoryInBytes;
@@ -220,7 +223,7 @@ public class HoodieTableSource implements
   public Result applyFilters(List<ResolvedExpression> filters) {
     this.filters = new ArrayList<>(filters);
     // refuse all the filters now
-    return Result.of(Collections.emptyList(), new ArrayList<>(filters));
+    return SupportsFilterPushDown.Result.of(Collections.emptyList(), new ArrayList<>(filters));
   }
 
   @Override
@@ -256,8 +259,8 @@ public class HoodieTableSource implements
     DataType[] schemaTypes = this.schema.getColumnDataTypes().toArray(new DataType[0]);
 
     return DataTypes.ROW(Arrays.stream(this.requiredPos)
         .mapToObj(i -> DataTypes.FIELD(schemaFieldNames[i], schemaTypes[i]))
         .toArray(DataTypes.Field[]::new))
         .bridgedTo(RowData.class);
   }
@@ -268,16 +271,21 @@ public class HoodieTableSource implements
     return requiredPartitions;
   }
 
+  @Nullable
   private Set<String> getRequiredPartitionPaths() {
     if (this.requiredPartitions == null) {
-      return Collections.emptySet();
+      // returns null for non partition pruning
+      return null;
     }
     return FilePathUtils.toRelativePartitionPaths(this.partitionKeys, this.requiredPartitions,
         conf.getBoolean(FlinkOptions.HIVE_STYLE_PARTITIONING));
   }
 
-  private List<MergeOnReadInputSplit> buildFileIndex(Path[] paths) {
-    if (paths.length == 0) {
+  private List<MergeOnReadInputSplit> buildFileIndex() {
+    Set<String> requiredPartitionPaths = getRequiredPartitionPaths();
+    fileIndex.setPartitionPaths(requiredPartitionPaths);
+    List<String> relPartitionPaths = fileIndex.getOrBuildPartitionPaths();
+    if (relPartitionPaths.size() == 0) {
       return Collections.emptyList();
     }
     FileStatus[] fileStatuses = fileIndex.getFilesInPartitions();
@@ -292,19 +300,17 @@ public class HoodieTableSource implements
     final String mergeType = this.conf.getString(FlinkOptions.MERGE_TYPE);
     final AtomicInteger cnt = new AtomicInteger(0);
     // generates one input split for each file group
-    return Arrays.stream(paths).map(partitionPath -> {
-      String relPartitionPath = FSUtils.getRelativePartitionPath(path, partitionPath);
-      return fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, latestCommit)
-          .map(fileSlice -> {
-            String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
-            Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles()
-                .sorted(HoodieLogFile.getLogFileComparator())
-                .map(logFile -> logFile.getPath().toString())
-                .collect(Collectors.toList()));
-            return new MergeOnReadInputSplit(cnt.getAndAdd(1), basePath, logPaths, latestCommit,
-                metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, null);
-          }).collect(Collectors.toList());
-    })
+    return relPartitionPaths.stream()
+        .map(relPartitionPath -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, latestCommit)
+            .map(fileSlice -> {
+              String basePath = fileSlice.getBaseFile().map(BaseFile::getPath).orElse(null);
+              Option<List<String>> logPaths = Option.ofNullable(fileSlice.getLogFiles()
+                  .sorted(HoodieLogFile.getLogFileComparator())
+                  .map(logFile -> logFile.getPath().toString())
+                  .collect(Collectors.toList()));
+              return new MergeOnReadInputSplit(cnt.getAndAdd(1), basePath, logPaths, latestCommit,
+                  metaClient.getBasePath(), maxCompactionMemoryInBytes, mergeType, null);
+            }).collect(Collectors.toList()))
         .flatMap(Collection::stream)
         .collect(Collectors.toList());
   }
@@ -319,16 +325,6 @@ public class HoodieTableSource implements
   }
 
   private InputFormat<RowData, ?> getBatchInputFormat() {
-    // When this table has no partition, just return an empty source.
-    if (!partitionKeys.isEmpty() && getOrFetchPartitions().isEmpty()) {
-      return new CollectionInputFormat<>(Collections.emptyList(), null);
-    }
-
-    final Path[] paths = getReadPaths();
-    if (paths.length == 0) {
-      return new CollectionInputFormat<>(Collections.emptyList(), null);
-    }
-
     final Schema tableAvroSchema = getTableAvroSchema();
     final DataType rowDataType = AvroSchemaConverter.convertToDataType(tableAvroSchema);
     final RowType rowType = (RowType) rowDataType.getLogicalType();
@@ -340,62 +336,37 @@ public class HoodieTableSource implements
         final HoodieTableType tableType = HoodieTableType.valueOf(this.conf.getString(FlinkOptions.TABLE_TYPE));
         switch (tableType) {
           case MERGE_ON_READ:
-            final List<MergeOnReadInputSplit> inputSplits = buildFileIndex(paths);
+            final List<MergeOnReadInputSplit> inputSplits = buildFileIndex();
             if (inputSplits.size() == 0) {
               // When there is no input splits, just return an empty source.
               LOG.warn("No input splits generate for MERGE_ON_READ input format, returns empty collection instead");
-              return new CollectionInputFormat<>(Collections.emptyList(), null);
+              return EMPTY_INPUT_FORMAT;
             }
-            final MergeOnReadTableState hoodieTableState = new MergeOnReadTableState(
-                rowType,
-                requiredRowType,
-                tableAvroSchema.toString(),
-                AvroSchemaConverter.convertToSchema(requiredRowType).toString(),
-                inputSplits,
-                conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","));
-            return MergeOnReadInputFormat.builder()
-                .config(this.conf)
-                .paths(FilePathUtils.toFlinkPaths(paths))
-                .tableState(hoodieTableState)
-                // use the explicit fields data type because the AvroSchemaConverter
-                // is not very stable.
-                .fieldTypes(rowDataType.getChildren())
-                .defaultPartName(conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME))
-                .limit(this.limit)
-                .emitDelete(false)
-                .build();
+            return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
+                rowDataType, inputSplits, false);
           case COPY_ON_WRITE:
-            FileInputFormat<RowData> format = new CopyOnWriteInputFormat(
-                FilePathUtils.toFlinkPaths(paths),
-                this.schema.getColumnNames().toArray(new String[0]),
-                this.schema.getColumnDataTypes().toArray(new DataType[0]),
-                this.requiredPos,
-                this.conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME),
-                this.limit == NO_LIMIT_CONSTANT ? Long.MAX_VALUE : this.limit, // ParquetInputFormat always uses the limit value
-                getParquetConf(this.conf, this.hadoopConf),
-                this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE)
-            );
-            format.setFilesFilter(new LatestFileFilter(this.hadoopConf));
-            return format;
+            return baseFileOnlyInputFormat();
           default:
             throw new HoodieException("Unexpected table type: " + this.conf.getString(FlinkOptions.TABLE_TYPE));
         }
       case FlinkOptions.QUERY_TYPE_READ_OPTIMIZED:
-        FileInputFormat<RowData> format = new CopyOnWriteInputFormat(
-            FilePathUtils.toFlinkPaths(paths),
-            this.schema.getColumnNames().toArray(new String[0]),
-            this.schema.getColumnDataTypes().toArray(new DataType[0]),
-            this.requiredPos,
-            "default",
-            this.limit == NO_LIMIT_CONSTANT ? Long.MAX_VALUE : this.limit, // ParquetInputFormat always uses the limit value
-            getParquetConf(this.conf, this.hadoopConf),
-            this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE)
-        );
-        format.setFilesFilter(new LatestFileFilter(this.hadoopConf));
-        return format;
+        return baseFileOnlyInputFormat();
+      case FlinkOptions.QUERY_TYPE_INCREMENTAL:
+        IncrementalInputSplits incrementalInputSplits = IncrementalInputSplits.builder()
+            .conf(conf).path(FilePathUtils.toFlinkPath(path))
+            .maxCompactionMemoryInBytes(maxCompactionMemoryInBytes)
+            .requiredPartitions(getRequiredPartitionPaths()).build();
+        final IncrementalInputSplits.Result result = incrementalInputSplits.inputSplits(metaClient, hadoopConf);
+        if (result.isEmpty()) {
+          // When there is no input splits, just return an empty source.
+          LOG.warn("No input splits generate for incremental read, returns empty collection instead");
+          return new CollectionInputFormat<>(Collections.emptyList(), null);
+        }
+        return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
+            rowDataType, result.getInputSplits(), false);
       default:
-        String errMsg = String.format("Invalid query type : '%s', options ['%s', '%s'] are supported now", queryType,
-            FlinkOptions.QUERY_TYPE_SNAPSHOT, FlinkOptions.QUERY_TYPE_READ_OPTIMIZED);
+        String errMsg = String.format("Invalid query type : '%s', options ['%s', '%s', '%s'] are supported now", queryType,
+            FlinkOptions.QUERY_TYPE_SNAPSHOT, FlinkOptions.QUERY_TYPE_READ_OPTIMIZED, FlinkOptions.QUERY_TYPE_INCREMENTAL);
         throw new HoodieException(errMsg);
     }
   }
@@ -408,56 +379,62 @@ public class HoodieTableSource implements
     final RowType requiredRowType = (RowType) getProducedDataType().notNull().getLogicalType();
 
     final String queryType = this.conf.getString(FlinkOptions.QUERY_TYPE);
-    org.apache.flink.core.fs.Path[] paths = new org.apache.flink.core.fs.Path[0];
     if (FlinkOptions.QUERY_TYPE_SNAPSHOT.equals(queryType)) {
       final HoodieTableType tableType = HoodieTableType.valueOf(this.conf.getString(FlinkOptions.TABLE_TYPE));
-      switch (tableType) {
-        case MERGE_ON_READ:
-          final MergeOnReadTableState hoodieTableState = new MergeOnReadTableState(
-              rowType,
-              requiredRowType,
-              tableAvroSchema.toString(),
-              AvroSchemaConverter.convertToSchema(requiredRowType).toString(),
-              Collections.emptyList(),
-              conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","));
-          return MergeOnReadInputFormat.builder()
-              .config(this.conf)
-              .paths(paths)
-              .tableState(hoodieTableState)
-              // use the explicit fields data type because the AvroSchemaConverter
-              // is not very stable.
-              .fieldTypes(rowDataType.getChildren())
-              .defaultPartName(conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME))
-              .limit(this.limit)
-              .emitDelete(true)
-              .build();
-        case COPY_ON_WRITE:
-          final MergeOnReadTableState hoodieTableState2 = new MergeOnReadTableState(
-              rowType,
-              requiredRowType,
-              tableAvroSchema.toString(),
-              AvroSchemaConverter.convertToSchema(requiredRowType).toString(),
-              Collections.emptyList(),
-              conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","));
-          return MergeOnReadInputFormat.builder()
-              .config(this.conf)
-              .paths(paths)
-              .tableState(hoodieTableState2)
-              // use the explicit fields data type because the AvroSchemaConverter
-              // is not very stable.
-              .fieldTypes(rowDataType.getChildren())
-              .defaultPartName(conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME))
-              .limit(this.limit)
-              .build();
-        default:
-          throw new HoodieException("Unexpected table type: " + this.conf.getString(FlinkOptions.TABLE_TYPE));
-      }
+      boolean emitDelete = tableType == HoodieTableType.MERGE_ON_READ;
+      return mergeOnReadInputFormat(rowType, requiredRowType, tableAvroSchema,
+          rowDataType, Collections.emptyList(), emitDelete);
     }
     String errMsg = String.format("Invalid query type : '%s', options ['%s'] are supported now", queryType,
         FlinkOptions.QUERY_TYPE_SNAPSHOT);
     throw new HoodieException(errMsg);
   }
 
+  private MergeOnReadInputFormat mergeOnReadInputFormat(
+      RowType rowType,
+      RowType requiredRowType,
+      Schema tableAvroSchema,
+      DataType rowDataType,
+      List<MergeOnReadInputSplit> inputSplits,
+      boolean emitDelete) {
+    final MergeOnReadTableState hoodieTableState = new MergeOnReadTableState(
+        rowType,
+        requiredRowType,
+        tableAvroSchema.toString(),
+        AvroSchemaConverter.convertToSchema(requiredRowType).toString(),
+        inputSplits,
+        conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","));
+    return MergeOnReadInputFormat.builder()
+        .config(this.conf)
+        .tableState(hoodieTableState)
+        // use the explicit fields' data type because the AvroSchemaConverter
+        // is not very stable.
+        .fieldTypes(rowDataType.getChildren())
+        .defaultPartName(conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME))
+        .limit(this.limit)
+        .emitDelete(emitDelete)
+        .build();
+  }
+
+  private InputFormat<RowData, ?> baseFileOnlyInputFormat() {
+    final Path[] paths = getReadPaths();
+    if (paths.length == 0) {
+      return EMPTY_INPUT_FORMAT;
+    }
+    FileInputFormat<RowData> format = new CopyOnWriteInputFormat(
+        FilePathUtils.toFlinkPaths(paths),
+        this.schema.getColumnNames().toArray(new String[0]),
+        this.schema.getColumnDataTypes().toArray(new DataType[0]),
+        this.requiredPos,
+        this.conf.getString(FlinkOptions.PARTITION_DEFAULT_NAME),
+        this.limit == NO_LIMIT_CONSTANT ? Long.MAX_VALUE : this.limit, // ParquetInputFormat always uses the limit value
+        getParquetConf(this.conf, this.hadoopConf),
+        this.conf.getBoolean(FlinkOptions.UTC_TIMEZONE)
+    );
+    format.setFilesFilter(new LatestFileFilter(this.hadoopConf));
+    return format;
+  }
+
   private Schema inferSchemaFromDdl() {
     Schema schema = AvroSchemaConverter.convertToSchema(this.schema.toPhysicalRowDataType().getLogicalType());
     return HoodieAvroUtils.addMetadataFields(schema, conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED));

MergeOnReadInputFormat.java

@@ -45,7 +45,6 @@ import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
 import org.apache.flink.api.common.io.RichInputFormat;
 import org.apache.flink.api.common.io.statistics.BaseStatistics;
 import org.apache.flink.configuration.Configuration;
-import org.apache.flink.core.fs.Path;
 import org.apache.flink.core.io.InputSplitAssigner;
 import org.apache.flink.table.data.GenericRowData;
 import org.apache.flink.table.data.RowData;
@@ -85,8 +84,6 @@ public class MergeOnReadInputFormat
   private transient org.apache.hadoop.conf.Configuration hadoopConf;
 
-  private Path[] paths;
-
   private final MergeOnReadTableState tableState;
 
   /**
@@ -134,14 +131,12 @@ public class MergeOnReadInputFormat
   private MergeOnReadInputFormat(
       Configuration conf,
-      Path[] paths,
       MergeOnReadTableState tableState,
       List<DataType> fieldTypes,
       String defaultPartName,
       long limit,
       boolean emitDelete) {
     this.conf = conf;
-    this.paths = paths;
     this.tableState = tableState;
     this.fieldNames = tableState.getRowType().getFieldNames();
     this.fieldTypes = fieldTypes;
@@ -165,7 +160,7 @@ public class MergeOnReadInputFormat
     this.currentReadCount = 0L;
     this.hadoopConf = StreamerUtil.getHadoopConf();
     if (!(split.getLogPaths().isPresent() && split.getLogPaths().get().size() > 0)) {
-      if (conf.getBoolean(FlinkOptions.READ_AS_STREAMING)) {
+      if (split.getInstantRange() != null) {
         // base file only with commit time filtering
         this.iterator = new BaseFileOnlyFilteringIterator(
             split.getInstantRange(),
@@ -212,16 +207,8 @@ public class MergeOnReadInputFormat
   @Override
   public void configure(Configuration configuration) {
-    if (this.paths.length == 0) {
-      // file path was not specified yet. Try to set it from the parameters.
-      String filePath = configuration.getString(FlinkOptions.PATH, null);
-      if (filePath == null) {
-        throw new IllegalArgumentException("File path was not specified in input format or configuration.");
-      } else {
-        this.paths = new Path[] {new Path(filePath)};
-      }
-    }
-    // may supports nested files in the future.
+    // no operation
+    // may support nested files in the future.
   }
@Override @Override
@@ -750,7 +737,6 @@ public class MergeOnReadInputFormat
    */
   public static class Builder {
     private Configuration conf;
-    private Path[] paths;
     private MergeOnReadTableState tableState;
     private List<DataType> fieldTypes;
     private String defaultPartName;
@@ -762,11 +748,6 @@ public class MergeOnReadInputFormat
       return this;
     }
 
-    public Builder paths(Path[] paths) {
-      this.paths = paths;
-      return this;
-    }
-
     public Builder tableState(MergeOnReadTableState tableState) {
       this.tableState = tableState;
       return this;
@@ -793,8 +774,8 @@ public class MergeOnReadInputFormat
     }
 
     public MergeOnReadInputFormat build() {
-      return new MergeOnReadInputFormat(conf, paths, tableState,
-          fieldTypes, defaultPartName, limit, emitDelete);
+      return new MergeOnReadInputFormat(conf, tableState, fieldTypes,
+          defaultPartName, limit, emitDelete);
     }
   }

TestFileIndex.java

@@ -88,4 +88,18 @@ public class TestFileIndex {
     assertThat(fileStatuses.length, is(1));
     assertTrue(fileStatuses[0].getPath().toString().endsWith(HoodieFileFormat.PARQUET.getFileExtension()));
   }
+
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  void testFileListingEmptyTable(boolean enableMetadata) {
+    Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
+    conf.setBoolean(FlinkOptions.METADATA_ENABLED, enableMetadata);
+    FileIndex fileIndex = FileIndex.instance(new Path(tempFile.getAbsolutePath()), conf);
+    List<String> partitionKeys = Collections.singletonList("partition");
+    List<Map<String, String>> partitions = fileIndex.getPartitions(partitionKeys, "default", false);
+    assertThat(partitions.size(), is(0));
+
+    FileStatus[] fileStatuses = fileIndex.getFilesInPartitions();
+    assertThat(fileStatuses.length, is(0));
+  }
 }

TestStreamReadMonitoringFunction.java

@@ -144,7 +144,7 @@ public class TestStreamReadMonitoringFunction {
     TestData.writeData(TestData.DATA_SET_INSERT, conf);
     TestData.writeData(TestData.DATA_SET_UPDATE_INSERT, conf);
     String specifiedCommit = TestUtils.getLatestCommit(tempFile.getAbsolutePath());
-    conf.setString(FlinkOptions.READ_STREAMING_START_COMMIT, specifiedCommit);
+    conf.setString(FlinkOptions.READ_START_COMMIT, specifiedCommit);
     StreamReadMonitoringFunction function = TestUtils.getMonitorFunc(conf);
     try (AbstractStreamOperatorTestHarness<MergeOnReadInputSplit> harness = createHarness(function)) {
       harness.setup();
@@ -175,7 +175,7 @@ public class TestStreamReadMonitoringFunction {
     TestData.writeData(TestData.DATA_SET_INSERT, conf);
     TestData.writeData(TestData.DATA_SET_UPDATE_INSERT, conf);
     String specifiedCommit = TestUtils.getLatestCommit(tempFile.getAbsolutePath());
-    conf.setString(FlinkOptions.READ_STREAMING_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST);
+    conf.setString(FlinkOptions.READ_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST);
     StreamReadMonitoringFunction function = TestUtils.getMonitorFunc(conf);
     try (AbstractStreamOperatorTestHarness<MergeOnReadInputSplit> harness = createHarness(function)) {
       harness.setup();

TestStreamReadOperator.java

@@ -22,7 +22,6 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.TableSchemaResolver;
 import org.apache.hudi.configuration.FlinkOptions;
 import org.apache.hudi.exception.HoodieException;
-import org.apache.hudi.table.format.FilePathUtils;
 import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
 import org.apache.hudi.table.format.mor.MergeOnReadInputSplit;
 import org.apache.hudi.table.format.mor.MergeOnReadTableState;
@@ -45,7 +44,6 @@ import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
 import org.apache.flink.table.data.RowData;
 import org.apache.flink.table.types.DataType;
 import org.apache.flink.table.types.logical.RowType;
-import org.apache.hadoop.fs.Path;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
@@ -263,10 +261,8 @@ public class TestStreamReadOperator {
AvroSchemaConverter.convertToSchema(TestConfigurations.ROW_TYPE).toString(), AvroSchemaConverter.convertToSchema(TestConfigurations.ROW_TYPE).toString(),
Collections.emptyList(), Collections.emptyList(),
new String[0]); new String[0]);
Path[] paths = FilePathUtils.getReadPaths(new Path(basePath), conf, hadoopConf, partitionKeys);
MergeOnReadInputFormat inputFormat = MergeOnReadInputFormat.builder() MergeOnReadInputFormat inputFormat = MergeOnReadInputFormat.builder()
.config(conf) .config(conf)
.paths(FilePathUtils.toFlinkPaths(paths))
.tableState(hoodieTableState) .tableState(hoodieTableState)
.fieldTypes(rowDataType.getChildren()) .fieldTypes(rowDataType.getChildren())
.defaultPartName("default").limit(1000L) .defaultPartName("default").limit(1000L)

View File

@@ -113,7 +113,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
.option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_AS_STREAMING, true)
.option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.TABLE_TYPE, tableType)
.option(FlinkOptions.READ_STREAMING_START_COMMIT, firstCommit) .option(FlinkOptions.READ_START_COMMIT, firstCommit)
.end(); .end();
streamTableEnv.executeSql(hoodieTableDDL); streamTableEnv.executeSql(hoodieTableDDL);
List<Row> rows = execSelectSql(streamTableEnv, "select * from t1", 10); List<Row> rows = execSelectSql(streamTableEnv, "select * from t1", 10);
@@ -186,7 +186,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
.option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_AS_STREAMING, true)
.option(FlinkOptions.TABLE_TYPE, tableType) .option(FlinkOptions.TABLE_TYPE, tableType)
.option(FlinkOptions.READ_STREAMING_START_COMMIT, specifiedCommit) .option(FlinkOptions.READ_START_COMMIT, specifiedCommit)
.end(); .end();
streamTableEnv.executeSql(createHoodieTable2); streamTableEnv.executeSql(createHoodieTable2);
List<Row> rows = execSelectSql(streamTableEnv, "select * from t2", 10); List<Row> rows = execSelectSql(streamTableEnv, "select * from t2", 10);
@@ -289,7 +289,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
.option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ) .option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
.option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_AS_STREAMING, true)
.option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2) .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2)
.option(FlinkOptions.READ_STREAMING_START_COMMIT, latestCommit) .option(FlinkOptions.READ_START_COMMIT, latestCommit)
.option(FlinkOptions.CHANGELOG_ENABLED, true) .option(FlinkOptions.CHANGELOG_ENABLED, true)
.end(); .end();
streamTableEnv.executeSql(hoodieTableDDL); streamTableEnv.executeSql(hoodieTableDDL);
@@ -343,7 +343,7 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
.option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ) .option(FlinkOptions.TABLE_TYPE, FlinkOptions.TABLE_TYPE_MERGE_ON_READ)
.option(FlinkOptions.READ_AS_STREAMING, true) .option(FlinkOptions.READ_AS_STREAMING, true)
.option(FlinkOptions.READ_STREAMING_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST) .option(FlinkOptions.READ_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST)
.option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2) .option(FlinkOptions.READ_STREAMING_CHECK_INTERVAL, 2)
// close the async compaction // close the async compaction
.option(FlinkOptions.COMPACTION_ASYNC_ENABLED, false) .option(FlinkOptions.COMPACTION_ASYNC_ENABLED, false)
@@ -879,6 +879,33 @@ public class HoodieDataSourceITCase extends AbstractTestBase {
assertRowsEquals(result1, "[+I[1.23, 12345678.12, 12345.12, 123456789.123450000000000000]]"); assertRowsEquals(result1, "[+I[1.23, 12345678.12, 12345.12, 123456789.123450000000000000]]");
} }
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
void testIncrementalRead(HoodieTableType tableType) throws Exception {
TableEnvironment tableEnv = batchTableEnv;
Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath());
conf.setString(FlinkOptions.TABLE_NAME, "t1");
conf.setString(FlinkOptions.TABLE_TYPE, tableType.name());
// write 3 batches of data
TestData.writeData(TestData.dataSetInsert(1, 2), conf);
TestData.writeData(TestData.dataSetInsert(3, 4), conf);
TestData.writeData(TestData.dataSetInsert(5, 6), conf);
String latestCommit = TestUtils.getLatestCommit(tempFile.getAbsolutePath());
String hoodieTableDDL = sql("t1")
.option(FlinkOptions.PATH, tempFile.getAbsolutePath())
.option(FlinkOptions.TABLE_TYPE, tableType)
.option(FlinkOptions.READ_START_COMMIT, latestCommit)
.end();
tableEnv.executeSql(hoodieTableDDL);
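// read.start-commit points at the latest instant, so the batch query should only see the third batch (ids 5 and 6)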
List<Row> result = CollectionUtil.iterableToList(
() -> tableEnv.sqlQuery("select * from t1").execute().collect());
assertRowsEquals(result, TestData.dataSetInsert(5, 6));
}
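For reference, the batch incremental read exercised above can be written directly in Flink SQL. The sketch below is illustrative only: the path, schema and instant times are made up, while 'read.start-commit' and 'read.end-commit' are the options introduced by this change.
CREATE TABLE t1 (
  uuid VARCHAR(20),
  name VARCHAR(10),
  age INT,
  ts TIMESTAMP(3),
  `partition` VARCHAR(20)
) PARTITIONED BY (`partition`) WITH (
  'connector' = 'hudi',
  'path' = 'file:///tmp/t1',                -- illustrative path
  'table.type' = 'MERGE_ON_READ',
  'read.start-commit' = '20210919090000',   -- inclusive start instant, format yyyyMMddHHmmss
  'read.end-commit' = '20210919100000'      -- optional inclusive end instant
);
-- a plain batch query then returns only the rows written within the commit range
SELECT * FROM t1;
Keeping only 'read.end-commit' and omitting the start commit gives a point-in-time read, as exercised in TestInputFormat further below.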
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// Utilities // Utilities
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------

View File

@@ -231,6 +231,32 @@ public class TestHoodieTableFactory {
assertThat(conf2.getInteger(FlinkOptions.ARCHIVE_MAX_COMMITS), is(45)); assertThat(conf2.getInteger(FlinkOptions.ARCHIVE_MAX_COMMITS), is(45));
} }
@Test
void testSetupReadOptionsForSource() {
// definition with simple primary key and partition path
ResolvedSchema schema1 = SchemaBuilder.instance()
.field("f0", DataTypes.INT().notNull())
.field("f1", DataTypes.VARCHAR(20))
.field("f2", DataTypes.TIMESTAMP(3))
.field("ts", DataTypes.TIMESTAMP(3))
.primaryKey("f0")
.build();
// setting up the read end commit should switch the query type to incremental
this.conf.setString(FlinkOptions.READ_END_COMMIT, "123");
final MockContext sourceContext1 = MockContext.getInstance(this.conf, schema1, "f2");
final HoodieTableSource tableSource1 = (HoodieTableSource) new HoodieTableFactory().createDynamicTableSource(sourceContext1);
final Configuration conf1 = tableSource1.getConf();
assertThat(conf1.getString(FlinkOptions.QUERY_TYPE), is(FlinkOptions.QUERY_TYPE_INCREMENTAL));
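// specifying only the start commit should also switch the query type to incremental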
this.conf.removeConfig(FlinkOptions.READ_END_COMMIT);
this.conf.setString(FlinkOptions.READ_START_COMMIT, "123");
final MockContext sourceContext2 = MockContext.getInstance(this.conf, schema1, "f2");
final HoodieTableSource tableSource2 = (HoodieTableSource) new HoodieTableFactory().createDynamicTableSource(sourceContext2);
final Configuration conf2 = tableSource2.getConf();
assertThat(conf2.getString(FlinkOptions.QUERY_TYPE), is(FlinkOptions.QUERY_TYPE_INCREMENTAL));
}
@Test @Test
void testInferAvroSchemaForSink() { void testInferAvroSchemaForSink() {
// infer the schema if not specified // infer the schema if not specified

View File

@@ -19,7 +19,6 @@
package org.apache.hudi.table; package org.apache.hudi.table;
import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.table.format.mor.MergeOnReadInputFormat; import org.apache.hudi.table.format.mor.MergeOnReadInputFormat;
import org.apache.hudi.utils.TestConfigurations; import org.apache.hudi.utils.TestConfigurations;
import org.apache.hudi.utils.TestData; import org.apache.hudi.utils.TestData;
@@ -31,6 +30,7 @@ import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.data.RowData; import org.apache.flink.table.data.RowData;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.function.ThrowingSupplier;
import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.api.io.TempDir;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@@ -46,9 +46,9 @@ import java.util.stream.Collectors;
import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is; import static org.hamcrest.core.Is.is;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
/** /**
* Test cases for HoodieTableSource. * Test cases for HoodieTableSource.
@@ -112,9 +112,9 @@ public class TestHoodieTableSource {
inputFormat = tableSource.getInputFormat(); inputFormat = tableSource.getInputFormat();
assertThat(inputFormat, is(instanceOf(MergeOnReadInputFormat.class))); assertThat(inputFormat, is(instanceOf(MergeOnReadInputFormat.class)));
conf.setString(FlinkOptions.QUERY_TYPE.key(), FlinkOptions.QUERY_TYPE_INCREMENTAL); conf.setString(FlinkOptions.QUERY_TYPE.key(), FlinkOptions.QUERY_TYPE_INCREMENTAL);
assertThrows(HoodieException.class, assertDoesNotThrow(
() -> tableSource.getInputFormat(), (ThrowingSupplier<? extends InputFormat<RowData, ?>>) tableSource::getInputFormat,
"Invalid query type : 'incremental'. Only 'snapshot' is supported now"); "Query type: 'incremental' should be supported");
} }
@Test @Test

View File

@@ -19,6 +19,8 @@
package org.apache.hudi.table.format; package org.apache.hudi.table.format;
import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.table.HoodieTableSource; import org.apache.hudi.table.HoodieTableSource;
import org.apache.hudi.table.format.cow.CopyOnWriteInputFormat; import org.apache.hudi.table.format.cow.CopyOnWriteInputFormat;
@@ -44,6 +46,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.is;
@@ -71,12 +74,7 @@ public class TestInputFormat {
options.forEach((key, value) -> conf.setString(key, value)); options.forEach((key, value) -> conf.setString(key, value));
StreamerUtil.initTableIfNotExists(conf); StreamerUtil.initTableIfNotExists(conf);
this.tableSource = new HoodieTableSource( this.tableSource = getTableSource(conf);
TestConfigurations.TABLE_SCHEMA,
new Path(tempFile.getAbsolutePath()),
Collections.singletonList("partition"),
"default",
conf);
} }
@ParameterizedTest @ParameterizedTest
@@ -385,10 +383,81 @@ public class TestInputFormat {
assertThat(actual, is(expected)); assertThat(actual, is(expected));
} }
@ParameterizedTest
@EnumSource(value = HoodieTableType.class)
void testReadIncrementally(HoodieTableType tableType) throws Exception {
Map<String, String> options = new HashMap<>();
options.put(FlinkOptions.QUERY_TYPE.key(), FlinkOptions.QUERY_TYPE_INCREMENTAL);
beforeEach(tableType, options);
// write three batches of data, each in a separate commit
for (int i = 0; i < 6; i += 2) {
List<RowData> dataset = TestData.dataSetInsert(i + 1, i + 2);
TestData.writeData(dataset, conf);
}
HoodieTableMetaClient metaClient = StreamerUtil.createMetaClient(tempFile.getAbsolutePath());
List<String> commits = metaClient.getCommitsTimeline().filterCompletedInstants().getInstants()
.map(HoodieInstant::getTimestamp).collect(Collectors.toList());
assertThat(commits.size(), is(3));
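// completed instants are returned in ascending commit time order, so commits.get(0) is the earliest commit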
// only the start commit
conf.setString(FlinkOptions.READ_START_COMMIT, commits.get(1));
this.tableSource = getTableSource(conf);
InputFormat<RowData, ?> inputFormat1 = this.tableSource.getInputFormat();
assertThat(inputFormat1, instanceOf(MergeOnReadInputFormat.class));
List<RowData> actual1 = readData(inputFormat1);
final List<RowData> expected1 = TestData.dataSetInsert(3, 4, 5, 6);
TestData.assertRowDataEquals(actual1, expected1);
// only the start commit: earliest
conf.setString(FlinkOptions.READ_START_COMMIT, FlinkOptions.START_COMMIT_EARLIEST);
this.tableSource = getTableSource(conf);
InputFormat<RowData, ?> inputFormat2 = this.tableSource.getInputFormat();
assertThat(inputFormat2, instanceOf(MergeOnReadInputFormat.class));
List<RowData> actual2 = readData(inputFormat2);
final List<RowData> expected2 = TestData.dataSetInsert(1, 2, 3, 4, 5, 6);
TestData.assertRowDataEquals(actual2, expected2);
// start and end commit: [start commit, end commit]
conf.setString(FlinkOptions.READ_START_COMMIT, commits.get(0));
conf.setString(FlinkOptions.READ_END_COMMIT, commits.get(1));
this.tableSource = getTableSource(conf);
InputFormat<RowData, ?> inputFormat3 = this.tableSource.getInputFormat();
assertThat(inputFormat3, instanceOf(MergeOnReadInputFormat.class));
List<RowData> actual3 = readData(inputFormat3);
final List<RowData> expected3 = TestData.dataSetInsert(1, 2, 3, 4);
TestData.assertRowDataEquals(actual3, expected3);
// only the end commit: point in time query
conf.removeConfig(FlinkOptions.READ_START_COMMIT);
conf.setString(FlinkOptions.READ_END_COMMIT, commits.get(1));
this.tableSource = getTableSource(conf);
InputFormat<RowData, ?> inputFormat4 = this.tableSource.getInputFormat();
assertThat(inputFormat4, instanceOf(MergeOnReadInputFormat.class));
List<RowData> actual4 = readData(inputFormat4);
final List<RowData> expected4 = TestData.dataSetInsert(3, 4);
TestData.assertRowDataEquals(actual4, expected4);
}
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// Utilities // Utilities
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
private HoodieTableSource getTableSource(Configuration conf) {
return new HoodieTableSource(
TestConfigurations.TABLE_SCHEMA,
new Path(tempFile.getAbsolutePath()),
Collections.singletonList("partition"),
"default",
conf);
}
@SuppressWarnings("unchecked, rawtypes") @SuppressWarnings("unchecked, rawtypes")
private static List<RowData> readData(InputFormat inputFormat) throws IOException { private static List<RowData> readData(InputFormat inputFormat) throws IOException {
InputSplit[] inputSplits = inputFormat.createInputSplits(1); InputSplit[] inputSplits = inputFormat.createInputSplits(1);

View File

@@ -284,6 +284,14 @@ public class TestData {
TimestampData.fromEpochMillis(2), StringData.fromString("par1")) TimestampData.fromEpochMillis(2), StringData.fromString("par1"))
); );
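/**
 * Returns a list of insert rows with the given ids: each row has name 'Danny', age 23,
 * a timestamp equal to its id and partition 'par1'.
 */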
public static List<RowData> dataSetInsert(int... ids) {
List<RowData> inserts = new ArrayList<>();
Arrays.stream(ids).forEach(i -> inserts.add(
insertRow(StringData.fromString("id" + i), StringData.fromString("Danny"), 23,
TimestampData.fromEpochMillis(i), StringData.fromString("par1"))));
return inserts;
}
private static Integer toIdSafely(Object id) { private static Integer toIdSafely(Object id) {
if (id == null) { if (id == null) {
return -1; return -1;
@@ -424,7 +432,7 @@ public class TestData {
*/ */
public static void assertRowDataEquals(List<RowData> rows, List<RowData> expected) { public static void assertRowDataEquals(List<RowData> rows, List<RowData> expected) {
String rowsString = rowDataToString(rows); String rowsString = rowDataToString(rows);
assertThat(rowDataToString(expected), is(rowsString)); assertThat(rowsString, is(rowDataToString(expected)));
} }
/** /**

View File

@@ -28,7 +28,6 @@ import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path; import org.apache.flink.core.fs.Path;
import java.io.File; import java.io.File;
import java.util.Collections;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -58,6 +57,6 @@ public class TestUtils {
public static StreamReadMonitoringFunction getMonitorFunc(Configuration conf) { public static StreamReadMonitoringFunction getMonitorFunc(Configuration conf) {
final String basePath = conf.getString(FlinkOptions.PATH); final String basePath = conf.getString(FlinkOptions.PATH);
return new StreamReadMonitoringFunction(conf, new Path(basePath), 1024 * 1024L, Collections.emptySet()); return new StreamReadMonitoringFunction(conf, new Path(basePath), 1024 * 1024L, null);
} }
} }