
[HUDI-296] Explore use of spotless to auto fix formatting errors (#945)

- Add spotless format checking and fixing to the project
- One-time reformatting of the codebase for conformity
- The build now fails on formatting violations; mvn spotless:apply auto-fixes them
Author: leesf
Authored: 2019-10-10 20:19:40 +08:00
Committed by: vinoth chandar
Parent: 834c591955
Commit: b19bed442d
381 changed files with 7350 additions and 9064 deletions
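
Note: the build integration described above (formatting violations fail the build; mvn spotless:apply rewrites the offending files) is normally wired up through the spotless-maven-plugin in the parent pom. The exact configuration this commit adds is not part of this excerpt, so the sketch below is only an illustrative assumption: the plugin version, formatter file name, and goal binding are guesses, not values copied from the Hudi poms. The ${main.basedir} property that the pom.xml hunks below introduce in each module is the usual way to let such a plugin resolve a shared file at the repository root regardless of module nesting depth.

    <plugin>
      <groupId>com.diffplug.spotless</groupId>
      <artifactId>spotless-maven-plugin</artifactId>
      <!-- assumed version, for illustration only -->
      <version>1.25.1</version>
      <configuration>
        <java>
          <removeUnusedImports/>
          <eclipse>
            <!-- hypothetical path; resolved against the repo root via main.basedir -->
            <file>${main.basedir}/style/eclipse-java-formatter.xml</file>
          </eclipse>
        </java>
      </configuration>
      <executions>
        <execution>
          <goals>
            <!-- spotless:check fails the build on formatting violations -->
            <goal>check</goal>
          </goals>
        </execution>
      </executions>
    </plugin>

With a setup of this shape, mvn spotless:check reproduces the failure locally and mvn spotless:apply performs the one-time (and any subsequent) auto-fix.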

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -56,6 +56,7 @@
     <docker.presto.version>0.217</docker.presto.version>
     <dockerfile.maven.version>1.4.3</dockerfile.maven.version>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.basedir}</main.basedir>
   </properties>
   <build>

View File

@@ -32,6 +32,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -30,6 +30,7 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <checkstyle.skip>true</checkstyle.skip>
+    <main.basedir>${project.parent.parent.basedir}</main.basedir>
   </properties>
   <dependencyManagement>

View File

@@ -29,6 +29,7 @@
   <properties>
     <spring.shell.version>1.2.0.RELEASE</spring.shell.version>
     <jar.mainclass>org.springframework.shell.Bootstrap</jar.mainclass>
+    <main.basedir>${project.parent.basedir}</main.basedir>
   </properties>
   <repositories>

View File

@@ -52,19 +52,16 @@ public class HoodiePrintHelper {
    * @param rows List of rows
    * @return Serialized form for printing
    */
-  public static String print(TableHeader rowHeader,
-      Map<String, Function<Object, String>> fieldNameToConverterMap,
-      String sortByField, boolean isDescending, Integer limit, boolean headerOnly,
-      List<Comparable[]> rows) {
+  public static String print(TableHeader rowHeader, Map<String, Function<Object, String>> fieldNameToConverterMap,
+      String sortByField, boolean isDescending, Integer limit, boolean headerOnly, List<Comparable[]> rows) {
     if (headerOnly) {
       return HoodiePrintHelper.print(rowHeader);
     }
-    Table table = new Table(rowHeader, fieldNameToConverterMap,
-        Option.ofNullable(sortByField.isEmpty() ? null : sortByField),
-        Option.ofNullable(isDescending),
-        Option.ofNullable(limit <= 0 ? null : limit)).addAllRows(rows).flip();
+    Table table =
+        new Table(rowHeader, fieldNameToConverterMap, Option.ofNullable(sortByField.isEmpty() ? null : sortByField),
+            Option.ofNullable(isDescending), Option.ofNullable(limit <= 0 ? null : limit)).addAllRows(rows).flip();
     return HoodiePrintHelper.print(table);
   }
@@ -79,9 +76,8 @@ public class HoodiePrintHelper {
     String[] header = new String[buffer.getFieldNames().size()];
     buffer.getFieldNames().toArray(header);
-    String[][] rows = buffer.getRenderRows().stream()
-        .map(l -> l.stream().toArray(String[]::new))
-        .toArray(String[][]::new);
+    String[][] rows =
+        buffer.getRenderRows().stream().map(l -> l.stream().toArray(String[]::new)).toArray(String[][]::new);
     return printTextTable(header, rows);
   }

View File

@@ -31,8 +31,7 @@ import java.util.stream.IntStream;
 import org.apache.hudi.common.util.Option;
 /**
- * Table to be rendered. This class takes care of ordering
- * rows and limiting before renderer renders it.
+ * Table to be rendered. This class takes care of ordering rows and limiting before renderer renders it.
  */
 public class Table implements Iterable<List<String>> {
@@ -53,11 +52,8 @@ public class Table implements Iterable<List<String>> {
   // Rows ready for Rendering
   private List<List<String>> renderRows;
-  public Table(TableHeader rowHeader,
-      Map<String, Function<Object, String>> fieldNameToConverterMap,
-      Option<String> orderingFieldNameOptional,
-      Option<Boolean> isDescendingOptional,
-      Option<Integer> limitOptional) {
+  public Table(TableHeader rowHeader, Map<String, Function<Object, String>> fieldNameToConverterMap,
+      Option<String> orderingFieldNameOptional, Option<Boolean> isDescendingOptional, Option<Integer> limitOptional) {
     this.rowHeader = rowHeader;
     this.fieldNameToConverterMap = fieldNameToConverterMap;
     this.orderingFieldNameOptional = orderingFieldNameOptional;
@@ -68,6 +64,7 @@ public class Table implements Iterable<List<String>> {
   /**
    * Main API to add row to the table
+   *
    * @param row Row
    */
   public Table add(List<Comparable> row) {
@@ -86,6 +83,7 @@ public class Table implements Iterable<List<String>> {
   /**
    * Add all rows
+   *
    * @param rows Rows to be aded
    * @return
    */
@@ -96,6 +94,7 @@ public class Table implements Iterable<List<String>> {
   /**
    * Add all rows
+   *
    * @param rows Rows to be added
    * @return
    */
@@ -115,6 +114,7 @@ public class Table implements Iterable<List<String>> {
   /**
    * Sorting of rows by a specified field
+   *
    * @return
    */
   private List<List<Comparable>> orderRows() {

View File

@@ -59,8 +59,8 @@ public class ArchivedCommitsCommand implements CommandMarker {
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws IOException {
     System.out.println("===============> Showing only " + limit + " archived commits <===============");
     String basePath = HoodieCLI.tableMetadata.getBasePath();
@@ -86,9 +86,8 @@ public class ArchivedCommitsCommand implements CommandMarker {
         .filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION)
             || r.get("actionType").toString().equals(HoodieTimeline.DELTA_COMMIT_ACTION))
         .flatMap(r -> {
-          HoodieCommitMetadata metadata =
-              (HoodieCommitMetadata) SpecificData.get().deepCopy(HoodieCommitMetadata.SCHEMA$,
-                  r.get("hoodieCommitMetadata"));
+          HoodieCommitMetadata metadata = (HoodieCommitMetadata) SpecificData.get()
+              .deepCopy(HoodieCommitMetadata.SCHEMA$, r.get("hoodieCommitMetadata"));
           final String instantTime = r.get("commitTime").toString();
           final String action = r.get("actionType").toString();
           return metadata.getPartitionToWriteStats().values().stream().flatMap(hoodieWriteStats -> {
@@ -118,22 +117,13 @@ public class ArchivedCommitsCommand implements CommandMarker {
       allStats.addAll(readCommits);
       reader.close();
     }
-    TableHeader header = new TableHeader().addTableHeaderField("action")
-        .addTableHeaderField("instant")
-        .addTableHeaderField("partition")
-        .addTableHeaderField("file_id")
-        .addTableHeaderField("prev_instant")
-        .addTableHeaderField("num_writes")
-        .addTableHeaderField("num_inserts")
-        .addTableHeaderField("num_deletes")
-        .addTableHeaderField("num_update_writes")
-        .addTableHeaderField("total_log_files")
-        .addTableHeaderField("total_log_blocks")
-        .addTableHeaderField("total_corrupt_log_blocks")
-        .addTableHeaderField("total_rollback_blocks")
-        .addTableHeaderField("total_log_records")
-        .addTableHeaderField("total_updated_records_compacted")
-        .addTableHeaderField("total_write_bytes")
+    TableHeader header = new TableHeader().addTableHeaderField("action").addTableHeaderField("instant")
+        .addTableHeaderField("partition").addTableHeaderField("file_id").addTableHeaderField("prev_instant")
+        .addTableHeaderField("num_writes").addTableHeaderField("num_inserts").addTableHeaderField("num_deletes")
+        .addTableHeaderField("num_update_writes").addTableHeaderField("total_log_files")
+        .addTableHeaderField("total_log_blocks").addTableHeaderField("total_corrupt_log_blocks")
+        .addTableHeaderField("total_rollback_blocks").addTableHeaderField("total_log_records")
+        .addTableHeaderField("total_updated_records_compacted").addTableHeaderField("total_write_bytes")
         .addTableHeaderField("total_write_errors");
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, allStats);
@@ -141,19 +131,19 @@ public class ArchivedCommitsCommand implements CommandMarker {
   @CliCommand(value = "show archived commits", help = "Read commits from archived files and show details")
   public String showCommits(
-      @CliOption(key = {"skipMetadata"}, help = "Skip displaying commit metadata", unspecifiedDefaultValue = "true")
-          boolean skipMetadata,
+      @CliOption(key = {"skipMetadata"}, help = "Skip displaying commit metadata",
+          unspecifiedDefaultValue = "true") boolean skipMetadata,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws IOException {
     System.out.println("===============> Showing only " + limit + " archived commits <===============");
     String basePath = HoodieCLI.tableMetadata.getBasePath();
-    FileStatus[] fsStatuses = FSUtils.getFs(basePath, HoodieCLI.conf)
-        .globStatus(new Path(basePath + "/.hoodie/.commits_.archive*"));
+    FileStatus[] fsStatuses =
+        FSUtils.getFs(basePath, HoodieCLI.conf).globStatus(new Path(basePath + "/.hoodie/.commits_.archive*"));
     List<Comparable[]> allCommits = new ArrayList<>();
     for (FileStatus fs : fsStatuses) {
       // read the archived file
@@ -167,15 +157,13 @@ public class ArchivedCommitsCommand implements CommandMarker {
         List<IndexedRecord> records = blk.getRecords();
         readRecords.addAll(records);
       }
-      List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r).map(r ->
-          readCommit(r, skipMetadata))
-          .collect(Collectors.toList());
+      List<Comparable[]> readCommits = readRecords.stream().map(r -> (GenericRecord) r)
+          .map(r -> readCommit(r, skipMetadata)).collect(Collectors.toList());
       allCommits.addAll(readCommits);
       reader.close();
     }
-    TableHeader header = new TableHeader().addTableHeaderField("CommitTime")
-        .addTableHeaderField("CommitType");
+    TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("CommitType");
     if (!skipMetadata) {
       header = header.addTableHeaderField("CommitDetails");

View File

@@ -63,8 +63,8 @@ public class CleansCommand implements CommandMarker {
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws IOException {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
@@ -74,17 +74,15 @@ public class CleansCommand implements CommandMarker {
     Collections.reverse(cleans);
     for (int i = 0; i < cleans.size(); i++) {
       HoodieInstant clean = cleans.get(i);
-      HoodieCleanMetadata cleanMetadata = AvroUtils
-          .deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
+      HoodieCleanMetadata cleanMetadata =
+          AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get());
       rows.add(new Comparable[] {clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(),
           cleanMetadata.getTotalFilesDeleted(), cleanMetadata.getTimeTakenInMillis()});
     }
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("CleanTime")
-        .addTableHeaderField("EarliestCommandRetained")
-        .addTableHeaderField("Total Files Deleted")
-        .addTableHeaderField("Total Time Taken");
+    TableHeader header =
+        new TableHeader().addTableHeaderField("CleanTime").addTableHeaderField("EarliestCommandRetained")
+            .addTableHeaderField("Total Files Deleted").addTableHeaderField("Total Time Taken");
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
   }
@@ -95,13 +93,12 @@ public class CleansCommand implements CommandMarker {
   }
   @CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean")
-  public String showCleanPartitions(
-      @CliOption(key = {"clean"}, help = "clean to show") final String commitTime,
+  public String showCleanPartitions(@CliOption(key = {"clean"}, help = "clean to show") final String commitTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
@@ -112,8 +109,8 @@ public class CleansCommand implements CommandMarker {
       return "Clean " + commitTime + " not found in metadata " + timeline;
     }
-    HoodieCleanMetadata cleanMetadata = AvroUtils.deserializeHoodieCleanMetadata(
-        timeline.getInstantDetails(cleanInstant).get());
+    HoodieCleanMetadata cleanMetadata =
+        AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get());
     List<Comparable[]> rows = new ArrayList<>();
     for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
       String path = entry.getKey();
@@ -124,11 +121,8 @@ public class CleansCommand implements CommandMarker {
       rows.add(new Comparable[] {path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles});
     }
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("Partition Path")
-        .addTableHeaderField("Cleaning policy")
-        .addTableHeaderField("Total Files Successfully Deleted")
-        .addTableHeaderField("Total Failed Deletions");
+    TableHeader header = new TableHeader().addTableHeaderField("Partition Path").addTableHeaderField("Cleaning policy")
+        .addTableHeaderField("Total Files Successfully Deleted").addTableHeaderField("Total Failed Deletions");
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
   }

View File

@@ -69,12 +69,13 @@ public class CommitsCommand implements CommandMarker {
   }
   @CliCommand(value = "commits show", help = "Show the commits")
-  public String showCommits(@CliOption(key = {
-      "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
+  public String showCommits(
+      @CliOption(key = {"limit"}, mandatory = false, help = "Limit commits",
+          unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws IOException {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
@@ -84,16 +85,12 @@ public class CommitsCommand implements CommandMarker {
     Collections.reverse(commits);
     for (int i = 0; i < commits.size(); i++) {
       HoodieInstant commit = commits.get(i);
-      HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get(),
-          HoodieCommitMetadata.class);
-      rows.add(new Comparable[]{commit.getTimestamp(),
-          commitMetadata.fetchTotalBytesWritten(),
-          commitMetadata.fetchTotalFilesInsert(),
-          commitMetadata.fetchTotalFilesUpdated(),
-          commitMetadata.fetchTotalPartitionsWritten(),
-          commitMetadata.fetchTotalRecordsWritten(),
-          commitMetadata.fetchTotalUpdateRecordsWritten(),
-          commitMetadata.fetchTotalWriteErrors()});
+      HoodieCommitMetadata commitMetadata =
+          HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get(), HoodieCommitMetadata.class);
+      rows.add(new Comparable[] {commit.getTimestamp(), commitMetadata.fetchTotalBytesWritten(),
+          commitMetadata.fetchTotalFilesInsert(), commitMetadata.fetchTotalFilesUpdated(),
+          commitMetadata.fetchTotalPartitionsWritten(), commitMetadata.fetchTotalRecordsWritten(),
+          commitMetadata.fetchTotalUpdateRecordsWritten(), commitMetadata.fetchTotalWriteErrors()});
     }
     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
@@ -101,15 +98,10 @@ public class CommitsCommand implements CommandMarker {
       return NumericUtils.humanReadableByteCount((Double.valueOf(entry.toString())));
     });
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("CommitTime")
-        .addTableHeaderField("Total Bytes Written")
-        .addTableHeaderField("Total Files Added")
-        .addTableHeaderField("Total Files Updated")
-        .addTableHeaderField("Total Partitions Written")
-        .addTableHeaderField("Total Records Written")
-        .addTableHeaderField("Total Update Records Written")
-        .addTableHeaderField("Total Errors");
+    TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Total Bytes Written")
+        .addTableHeaderField("Total Files Added").addTableHeaderField("Total Files Updated")
+        .addTableHeaderField("Total Partitions Written").addTableHeaderField("Total Records Written")
+        .addTableHeaderField("Total Update Records Written").addTableHeaderField("Total Errors");
     return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
   }
@@ -132,8 +124,8 @@ public class CommitsCommand implements CommandMarker {
     }
     SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-    sparkLauncher
-        .addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath());
+    sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime,
+        HoodieCLI.tableMetadata.getBasePath());
     Process process = sparkLauncher.launch();
     InputStreamConsumer.captureOutput(process);
     int exitCode = process.waitFor();
@@ -146,13 +138,12 @@ public class CommitsCommand implements CommandMarker {
   }
   @CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit")
-  public String showCommitPartitions(
-      @CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
+  public String showCommitPartitions(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
@@ -185,8 +176,7 @@ public class CommitsCommand implements CommandMarker {
         totalBytesWritten += stat.getTotalWriteBytes();
         totalWriteErrors += stat.getTotalWriteErrors();
       }
-      rows.add(new Comparable[]{path, totalFilesAdded, totalFilesUpdated,
-          totalRecordsInserted, totalRecordsUpdated,
+      rows.add(new Comparable[] {path, totalFilesAdded, totalFilesUpdated, totalRecordsInserted, totalRecordsUpdated,
          totalBytesWritten, totalWriteErrors});
     }
@@ -195,26 +185,21 @@ public class CommitsCommand implements CommandMarker {
       return NumericUtils.humanReadableByteCount((Long.valueOf(entry.toString())));
     });
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("Partition Path")
-        .addTableHeaderField("Total Files Added")
-        .addTableHeaderField("Total Files Updated")
-        .addTableHeaderField("Total Records Inserted")
-        .addTableHeaderField("Total Records Updated")
-        .addTableHeaderField("Total Bytes Written")
-        .addTableHeaderField("Total Errors");
+    TableHeader header = new TableHeader().addTableHeaderField("Partition Path")
+        .addTableHeaderField("Total Files Added").addTableHeaderField("Total Files Updated")
+        .addTableHeaderField("Total Records Inserted").addTableHeaderField("Total Records Updated")
+        .addTableHeaderField("Total Bytes Written").addTableHeaderField("Total Errors");
     return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
   }
   @CliCommand(value = "commit showfiles", help = "Show file level details of a commit")
-  public String showCommitFiles(
-      @CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
+  public String showCommitFiles(@CliOption(key = {"commit"}, help = "Commit to show") final String commitTime,
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
@@ -232,22 +217,14 @@ public class CommitsCommand implements CommandMarker {
       List<HoodieWriteStat> stats = entry.getValue();
       for (HoodieWriteStat stat : stats) {
         rows.add(new Comparable[] {path, stat.getFileId(), stat.getPrevCommit(), stat.getNumUpdateWrites(),
-            stat.getNumWrites(), stat.getTotalWriteBytes(),
-            stat.getTotalWriteErrors(),
-            stat.getFileSizeInBytes()
-        });
+            stat.getNumWrites(), stat.getTotalWriteBytes(), stat.getTotalWriteErrors(), stat.getFileSizeInBytes()});
       }
     }
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("Partition Path")
-        .addTableHeaderField("File ID")
-        .addTableHeaderField("Previous Commit")
-        .addTableHeaderField("Total Records Updated")
-        .addTableHeaderField("Total Records Written")
-        .addTableHeaderField("Total Bytes Written")
-        .addTableHeaderField("Total Errors")
-        .addTableHeaderField("File Size");
+    TableHeader header = new TableHeader().addTableHeaderField("Partition Path").addTableHeaderField("File ID")
+        .addTableHeaderField("Previous Commit").addTableHeaderField("Total Records Updated")
+        .addTableHeaderField("Total Records Written").addTableHeaderField("Total Bytes Written")
+        .addTableHeaderField("Total Errors").addTableHeaderField("File Size");
     return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
   }
@@ -270,8 +247,8 @@ public class CommitsCommand implements CommandMarker {
     String sourceLatestCommit =
         sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
-    if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit,
-        HoodieTimeline.GREATER)) {
+    if (sourceLatestCommit != null
+        && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
       // source is behind the target
       List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
          .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());

View File

@@ -75,16 +75,15 @@ public class CompactionCommand implements CommandMarker {
   @CliCommand(value = "compactions show all", help = "Shows all compactions that are in active timeline")
   public String compactionsAll(
-      @CliOption(key = {
-          "includeExtraMetadata"}, help = "Include extra metadata", unspecifiedDefaultValue = "false") final
-          boolean includeExtraMetadata,
-      @CliOption(key = {
-          "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
+      @CliOption(key = {"includeExtraMetadata"}, help = "Include extra metadata",
+          unspecifiedDefaultValue = "false") final boolean includeExtraMetadata,
+      @CliOption(key = {"limit"}, mandatory = false, help = "Limit commits",
+          unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final
-          boolean headerOnly) throws IOException {
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
+      throws IOException {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
     HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionTimeline();
     HoodieTimeline commitTimeline = activeTimeline.getCommitTimeline().filterCompletedInstants();
@@ -99,15 +98,14 @@ public class CompactionCommand implements CommandMarker {
       if (!instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION)) {
         try {
           // This could be a completed compaction. Assume a compaction request file is present but skip if fails
-          workload = AvroUtils.deserializeCompactionPlan(
-              activeTimeline.getInstantAuxiliaryDetails(
-                  HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get());
+          workload = AvroUtils.deserializeCompactionPlan(activeTimeline
+              .getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get());
         } catch (HoodieIOException ioe) {
           // SKIP
         }
       } else {
-        workload = AvroUtils.deserializeCompactionPlan(activeTimeline.getInstantAuxiliaryDetails(
-            HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get());
+        workload = AvroUtils.deserializeCompactionPlan(activeTimeline
+            .getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get());
       }
       if (null != workload) {
@@ -116,22 +114,18 @@ public class CompactionCommand implements CommandMarker {
         state = State.COMPLETED;
       }
       if (includeExtraMetadata) {
-        rows.add(new Comparable[]{instant.getTimestamp(),
-            state.toString(),
+        rows.add(new Comparable[] {instant.getTimestamp(), state.toString(),
            workload.getOperations() == null ? 0 : workload.getOperations().size(),
            workload.getExtraMetadata().toString()});
       } else {
-        rows.add(new Comparable[]{instant.getTimestamp(),
-            state.toString(),
+        rows.add(new Comparable[] {instant.getTimestamp(), state.toString(),
            workload.getOperations() == null ? 0 : workload.getOperations().size()});
       }
     }
     }
     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("Compaction Instant Time")
-        .addTableHeaderField("State")
+    TableHeader header = new TableHeader().addTableHeaderField("Compaction Instant Time").addTableHeaderField("State")
        .addTableHeaderField("Total FileIds to be Compacted");
     if (includeExtraMetadata) {
       header = header.addTableHeaderField("Extra Metadata");
@@ -141,48 +135,37 @@ public class CompactionCommand implements CommandMarker {
   @CliCommand(value = "compaction show", help = "Shows compaction details for a specific compaction instant")
   public String compactionShow(
-      @CliOption(key = "instant", mandatory = true, help = "Base path for the target hoodie dataset") final
-          String compactionInstantTime,
-      @CliOption(key = {
-          "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
+      @CliOption(key = "instant", mandatory = true,
+          help = "Base path for the target hoodie dataset") final String compactionInstantTime,
+      @CliOption(key = {"limit"}, mandatory = false, help = "Limit commits",
+          unspecifiedDefaultValue = "-1") final Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") final boolean headerOnly)
       throws Exception {
     HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline();
-    HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
-        activeTimeline.getInstantAuxiliaryDetails(
-            HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
+    HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(activeTimeline
+        .getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
     List<Comparable[]> rows = new ArrayList<>();
     if ((null != workload) && (null != workload.getOperations())) {
       for (HoodieCompactionOperation op : workload.getOperations()) {
-        rows.add(new Comparable[]{op.getPartitionPath(),
-            op.getFileId(),
-            op.getBaseInstantTime(),
-            op.getDataFilePath(),
-            op.getDeltaFilePaths().size(),
-            op.getMetrics() == null ? "" : op.getMetrics().toString()
-        });
+        rows.add(new Comparable[] {op.getPartitionPath(), op.getFileId(), op.getBaseInstantTime(), op.getDataFilePath(),
+            op.getDeltaFilePaths().size(), op.getMetrics() == null ? "" : op.getMetrics().toString()});
       }
     }
     Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-    TableHeader header = new TableHeader()
-        .addTableHeaderField("Partition Path")
-        .addTableHeaderField("File Id")
-        .addTableHeaderField("Base Instant")
-        .addTableHeaderField("Data File Path")
-        .addTableHeaderField("Total Delta Files")
-        .addTableHeaderField("getMetrics");
+    TableHeader header = new TableHeader().addTableHeaderField("Partition Path").addTableHeaderField("File Id")
+        .addTableHeaderField("Base Instant").addTableHeaderField("Data File Path")
+        .addTableHeaderField("Total Delta Files").addTableHeaderField("getMetrics");
     return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
   }
   @CliCommand(value = "compaction schedule", help = "Schedule Compaction")
-  public String scheduleCompact(
-      @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G", help = "Spark executor memory")
-          final String sparkMemory) throws Exception {
+  public String scheduleCompact(@CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G",
+      help = "Spark executor memory") final String sparkMemory) throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
@@ -190,8 +173,8 @@ public class CompactionCommand implements CommandMarker {
     String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime();
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
-      String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-          scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+      String sparkPropertiesPath =
+          Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
       SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
       sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE.toString(), HoodieCLI.tableMetadata.getBasePath(),
           HoodieCLI.tableMetadata.getTableConfig().getTableName(), compactionInstantTime, sparkMemory);
@@ -209,33 +192,34 @@ public class CompactionCommand implements CommandMarker {
   @CliCommand(value = "compaction run", help = "Run Compaction for given instant time")
   public String compact(
-      @CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie compaction")
-          final String parallelism,
-      @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file")
-          final String schemaFilePath,
-      @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", help = "Spark executor memory")
-          final String sparkMemory,
-      @CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries")
-          final String retry,
-      @CliOption(key = "compactionInstant", mandatory = false, help = "Base path for the target hoodie dataset")
-          String compactionInstantTime) throws Exception {
+      @CliOption(key = {"parallelism"}, mandatory = true,
+          help = "Parallelism for hoodie compaction") final String parallelism,
+      @CliOption(key = "schemaFilePath", mandatory = true,
+          help = "Path for Avro schema file") final String schemaFilePath,
+      @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G",
+          help = "Spark executor memory") final String sparkMemory,
+      @CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry,
+      @CliOption(key = "compactionInstant", mandatory = false,
+          help = "Base path for the target hoodie dataset") String compactionInstantTime)
+      throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
       if (null == compactionInstantTime) {
         // pick outstanding one with lowest timestamp
-        Option<String> firstPendingInstant = HoodieCLI.tableMetadata.reloadActiveTimeline()
-            .filterCompletedAndCompactionInstants().filter(instant -> instant.getAction()
-                .equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant().map(HoodieInstant::getTimestamp);
+        Option<String> firstPendingInstant =
+            HoodieCLI.tableMetadata.reloadActiveTimeline().filterCompletedAndCompactionInstants()
+                .filter(instant -> instant.getAction().equals(HoodieTimeline.COMPACTION_ACTION)).firstInstant()
+                .map(HoodieInstant::getTimestamp);
         if (!firstPendingInstant.isPresent()) {
           return "NO PENDING COMPACTION TO RUN";
         }
         compactionInstantTime = firstPendingInstant.get();
       }
-      String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-          scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+      String sparkPropertiesPath =
+          Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
       SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
       sparkLauncher.addAppArgs(SparkCommand.COMPACT_RUN.toString(), HoodieCLI.tableMetadata.getBasePath(),
           HoodieCLI.tableMetadata.getTableConfig().getTableName(), compactionInstantTime, parallelism, schemaFilePath,
@@ -279,8 +263,8 @@ public class CompactionCommand implements CommandMarker {
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") boolean headerOnly)
       throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
@@ -290,12 +274,11 @@ public class CompactionCommand implements CommandMarker {
     String output = null;
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
       try {
-        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-            scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+        String sparkPropertiesPath = Utils
+            .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
         SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-        sparkLauncher.addAppArgs(SparkCommand.COMPACT_VALIDATE.toString(),
-            HoodieCLI.tableMetadata.getBasePath(), compactionInstant, outputPathStr, parallelism, master,
-            sparkMemory);
+        sparkLauncher.addAppArgs(SparkCommand.COMPACT_VALIDATE.toString(), HoodieCLI.tableMetadata.getBasePath(),
+            compactionInstant, outputPathStr, parallelism, master, sparkMemory);
         Process process = sparkLauncher.launch();
         InputStreamConsumer.captureOutput(process);
         int exitCode = process.waitFor();
@@ -307,8 +290,7 @@ public class CompactionCommand implements CommandMarker {
       String message = "\n\n\t COMPACTION PLAN " + (valid ? "VALID" : "INVALID") + "\n\n";
       List<Comparable[]> rows = new ArrayList<>();
       res.stream().forEach(r -> {
-        Comparable[] row = new Comparable[]{r.getOperation().getFileId(),
-            r.getOperation().getBaseInstantTime(),
+        Comparable[] row = new Comparable[] {r.getOperation().getFileId(), r.getOperation().getBaseInstantTime(),
            r.getOperation().getDataFilePath().isPresent() ? r.getOperation().getDataFilePath().get() : "",
            r.getOperation().getDeltaFilePaths().size(), r.isSuccess(),
            r.getException().isPresent() ? r.getException().get().getMessage() : ""};
@@ -316,12 +298,8 @@ public class CompactionCommand implements CommandMarker {
       });
       Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
-      TableHeader header = new TableHeader()
-          .addTableHeaderField("File Id")
-          .addTableHeaderField("Base Instant Time")
-          .addTableHeaderField("Base Data File")
-          .addTableHeaderField("Num Delta Files")
-          .addTableHeaderField("Valid")
+      TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Base Instant Time")
+          .addTableHeaderField("Base Data File").addTableHeaderField("Num Delta Files").addTableHeaderField("Valid")
          .addTableHeaderField("Error");
       output = message + HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit,
@@ -349,8 +327,8 @@ public class CompactionCommand implements CommandMarker {
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") boolean headerOnly)
       throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
@@ -360,12 +338,12 @@ public class CompactionCommand implements CommandMarker {
     String output = "";
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
       try {
-        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-            scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+        String sparkPropertiesPath = Utils
+            .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
         SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_PLAN.toString(),
-            HoodieCLI.tableMetadata.getBasePath(), compactionInstant, outputPathStr, parallelism, master,
-            sparkMemory, Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
+        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_PLAN.toString(), HoodieCLI.tableMetadata.getBasePath(),
+            compactionInstant, outputPathStr, parallelism, master, sparkMemory, Boolean.valueOf(skipV).toString(),
+            Boolean.valueOf(dryRun).toString());
         Process process = sparkLauncher.launch();
         InputStreamConsumer.captureOutput(process);
         int exitCode = process.waitFor();
@@ -373,8 +351,8 @@ public class CompactionCommand implements CommandMarker {
           return "Failed to unschedule compaction for " + compactionInstant;
         }
         List<RenameOpResult> res = deSerializeOperationResult(outputPathStr, HoodieCLI.fs);
-        output = getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly,
-            "unschedule pending compaction");
+        output =
+            getRenamesToBePrinted(res, limit, sortByField, descending, headerOnly, "unschedule pending compaction");
       } finally {
         // Delete tmp file used to serialize result
         if (HoodieCLI.fs.exists(outputPath)) {
@@ -407,12 +385,12 @@ public class CompactionCommand implements CommandMarker {
     String output = "";
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
       try {
-        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-            scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+        String sparkPropertiesPath = Utils
            .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
         SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
-        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(),
-            HoodieCLI.tableMetadata.getBasePath(), fileId, outputPathStr, "1", master,
-            sparkMemory, Boolean.valueOf(skipV).toString(), Boolean.valueOf(dryRun).toString());
+        sparkLauncher.addAppArgs(SparkCommand.COMPACT_UNSCHEDULE_FILE.toString(), HoodieCLI.tableMetadata.getBasePath(),
+            fileId, outputPathStr, "1", master, sparkMemory, Boolean.valueOf(skipV).toString(),
+            Boolean.valueOf(dryRun).toString());
         Process process = sparkLauncher.launch();
         InputStreamConsumer.captureOutput(process);
         int exitCode = process.waitFor();
@@ -445,8 +423,8 @@ public class CompactionCommand implements CommandMarker {
       @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") Integer limit,
       @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") String sortByField,
       @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") boolean descending,
-      @CliOption(key = {
-          "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") boolean headerOnly)
+      @CliOption(key = {"headeronly"}, help = "Print Header Only",
+          unspecifiedDefaultValue = "false") boolean headerOnly)
       throws Exception {
     boolean initialized = HoodieCLI.initConf();
     HoodieCLI.initFS(initialized);
@@ -455,12 +433,11 @@ public class CompactionCommand implements CommandMarker {
     String output = "";
     if (HoodieCLI.tableMetadata.getTableType() == HoodieTableType.MERGE_ON_READ) {
       try {
-        String sparkPropertiesPath = Utils.getDefaultPropertiesFile(
-            scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
+        String sparkPropertiesPath = Utils
+            .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties()));
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkCommand.COMPACT_REPAIR.toString(), sparkLauncher.addAppArgs(SparkCommand.COMPACT_REPAIR.toString(), HoodieCLI.tableMetadata.getBasePath(),
HoodieCLI.tableMetadata.getBasePath(), compactionInstant, outputPathStr, parallelism, master, compactionInstant, outputPathStr, parallelism, master, sparkMemory, Boolean.valueOf(dryRun).toString());
sparkMemory, Boolean.valueOf(dryRun).toString());
Process process = sparkLauncher.launch(); Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process); InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor(); int exitCode = process.waitFor();
@@ -481,41 +458,35 @@ public class CompactionCommand implements CommandMarker {
} }
} }
private String getRenamesToBePrinted(List<RenameOpResult> res, Integer limit, private String getRenamesToBePrinted(List<RenameOpResult> res, Integer limit, String sortByField, boolean descending,
String sortByField, boolean descending, boolean headerOnly, String operation) { boolean headerOnly, String operation) {
Option<Boolean> result = Option.fromJavaOptional( Option<Boolean> result =
res.stream().map(r -> r.isExecuted() && r.isSuccess()).reduce(Boolean::logicalAnd)); Option.fromJavaOptional(res.stream().map(r -> r.isExecuted() && r.isSuccess()).reduce(Boolean::logicalAnd));
if (result.isPresent()) { if (result.isPresent()) {
System.out.println("There were some file renames that needed to be done to " + operation); System.out.println("There were some file renames that needed to be done to " + operation);
if (result.get()) { if (result.get()) {
System.out.println("All renames successfully completed to " + operation + " done !!"); System.out.println("All renames successfully completed to " + operation + " done !!");
} else { } else {
System.out.println("Some renames failed. DataSet could be in inconsistent-state. " System.out
+ "Try running compaction repair"); .println("Some renames failed. DataSet could be in inconsistent-state. " + "Try running compaction repair");
} }
List<Comparable[]> rows = new ArrayList<>(); List<Comparable[]> rows = new ArrayList<>();
res.stream().forEach(r -> { res.stream().forEach(r -> {
Comparable[] row = new Comparable[] { Comparable[] row =
r.getOperation().fileId, r.getOperation().srcPath, r.getOperation().destPath, new Comparable[] {r.getOperation().fileId, r.getOperation().srcPath, r.getOperation().destPath,
r.isExecuted(), r.isSuccess(), r.getException().isPresent() ? r.getException().get().getMessage() : "" r.isExecuted(), r.isSuccess(), r.getException().isPresent() ? r.getException().get().getMessage() : ""};
};
rows.add(row); rows.add(row);
}); });
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>(); Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("File Id").addTableHeaderField("Source File Path")
.addTableHeaderField("File Id") .addTableHeaderField("Destination File Path").addTableHeaderField("Rename Executed?")
.addTableHeaderField("Source File Path") .addTableHeaderField("Rename Succeeded?").addTableHeaderField("Error");
.addTableHeaderField("Destination File Path")
.addTableHeaderField("Rename Executed?")
.addTableHeaderField("Rename Succeeded?")
.addTableHeaderField("Error");
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
limit, headerOnly, rows);
} else { } else {
return "No File renames needed to " + operation + ". Operation successful."; return "No File renames needed to " + operation + ". Operation successful.";
} }
View File
@@ -52,13 +52,12 @@ public class DatasetsCommand implements CommandMarker {
@CliOption(key = {"maxCheckIntervalMs"}, mandatory = false, unspecifiedDefaultValue = "300000", @CliOption(key = {"maxCheckIntervalMs"}, mandatory = false, unspecifiedDefaultValue = "300000",
help = "Max wait time for eventual consistency") final Integer maxConsistencyIntervalMs, help = "Max wait time for eventual consistency") final Integer maxConsistencyIntervalMs,
@CliOption(key = {"maxCheckIntervalMs"}, mandatory = false, unspecifiedDefaultValue = "7", @CliOption(key = {"maxCheckIntervalMs"}, mandatory = false, unspecifiedDefaultValue = "7",
help = "Max checks for eventual consistency") final Integer maxConsistencyChecks) throws IOException { help = "Max checks for eventual consistency") final Integer maxConsistencyChecks)
HoodieCLI.setConsistencyGuardConfig( throws IOException {
ConsistencyGuardConfig.newBuilder() HoodieCLI
.withConsistencyCheckEnabled(eventuallyConsistent) .setConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(eventuallyConsistent)
.withInitialConsistencyCheckIntervalMs(initialConsistencyIntervalMs) .withInitialConsistencyCheckIntervalMs(initialConsistencyIntervalMs)
.withMaxConsistencyCheckIntervalMs(maxConsistencyIntervalMs) .withMaxConsistencyCheckIntervalMs(maxConsistencyIntervalMs).withMaxConsistencyChecks(maxConsistencyChecks)
.withMaxConsistencyChecks(maxConsistencyChecks)
.build()); .build());
HoodieCLI.initConf(); HoodieCLI.initConf();
HoodieCLI.connectTo(path); HoodieCLI.connectTo(path);
@@ -82,7 +81,8 @@ public class DatasetsCommand implements CommandMarker {
@CliOption(key = {"tableType"}, unspecifiedDefaultValue = "COPY_ON_WRITE", @CliOption(key = {"tableType"}, unspecifiedDefaultValue = "COPY_ON_WRITE",
help = "Hoodie Table Type. Must be one of : COPY_ON_WRITE or MERGE_ON_READ") final String tableTypeStr, help = "Hoodie Table Type. Must be one of : COPY_ON_WRITE or MERGE_ON_READ") final String tableTypeStr,
@CliOption(key = {"payloadClass"}, unspecifiedDefaultValue = "org.apache.hudi.common.model.HoodieAvroPayload", @CliOption(key = {"payloadClass"}, unspecifiedDefaultValue = "org.apache.hudi.common.model.HoodieAvroPayload",
help = "Payload Class") final String payloadClass) throws IOException { help = "Payload Class") final String payloadClass)
throws IOException {
boolean initialized = HoodieCLI.initConf(); boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized); HoodieCLI.initFS(initialized);
@@ -117,9 +117,7 @@ public class DatasetsCommand implements CommandMarker {
*/ */
@CliCommand(value = "desc", help = "Describle Hoodie Table properties") @CliCommand(value = "desc", help = "Describle Hoodie Table properties")
public String descTable() { public String descTable() {
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("Property").addTableHeaderField("Value");
.addTableHeaderField("Property")
.addTableHeaderField("Value");
List<Comparable[]> rows = new ArrayList<>(); List<Comparable[]> rows = new ArrayList<>();
rows.add(new Comparable[] {"basePath", HoodieCLI.tableMetadata.getBasePath()}); rows.add(new Comparable[] {"basePath", HoodieCLI.tableMetadata.getBasePath()});
rows.add(new Comparable[] {"metaPath", HoodieCLI.tableMetadata.getMetaPath()}); rows.add(new Comparable[] {"metaPath", HoodieCLI.tableMetadata.getMetaPath()});
View File
@@ -52,24 +52,23 @@ public class FileSystemViewCommand implements CommandMarker {
@CliCommand(value = "show fsview all", help = "Show entire file-system view") @CliCommand(value = "show fsview all", help = "Show entire file-system view")
public String showAllFileSlices( public String showAllFileSlices(
@CliOption(key = {"pathRegex"}, @CliOption(key = {"pathRegex"}, help = "regex to select files, eg: 2016/08/02",
help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") String globRegex, unspecifiedDefaultValue = "*/*/*") String globRegex,
@CliOption(key = {"readOptimizedOnly"}, help = "Only display read-optimized view", @CliOption(key = {"readOptimizedOnly"}, help = "Only display read-optimized view",
unspecifiedDefaultValue = "false") boolean readOptimizedOnly, unspecifiedDefaultValue = "false") boolean readOptimizedOnly,
@CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed", @CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed",
unspecifiedDefaultValue = "") String maxInstant, unspecifiedDefaultValue = "") String maxInstant,
@CliOption(key = { @CliOption(key = {"includeMax"}, help = "Include Max Instant",
"includeMax"}, help = "Include Max Instant", unspecifiedDefaultValue = "false") boolean includeMaxInstant, unspecifiedDefaultValue = "false") boolean includeMaxInstant,
@CliOption(key = { @CliOption(key = {"includeInflight"}, help = "Include Inflight Instants",
"includeInflight"}, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") unspecifiedDefaultValue = "false") boolean includeInflight,
boolean includeInflight, @CliOption(key = {"excludeCompaction"}, help = "Exclude compaction Instants",
@CliOption(key = {"excludeCompaction"}, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") unspecifiedDefaultValue = "false") boolean excludeCompaction,
boolean excludeCompaction,
@CliOption(key = {"limit"}, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = {"limit"}, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = { @CliOption(key = {"headeronly"}, help = "Print Header Only",
"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException { throws IOException {
HoodieTableFileSystemView fsView = buildFileSystemView(globRegex, maxInstant, readOptimizedOnly, includeMaxInstant, HoodieTableFileSystemView fsView = buildFileSystemView(globRegex, maxInstant, readOptimizedOnly, includeMaxInstant,
@@ -97,15 +96,10 @@ public class FileSystemViewCommand implements CommandMarker {
fieldNameToConverterMap.put("Total Delta File Size", converterFunction); fieldNameToConverterMap.put("Total Delta File Size", converterFunction);
fieldNameToConverterMap.put("Data-File Size", converterFunction); fieldNameToConverterMap.put("Data-File Size", converterFunction);
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("FileId")
.addTableHeaderField("Partition") .addTableHeaderField("Base-Instant").addTableHeaderField("Data-File").addTableHeaderField("Data-File Size");
.addTableHeaderField("FileId")
.addTableHeaderField("Base-Instant")
.addTableHeaderField("Data-File")
.addTableHeaderField("Data-File Size");
if (!readOptimizedOnly) { if (!readOptimizedOnly) {
header = header.addTableHeaderField("Num Delta Files") header = header.addTableHeaderField("Num Delta Files").addTableHeaderField("Total Delta File Size")
.addTableHeaderField("Total Delta File Size")
.addTableHeaderField("Delta Files"); .addTableHeaderField("Delta Files");
} }
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows); return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
@@ -113,25 +107,24 @@ public class FileSystemViewCommand implements CommandMarker {
@CliCommand(value = "show fsview latest", help = "Show latest file-system view") @CliCommand(value = "show fsview latest", help = "Show latest file-system view")
public String showLatestFileSlices( public String showLatestFileSlices(
@CliOption(key = {"partitionPath"}, @CliOption(key = {"partitionPath"}, help = "A valid paritition path", mandatory = true) String partition,
help = "A valid paritition path", mandatory = true) String partition,
@CliOption(key = {"readOptimizedOnly"}, help = "Only display read-optimized view", @CliOption(key = {"readOptimizedOnly"}, help = "Only display read-optimized view",
unspecifiedDefaultValue = "false") boolean readOptimizedOnly, unspecifiedDefaultValue = "false") boolean readOptimizedOnly,
@CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed", @CliOption(key = {"maxInstant"}, help = "File-Slices upto this instant are displayed",
unspecifiedDefaultValue = "") String maxInstant, unspecifiedDefaultValue = "") String maxInstant,
@CliOption(key = {"merge"}, help = "Merge File Slices due to pending compaction", @CliOption(key = {"merge"}, help = "Merge File Slices due to pending compaction",
unspecifiedDefaultValue = "true") final boolean merge, unspecifiedDefaultValue = "true") final boolean merge,
@CliOption(key = {"includeMax"}, help = "Include Max Instant", unspecifiedDefaultValue = "false") @CliOption(key = {"includeMax"}, help = "Include Max Instant",
boolean includeMaxInstant, unspecifiedDefaultValue = "false") boolean includeMaxInstant,
@CliOption(key = {"includeInflight"}, help = "Include Inflight Instants", unspecifiedDefaultValue = "false") @CliOption(key = {"includeInflight"}, help = "Include Inflight Instants",
boolean includeInflight, unspecifiedDefaultValue = "false") boolean includeInflight,
@CliOption(key = {"excludeCompaction"}, help = "Exclude compaction Instants", unspecifiedDefaultValue = "false") @CliOption(key = {"excludeCompaction"}, help = "Exclude compaction Instants",
boolean excludeCompaction, unspecifiedDefaultValue = "false") boolean excludeCompaction,
@CliOption(key = {"limit"}, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit, @CliOption(key = {"limit"}, help = "Limit rows to be displayed", unspecifiedDefaultValue = "-1") Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = { @CliOption(key = {"headeronly"}, help = "Print Header Only",
"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException { throws IOException {
HoodieTableFileSystemView fsView = buildFileSystemView(partition, maxInstant, readOptimizedOnly, includeMaxInstant, HoodieTableFileSystemView fsView = buildFileSystemView(partition, maxInstant, readOptimizedOnly, includeMaxInstant,
@@ -163,28 +156,25 @@ public class FileSystemViewCommand implements CommandMarker {
if (!readOptimizedOnly) { if (!readOptimizedOnly) {
row[idx++] = fs.getLogFiles().count(); row[idx++] = fs.getLogFiles().count();
row[idx++] = fs.getLogFiles().mapToLong(lf -> lf.getFileSize()).sum(); row[idx++] = fs.getLogFiles().mapToLong(lf -> lf.getFileSize()).sum();
long logFilesScheduledForCompactionTotalSize = fs.getLogFiles() long logFilesScheduledForCompactionTotalSize =
.filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime())) fs.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.mapToLong(lf -> lf.getFileSize()).sum(); .mapToLong(lf -> lf.getFileSize()).sum();
row[idx++] = logFilesScheduledForCompactionTotalSize; row[idx++] = logFilesScheduledForCompactionTotalSize;
long logFilesUnscheduledTotalSize = fs.getLogFiles() long logFilesUnscheduledTotalSize =
.filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime())) fs.getLogFiles().filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.mapToLong(lf -> lf.getFileSize()).sum(); .mapToLong(lf -> lf.getFileSize()).sum();
row[idx++] = logFilesUnscheduledTotalSize; row[idx++] = logFilesUnscheduledTotalSize;
double logSelectedForCompactionToBaseRatio = double logSelectedForCompactionToBaseRatio =
dataFileSize > 0 ? logFilesScheduledForCompactionTotalSize / (dataFileSize * 1.0) : -1; dataFileSize > 0 ? logFilesScheduledForCompactionTotalSize / (dataFileSize * 1.0) : -1;
row[idx++] = logSelectedForCompactionToBaseRatio; row[idx++] = logSelectedForCompactionToBaseRatio;
double logUnscheduledToBaseRatio = double logUnscheduledToBaseRatio = dataFileSize > 0 ? logFilesUnscheduledTotalSize / (dataFileSize * 1.0) : -1;
dataFileSize > 0 ? logFilesUnscheduledTotalSize / (dataFileSize * 1.0) : -1;
row[idx++] = logUnscheduledToBaseRatio; row[idx++] = logUnscheduledToBaseRatio;
row[idx++] = fs.getLogFiles() row[idx++] = fs.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.filter(lf -> lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.collect(Collectors.toList()).toString(); .collect(Collectors.toList()).toString();
row[idx++] = fs.getLogFiles() row[idx++] = fs.getLogFiles().filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.filter(lf -> !lf.getBaseCommitTime().equals(fs.getBaseInstantTime()))
.collect(Collectors.toList()).toString(); .collect(Collectors.toList()).toString();
} }
rows.add(row); rows.add(row);
@@ -200,16 +190,11 @@ public class FileSystemViewCommand implements CommandMarker {
fieldNameToConverterMap.put("Delta Size - compaction unscheduled", converterFunction); fieldNameToConverterMap.put("Delta Size - compaction unscheduled", converterFunction);
} }
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("Partition").addTableHeaderField("FileId")
.addTableHeaderField("Partition") .addTableHeaderField("Base-Instant").addTableHeaderField("Data-File").addTableHeaderField("Data-File Size");
.addTableHeaderField("FileId")
.addTableHeaderField("Base-Instant")
.addTableHeaderField("Data-File")
.addTableHeaderField("Data-File Size");
if (!readOptimizedOnly) { if (!readOptimizedOnly) {
header = header.addTableHeaderField("Num Delta Files") header = header.addTableHeaderField("Num Delta Files").addTableHeaderField("Total Delta Size")
.addTableHeaderField("Total Delta Size")
.addTableHeaderField("Delta Size - compaction scheduled") .addTableHeaderField("Delta Size - compaction scheduled")
.addTableHeaderField("Delta Size - compaction unscheduled") .addTableHeaderField("Delta Size - compaction unscheduled")
.addTableHeaderField("Delta To Base Ratio - compaction scheduled") .addTableHeaderField("Delta To Base Ratio - compaction scheduled")
@@ -222,6 +207,7 @@ public class FileSystemViewCommand implements CommandMarker {
/** /**
* Build File System View * Build File System View
*
* @param globRegex Path Regex * @param globRegex Path Regex
* @param maxInstant Max Instants to be used for displaying file-instants * @param maxInstant Max Instants to be used for displaying file-instants
* @param readOptimizedOnly Include only read optimized view * @param readOptimizedOnly Include only read optimized view
@@ -233,8 +219,8 @@ public class FileSystemViewCommand implements CommandMarker {
*/ */
private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean readOptimizedOnly, private HoodieTableFileSystemView buildFileSystemView(String globRegex, String maxInstant, boolean readOptimizedOnly,
boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException { boolean includeMaxInstant, boolean includeInflight, boolean excludeCompaction) throws IOException {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(HoodieCLI.tableMetadata.getHadoopConf(), HoodieTableMetaClient metaClient =
HoodieCLI.tableMetadata.getBasePath(), true); new HoodieTableMetaClient(HoodieCLI.tableMetadata.getHadoopConf(), HoodieCLI.tableMetadata.getBasePath(), true);
FileSystem fs = HoodieCLI.fs; FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex); String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
FileStatus[] statuses = fs.globStatus(new Path(globPath)); FileStatus[] statuses = fs.globStatus(new Path(globPath));
View File
@@ -43,17 +43,17 @@ public class HDFSParquetImportCommand implements CommandMarker {
@CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false", @CliOption(key = "upsert", mandatory = false, unspecifiedDefaultValue = "false",
help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert, help = "Uses upsert API instead of the default insert API of WriteClient") boolean useUpsert,
@CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath, @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") final String srcPath,
@CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") final String @CliOption(key = "targetPath", mandatory = true,
targetPath, help = "Base path for the target hoodie dataset") final String targetPath,
@CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName, @CliOption(key = "tableName", mandatory = true, help = "Table name") final String tableName,
@CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType, @CliOption(key = "tableType", mandatory = true, help = "Table type") final String tableType,
@CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField, @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") final String rowKeyField,
@CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") final String @CliOption(key = "partitionPathField", mandatory = true,
partitionPathField, help = "Partition path field name") final String partitionPathField,
@CliOption(key = { @CliOption(key = {"parallelism"}, mandatory = true,
"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") final String parallelism, help = "Parallelism for hoodie insert") final String parallelism,
@CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") final String @CliOption(key = "schemaFilePath", mandatory = true,
schemaFilePath, help = "Path for Avro schema file") final String schemaFilePath,
@CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format, @CliOption(key = "format", mandatory = true, help = "Format for the input data") final String format,
@CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory, @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") final String sparkMemory,
@CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception { @CliOption(key = "retry", mandatory = true, help = "Number of retries") final String retry) throws Exception {
@@ -62,8 +62,8 @@ public class HDFSParquetImportCommand implements CommandMarker {
boolean initialized = HoodieCLI.initConf(); boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized); HoodieCLI.initFS(initialized);
String sparkPropertiesPath = Utils.getDefaultPropertiesFile( String sparkPropertiesPath =
JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala()); Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
@@ -72,8 +72,8 @@ public class HDFSParquetImportCommand implements CommandMarker {
cmd = SparkCommand.UPSERT.toString(); cmd = SparkCommand.UPSERT.toString();
} }
sparkLauncher.addAppArgs(cmd, srcPath, targetPath, tableName, tableType, rowKeyField, sparkLauncher.addAppArgs(cmd, srcPath, targetPath, tableName, tableType, rowKeyField, partitionPathField,
partitionPathField, parallelism, schemaFilePath, sparkMemory, retry); parallelism, schemaFilePath, sparkMemory, retry);
Process process = sparkLauncher.launch(); Process process = sparkLauncher.launch();
InputStreamConsumer.captureOutput(process); InputStreamConsumer.captureOutput(process);
int exitCode = process.waitFor(); int exitCode = process.waitFor();
View File
@@ -69,30 +69,29 @@ public class HoodieLogFileCommand implements CommandMarker {
@CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files") @CliCommand(value = "show logfile metadata", help = "Read commit metadata from log files")
public String showLogFileCommits( public String showLogFileCommits(
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified path for the log file") final @CliOption(key = "logFilePathPattern", mandatory = true,
String logFilePathPattern, help = "Fully qualified path for the log file") final String logFilePathPattern,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") @CliOption(key = {"headeronly"}, help = "Print Header Only",
final boolean headerOnly) throws IOException { unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException {
FileSystem fs = HoodieCLI.tableMetadata.getFs(); FileSystem fs = HoodieCLI.tableMetadata.getFs();
List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern))) List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
.map(status -> status.getPath().toString()).collect(Collectors.toList()); .map(status -> status.getPath().toString()).collect(Collectors.toList());
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata =
String>>, Integer>>> Maps.newHashMap();
commitCountAndMetadata = Maps.newHashMap();
int totalEntries = 0; int totalEntries = 0;
int numCorruptBlocks = 0; int numCorruptBlocks = 0;
int dummyInstantTimeCount = 0; int dummyInstantTimeCount = 0;
for (String logFilePath : logFilePaths) { for (String logFilePath : logFilePaths) {
FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath)); FileStatus[] fsStatus = fs.listStatus(new Path(logFilePath));
Schema writerSchema = new AvroSchemaConverter().convert( Schema writerSchema = new AvroSchemaConverter()
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath))); .convert(SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFilePath)));
Reader reader = HoodieLogFormat Reader reader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
// read the avro blocks // read the avro blocks
while (reader.hasNext()) { while (reader.hasNext()) {
@@ -126,8 +125,8 @@ public class HoodieLogFileCommand implements CommandMarker {
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
totalEntries++; totalEntries++;
} else { } else {
List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list =
Integer>> list = new ArrayList<>(); new ArrayList<>();
list.add( list.add(
new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount));
commitCountAndMetadata.put(instantTime, list); commitCountAndMetadata.put(instantTime, list);
@@ -139,12 +138,11 @@ public class HoodieLogFileCommand implements CommandMarker {
List<Comparable[]> rows = new ArrayList<>(); List<Comparable[]> rows = new ArrayList<>();
int i = 0; int i = 0;
ObjectMapper objectMapper = new ObjectMapper(); ObjectMapper objectMapper = new ObjectMapper();
for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, for (Map.Entry<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry : commitCountAndMetadata
Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> entry .entrySet()) {
: commitCountAndMetadata.entrySet()) {
String instantTime = entry.getKey().toString(); String instantTime = entry.getKey().toString();
for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, for (Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer> tuple3 : entry
Map<HeaderMetadataType, String>>, Integer> tuple3 : entry.getValue()) { .getValue()) {
Comparable[] output = new Comparable[5]; Comparable[] output = new Comparable[5];
output[0] = instantTime; output[0] = instantTime;
output[1] = tuple3._3(); output[1] = tuple3._3();
@@ -156,21 +154,18 @@ public class HoodieLogFileCommand implements CommandMarker {
} }
} }
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("InstantTime").addTableHeaderField("RecordCount")
.addTableHeaderField("InstantTime") .addTableHeaderField("BlockType").addTableHeaderField("HeaderMetadata").addTableHeaderField("FooterMetadata");
.addTableHeaderField("RecordCount")
.addTableHeaderField("BlockType")
.addTableHeaderField("HeaderMetadata")
.addTableHeaderField("FooterMetadata");
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
} }
@CliCommand(value = "show logfile records", help = "Read records from log files") @CliCommand(value = "show logfile records", help = "Read records from log files")
public String showLogFileRecords(@CliOption(key = { public String showLogFileRecords(
"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") final Integer limit, @CliOption(key = {"limit"}, mandatory = false, help = "Limit commits",
@CliOption(key = "logFilePathPattern", mandatory = true, help = "Fully qualified paths for the log files") unspecifiedDefaultValue = "10") final Integer limit,
final String logFilePathPattern, @CliOption(key = "logFilePathPattern", mandatory = true,
help = "Fully qualified paths for the log files") final String logFilePathPattern,
@CliOption(key = "mergeRecords", mandatory = false, help = "If the records in the log files should be merged", @CliOption(key = "mergeRecords", mandatory = false, help = "If the records in the log files should be merged",
unspecifiedDefaultValue = "false") final Boolean shouldMerge) unspecifiedDefaultValue = "false") final Boolean shouldMerge)
throws IOException { throws IOException {
@@ -184,17 +179,16 @@ public class HoodieLogFileCommand implements CommandMarker {
// TODO : readerSchema can change across blocks/log files, fix this inside Scanner // TODO : readerSchema can change across blocks/log files, fix this inside Scanner
AvroSchemaConverter converter = new AvroSchemaConverter(); AvroSchemaConverter converter = new AvroSchemaConverter();
// get schema from last log file // get schema from last log file
Schema readerSchema = converter.convert( Schema readerSchema =
SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1)))); converter.convert(SchemaUtil.readSchemaFromLogFile(fs, new Path(logFilePaths.get(logFilePaths.size() - 1))));
List<IndexedRecord> allRecords = new ArrayList<>(); List<IndexedRecord> allRecords = new ArrayList<>();
if (shouldMerge) { if (shouldMerge) {
System.out.println("===========================> MERGING RECORDS <==================="); System.out.println("===========================> MERGING RECORDS <===================");
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, HoodieMergedLogRecordScanner scanner =
HoodieCLI.tableMetadata.getBasePath(), logFilePaths, readerSchema, new HoodieMergedLogRecordScanner(fs, HoodieCLI.tableMetadata.getBasePath(), logFilePaths, readerSchema,
HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get() HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get().getTimestamp(),
.getTimestamp(),
Long.valueOf(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES), Long.valueOf(HoodieMemoryConfig.DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES),
Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED), Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED),
Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED), Boolean.valueOf(HoodieCompactionConfig.DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED),
@@ -209,10 +203,10 @@ public class HoodieLogFileCommand implements CommandMarker {
} }
} else { } else {
for (String logFile : logFilePaths) { for (String logFile : logFilePaths) {
Schema writerSchema = new AvroSchemaConverter().convert( Schema writerSchema = new AvroSchemaConverter()
SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile))); .convert(SchemaUtil.readSchemaFromLogFile(HoodieCLI.tableMetadata.getFs(), new Path(logFile)));
HoodieLogFormat.Reader reader = HoodieLogFormat HoodieLogFormat.Reader reader =
.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema); HoodieLogFormat.newReader(fs, new HoodieLogFile(new Path(logFile)), writerSchema);
// read the avro blocks // read the avro blocks
while (reader.hasNext()) { while (reader.hasNext()) {
HoodieLogBlock n = reader.next(); HoodieLogBlock n = reader.next();
View File
@@ -44,19 +44,16 @@ public class HoodieSyncCommand implements CommandMarker {
public String validateSync( public String validateSync(
@CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") final String mode, @CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") final String mode,
@CliOption(key = {"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") final String srcDb, @CliOption(key = {"sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") final String srcDb,
@CliOption(key = { @CliOption(key = {"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie",
"targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") final String tgtDb, help = "target database") final String tgtDb,
@CliOption(key = { @CliOption(key = {"partitionCount"}, unspecifiedDefaultValue = "5",
"partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate") help = "total number of recent partitions to validate") final int partitionCount,
final int partitionCount, @CliOption(key = {"hiveServerUrl"}, mandatory = true,
@CliOption(key = { help = "hiveServerURL to connect to") final String hiveServerUrl,
"hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to") final String hiveServerUrl, @CliOption(key = {"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "",
@CliOption(key = { help = "hive username to connect to") final String hiveUser,
"hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to") final @CliOption(key = {"hivePass"}, mandatory = true, unspecifiedDefaultValue = "",
String hiveUser, help = "hive password to connect to") final String hivePass)
@CliOption(key = {
"hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") final
String hivePass)
throws Exception { throws Exception {
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata; HoodieTableMetaClient target = HoodieCLI.syncTableMetadata;
HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline(); HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline();
@@ -77,8 +74,8 @@ public class HoodieSyncCommand implements CommandMarker {
String sourceLatestCommit = String sourceLatestCommit =
sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp(); sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp();
if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, if (sourceLatestCommit != null
HoodieTimeline.GREATER)) { && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) {
// source is behind the target // source is behind the target
List<HoodieInstant> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE) List<HoodieInstant> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE)
.getInstants().collect(Collectors.toList()); .getInstants().collect(Collectors.toList());
@@ -89,8 +86,8 @@ public class HoodieSyncCommand implements CommandMarker {
long newInserts = CommitUtil.countNewRecords(target, long newInserts = CommitUtil.countNewRecords(target,
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList())); commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count(" return "Count difference now is (count(" + target.getTableConfig().getTableName() + ") - count("
+ source.getTableConfig().getTableName() + source.getTableConfig().getTableName() + ") == " + (targetCount - sourceCount) + ". Catch up count is "
+ ") == " + (targetCount - sourceCount) + ". Catch up count is " + newInserts; + newInserts;
} }
} else { } else {
List<HoodieInstant> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE) List<HoodieInstant> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE)
@@ -102,8 +99,8 @@ public class HoodieSyncCommand implements CommandMarker {
long newInserts = CommitUtil.countNewRecords(source, long newInserts = CommitUtil.countNewRecords(source,
commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList())); commitsToCatchup.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()));
return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count(" return "Count difference now is (count(" + source.getTableConfig().getTableName() + ") - count("
+ target.getTableConfig().getTableName() + target.getTableConfig().getTableName() + ") == " + (sourceCount - targetCount) + ". Catch up count is "
+ ") == " + (sourceCount - targetCount) + ". Catch up count is " + newInserts; + newInserts;
} }
} }
View File
@@ -47,16 +47,15 @@ public class RepairsCommand implements CommandMarker {
return HoodieCLI.tableMetadata != null; return HoodieCLI.tableMetadata != null;
} }
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce " @CliCommand(value = "repair deduplicate",
+ "repaired files to replace with") help = "De-duplicate a partition path contains duplicates & produce " + "repaired files to replace with")
public String deduplicate(@CliOption(key = { public String deduplicate(
"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) final String @CliOption(key = {"duplicatedPartitionPath"}, help = "Partition Path containing the duplicates",
duplicatedPartitionPath, mandatory = true) final String duplicatedPartitionPath,
@CliOption(key = { @CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files",
"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String mandatory = true) final String repairedOutputPath,
repairedOutputPath, @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path",
@CliOption(key = { mandatory = true) final String sparkPropertiesPath)
"sparkProperties"}, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath)
throws Exception { throws Exception {
SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath, sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath,
@@ -73,14 +72,15 @@ public class RepairsCommand implements CommandMarker {
@CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present") @CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present")
public String addPartitionMeta(@CliOption(key = { public String addPartitionMeta(
"dryrun"}, help = "Should we actually add or just print what would be done", unspecifiedDefaultValue = "true") @CliOption(key = {"dryrun"}, help = "Should we actually add or just print what would be done",
final boolean dryRun) throws IOException { unspecifiedDefaultValue = "true") final boolean dryRun)
throws IOException {
String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get() String latestCommit =
.getTimestamp(); HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get().getTimestamp();
List<String> partitionPaths = FSUtils.getAllPartitionFoldersThreeLevelsDown(HoodieCLI.fs, List<String> partitionPaths =
HoodieCLI.tableMetadata.getBasePath()); FSUtils.getAllPartitionFoldersThreeLevelsDown(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath());
Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath()); Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath());
String[][] rows = new String[partitionPaths.size() + 1][]; String[][] rows = new String[partitionPaths.size() + 1][];
@@ -94,8 +94,8 @@ public class RepairsCommand implements CommandMarker {
if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) { if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) {
row[1] = "No"; row[1] = "No";
if (!dryRun) { if (!dryRun) {
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, HoodiePartitionMetadata partitionMetadata =
partitionPath); new HoodiePartitionMetadata(HoodieCLI.fs, latestCommit, basePath, partitionPath);
partitionMetadata.trySave(0); partitionMetadata.trySave(0);
} }
} }
View File
@@ -50,8 +50,8 @@ public class RollbacksCommand implements CommandMarker {
@CliOption(key = {"limit"}, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = {"limit"}, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = { @CliOption(key = {"headeronly"}, help = "Print Header Only",
"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException { throws IOException {
HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.tableMetadata); HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.tableMetadata);
HoodieTimeline rollback = activeTimeline.getRollbackTimeline().filterCompletedInstants(); HoodieTimeline rollback = activeTimeline.getRollbackTimeline().filterCompletedInstants();
@@ -59,8 +59,8 @@ public class RollbacksCommand implements CommandMarker {
final List<Comparable[]> rows = new ArrayList<>(); final List<Comparable[]> rows = new ArrayList<>();
rollback.getInstants().forEach(instant -> { rollback.getInstants().forEach(instant -> {
try { try {
HoodieRollbackMetadata metadata = AvroUtils.deserializeAvroMetadata( HoodieRollbackMetadata metadata = AvroUtils
activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class); .deserializeAvroMetadata(activeTimeline.getInstantDetails(instant).get(), HoodieRollbackMetadata.class);
metadata.getCommitsRollback().forEach(c -> { metadata.getCommitsRollback().forEach(c -> {
Comparable[] row = new Comparable[5]; Comparable[] row = new Comparable[5];
row[0] = metadata.getStartRollbackTime(); row[0] = metadata.getStartRollbackTime();
@@ -74,11 +74,8 @@ public class RollbacksCommand implements CommandMarker {
e.printStackTrace(); e.printStackTrace();
} }
}); });
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("Instant").addTableHeaderField("Rolledback Instant")
.addTableHeaderField("Instant") .addTableHeaderField("Total Files Deleted").addTableHeaderField("Time taken in millis")
.addTableHeaderField("Rolledback Instant")
.addTableHeaderField("Total Files Deleted")
.addTableHeaderField("Time taken in millis")
.addTableHeaderField("Total Partitions"); .addTableHeaderField("Total Partitions");
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
} }
@@ -89,16 +86,17 @@ public class RollbacksCommand implements CommandMarker {
@CliOption(key = {"limit"}, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit, @CliOption(key = {"limit"}, help = "Limit #rows to be displayed", unspecifiedDefaultValue = "10") Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
@CliOption(key = { @CliOption(key = {"headeronly"}, help = "Print Header Only",
"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) unspecifiedDefaultValue = "false") final boolean headerOnly)
throws IOException { throws IOException {
HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.tableMetadata); HoodieActiveTimeline activeTimeline = new RollbackTimeline(HoodieCLI.tableMetadata);
final List<Comparable[]> rows = new ArrayList<>(); final List<Comparable[]> rows = new ArrayList<>();
HoodieRollbackMetadata metadata = AvroUtils.deserializeAvroMetadata( HoodieRollbackMetadata metadata = AvroUtils.deserializeAvroMetadata(
activeTimeline.getInstantDetails(new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, rollbackInstant)) activeTimeline.getInstantDetails(new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, rollbackInstant)).get(),
.get(), HoodieRollbackMetadata.class); HoodieRollbackMetadata.class);
metadata.getPartitionMetadata().entrySet().forEach(e -> { metadata.getPartitionMetadata().entrySet().forEach(e -> {
Stream.concat(e.getValue().getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)), Stream
.concat(e.getValue().getSuccessDeleteFiles().stream().map(f -> Pair.of(f, true)),
e.getValue().getFailedDeleteFiles().stream().map(f -> Pair.of(f, false))) e.getValue().getFailedDeleteFiles().stream().map(f -> Pair.of(f, false)))
.forEach(fileWithDeleteStatus -> { .forEach(fileWithDeleteStatus -> {
Comparable[] row = new Comparable[5]; Comparable[] row = new Comparable[5];
@@ -111,12 +109,8 @@ public class RollbacksCommand implements CommandMarker {
}); });
}); });
TableHeader header = new TableHeader() TableHeader header = new TableHeader().addTableHeaderField("Instant").addTableHeaderField("Rolledback Instants")
.addTableHeaderField("Instant") .addTableHeaderField("Partition").addTableHeaderField("Deleted File").addTableHeaderField("Succeeded");
.addTableHeaderField("Rolledback Instants")
.addTableHeaderField("Partition")
.addTableHeaderField("Deleted File")
.addTableHeaderField("Succeeded");
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
} }
View File
@@ -62,8 +62,8 @@ public class SavepointsCommand implements CommandMarker {
@CliAvailabilityIndicator({"savepoint rollback"}) @CliAvailabilityIndicator({"savepoint rollback"})
public boolean isRollbackToSavepointAvailable() { public boolean isRollbackToSavepointAvailable() {
return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline().getSavePointTimeline() return HoodieCLI.tableMetadata != null
.filterCompletedInstants().empty(); && !HoodieCLI.tableMetadata.getActiveTimeline().getSavePointTimeline().filterCompletedInstants().empty();
} }
@CliCommand(value = "savepoints show", help = "Show the savepoints") @CliCommand(value = "savepoints show", help = "Show the savepoints")
@@ -137,8 +137,8 @@ public class SavepointsCommand implements CommandMarker {
} }
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig( HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
return new HoodieWriteClient(jsc, config, false); return new HoodieWriteClient(jsc, config, false);
} }
View File
@@ -43,8 +43,7 @@ public class SparkMain {
* Commands
*/
enum SparkCommand {
- ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN,
- COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR
+ ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN, COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR
}
public static void main(String[] args) throws Exception {
@@ -76,13 +75,12 @@ public class SparkMain {
break;
case COMPACT_RUN:
assert (args.length == 8);
- returnCode = compact(jsc, args[1], args[2], args[3], Integer.parseInt(args[4]),
- args[5], args[6], Integer.parseInt(args[7]), false);
+ returnCode = compact(jsc, args[1], args[2], args[3], Integer.parseInt(args[4]), args[5], args[6],
+ Integer.parseInt(args[7]), false);
break;
case COMPACT_SCHEDULE:
assert (args.length == 5);
- returnCode = compact(jsc, args[1], args[2], args[3], 1,
- "", args[4], 0, true);
+ returnCode = compact(jsc, args[1], args[2], args[3], 1, "", args[4], 0, true);
break;
case COMPACT_VALIDATE:
assert (args.length == 7);
@@ -113,8 +111,7 @@ public class SparkMain {
System.exit(returnCode);
}
- private static int dataLoad(JavaSparkContext jsc, String command,
- String srcPath, String targetPath, String tableName,
+ private static int dataLoad(JavaSparkContext jsc, String command, String srcPath, String targetPath, String tableName,
String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, String sparkMaster,
String sparkMemory, int retry) throws Exception {
Config cfg = new Config();
@@ -180,9 +177,9 @@ public class SparkMain {
new HoodieCompactionAdminTool(cfg).run(jsc);
}
- private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId,
- String outputPath, int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation,
- boolean dryRun) throws Exception {
+ private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePath, String fileId, String outputPath,
+ int parallelism, String sparkMaster, String sparkMemory, boolean skipValidation, boolean dryRun)
+ throws Exception {
HoodieCompactionAdminTool.Config cfg = new HoodieCompactionAdminTool.Config();
cfg.basePath = basePath;
cfg.operation = Operation.UNSCHEDULE_FILE;
@@ -244,8 +241,8 @@ public class SparkMain {
}
private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception {
- HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withIndexConfig(
- HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
+ HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
+ .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build();
return new HoodieWriteClient(jsc, config);
}
}

View File

@@ -63,8 +63,9 @@ public class StatsCommand implements CommandMarker {
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
- @CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false")
- final boolean headerOnly) throws IOException {
+ @CliOption(key = {"headeronly"}, help = "Print Header Only",
+ unspecifiedDefaultValue = "false") final boolean headerOnly)
+ throws IOException {
long totalRecordsUpserted = 0;
long totalRecordsWritten = 0;
@@ -93,31 +94,26 @@ public class StatsCommand implements CommandMarker {
}
rows.add(new Comparable[] {"Total", totalRecordsUpserted, totalRecordsWritten, waf});
- TableHeader header = new TableHeader()
- .addTableHeaderField("CommitTime")
- .addTableHeaderField("Total Upserted")
- .addTableHeaderField("Total Written")
- .addTableHeaderField("Write Amplifiation Factor");
+ TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Total Upserted")
+ .addTableHeaderField("Total Written").addTableHeaderField("Write Amplifiation Factor");
return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows);
}
private Comparable[] printFileSizeHistogram(String commitTime, Snapshot s) {
- return new Comparable[]{commitTime, s.getMin(),
- s.getValue(0.1), s.getMedian(),
- s.getMean(), s.get95thPercentile(),
- s.getMax(), s.size(),
- s.getStdDev()};
+ return new Comparable[] {commitTime, s.getMin(), s.getValue(0.1), s.getMedian(), s.getMean(), s.get95thPercentile(),
+ s.getMax(), s.size(), s.getStdDev()};
}
@CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files")
public String fileSizeStats(
- @CliOption(key = {"partitionPath"},
- help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") final String globRegex,
+ @CliOption(key = {"partitionPath"}, help = "regex to select files, eg: 2016/08/02",
+ unspecifiedDefaultValue = "*/*/*") final String globRegex,
@CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
@CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
@CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
- @CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false")
- final boolean headerOnly) throws IOException {
+ @CliOption(key = {"headeronly"}, help = "Print Header Only",
+ unspecifiedDefaultValue = "false") final boolean headerOnly)
+ throws IOException {
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", HoodieCLI.tableMetadata.getBasePath(), globRegex);
@@ -145,8 +141,8 @@ public class StatsCommand implements CommandMarker {
Snapshot s = globalHistogram.getSnapshot();
rows.add(printFileSizeHistogram("ALL", s));
- Function<Object, String> converterFunction = entry ->
- NumericUtils.humanReadableByteCount((Double.valueOf(entry.toString())));
+ Function<Object, String> converterFunction =
+ entry -> NumericUtils.humanReadableByteCount((Double.valueOf(entry.toString())));
Map<String, Function<Object, String>> fieldNameToConverterMap = new HashMap<>();
fieldNameToConverterMap.put("Min", converterFunction);
fieldNameToConverterMap.put("10th", converterFunction);
@@ -156,16 +152,9 @@ public class StatsCommand implements CommandMarker {
fieldNameToConverterMap.put("Max", converterFunction);
fieldNameToConverterMap.put("StdDev", converterFunction);
- TableHeader header = new TableHeader()
- .addTableHeaderField("CommitTime")
- .addTableHeaderField("Min")
- .addTableHeaderField("10th")
- .addTableHeaderField("50th")
- .addTableHeaderField("avg")
- .addTableHeaderField("95th")
- .addTableHeaderField("Max")
- .addTableHeaderField("NumFiles")
- .addTableHeaderField("StdDev");
+ TableHeader header = new TableHeader().addTableHeaderField("CommitTime").addTableHeaderField("Min")
+ .addTableHeaderField("10th").addTableHeaderField("50th").addTableHeaderField("avg").addTableHeaderField("95th")
+ .addTableHeaderField("Max").addTableHeaderField("NumFiles").addTableHeaderField("StdDev");
return HoodiePrintHelper.print(header, fieldNameToConverterMap, sortByField, descending, limit, headerOnly, rows);
}
}

View File

@@ -52,8 +52,7 @@ public class HiveUtil {
stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat");
stmt.execute("set hive.stats.autogather=false");
rs = stmt.executeQuery(
- "select count(`_hoodie_commit_time`) as cnt from " + dbName + "."
- + source.getTableConfig().getTableName());
+ "select count(`_hoodie_commit_time`) as cnt from " + dbName + "." + source.getTableConfig().getTableName());
long count = -1;
if (rs.next()) {
count = rs.getLong("cnt");

View File

@@ -40,8 +40,8 @@ public class SparkUtil {
public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException {
String currentJar = new File(SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath())
.getAbsolutePath();
- SparkLauncher sparkLauncher = new SparkLauncher().setAppResource(currentJar)
- .setMainClass(SparkMain.class.getName());
+ SparkLauncher sparkLauncher =
+ new SparkLauncher().setAppResource(currentJar).setMainClass(SparkMain.class.getName());
if (!StringUtils.isNullOrEmpty(propertiesFile)) {
sparkLauncher.setPropertiesFile(propertiesFile);
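
For orientation, a rough sketch of how the launcher returned by initLauncher might be driven (not part of this commit; the master and memory settings are assumptions, and the import of the Hudi CLI SparkUtil class is omitted):

    import org.apache.spark.launcher.SparkLauncher;

    public class SparkLauncherSketch {
      public static void main(String[] args) throws Exception {
        // SparkUtil.initLauncher resolves the current CLI jar and sets SparkMain as the main class.
        SparkLauncher launcher = SparkUtil.initLauncher(null);
        launcher.setMaster("local[2]"); // assumed master for a local test
        launcher.setConf(SparkLauncher.DRIVER_MEMORY, "1g"); // assumed driver memory
        Process sparkProcess = launcher.launch();
        sparkProcess.waitFor();
      }
    }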

View File

@@ -26,6 +26,10 @@
<artifactId>hudi-client</artifactId>
<packaging>jar</packaging>
+ <properties>
+ <main.basedir>${project.parent.basedir}</main.basedir>
+ </properties>
<build>
<plugins>
<plugin>

View File

@@ -32,8 +32,8 @@ import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
/**
- * Abstract class taking care of holding common member variables (FileSystem, SparkContext, HoodieConfigs)
- * Also, manages embedded timeline-server if enabled.
+ * Abstract class taking care of holding common member variables (FileSystem, SparkContext, HoodieConfigs) Also, manages
+ * embedded timeline-server if enabled.
*/
public abstract class AbstractHoodieClient implements Serializable, AutoCloseable {
@@ -45,10 +45,9 @@ public abstract class AbstractHoodieClient implements Serializable, AutoCloseabl
protected final String basePath;
/**
- * Timeline Server has the same lifetime as that of Client.
- * Any operations done on the same timeline service will be able to take advantage
- * of the cached file-system view. New completed actions will be synced automatically
- * in an incremental fashion.
+ * Timeline Server has the same lifetime as that of Client. Any operations done on the same timeline service will be
+ * able to take advantage of the cached file-system view. New completed actions will be synced automatically in an
+ * incremental fashion.
*/
private transient Option<EmbeddedTimelineService> timelineServer;
private final boolean shouldStopTimelineServer;

View File

@@ -69,8 +69,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
super(jsc, HoodieWriteConfig.newBuilder().withPath(basePath).build());
}
- public CompactionAdminClient(JavaSparkContext jsc, String basePath,
- Option<EmbeddedTimelineService> timelineServer) {
+ public CompactionAdminClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTimelineService> timelineServer) {
super(jsc, HoodieWriteConfig.newBuilder().withPath(basePath).build(), timelineServer);
}
@@ -81,8 +80,8 @@ public class CompactionAdminClient extends AbstractHoodieClient {
* @param metaClient Hoodie Table Meta Client
* @param compactionInstant Compaction Instant
*/
- public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient,
- String compactionInstant, int parallelism) throws IOException {
+ public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant,
+ int parallelism) throws IOException {
HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
HoodieTableFileSystemView fsView =
new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
@@ -112,15 +111,13 @@ public class CompactionAdminClient extends AbstractHoodieClient {
* @param parallelism Parallelism
* @param dryRun Dry Run
*/
- public List<RenameOpResult> unscheduleCompactionPlan(
- String compactionInstant, boolean skipValidation, int parallelism, boolean dryRun) throws Exception {
+ public List<RenameOpResult> unscheduleCompactionPlan(String compactionInstant, boolean skipValidation,
+ int parallelism, boolean dryRun) throws Exception {
HoodieTableMetaClient metaClient = createMetaClient(false);
- List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
- getRenamingActionsForUnschedulingCompactionPlan(metaClient, compactionInstant, parallelism,
- Option.empty(), skipValidation);
- List<RenameOpResult> res =
- runRenamingOps(metaClient, renameActions, parallelism, dryRun);
+ List<Pair<HoodieLogFile, HoodieLogFile>> renameActions = getRenamingActionsForUnschedulingCompactionPlan(metaClient,
+ compactionInstant, parallelism, Option.empty(), skipValidation);
+ List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, parallelism, dryRun);
Option<Boolean> success =
Option.fromJavaOptional(res.stream().map(r -> (r.isExecuted() && r.isSuccess())).reduce(Boolean::logicalAnd));
@@ -145,8 +142,8 @@ public class CompactionAdminClient extends AbstractHoodieClient {
}
/**
- * Remove a fileId from pending compaction. Removes the associated compaction operation and rename delta-files
- * that were generated for that file-id after the compaction operation was scheduled.
+ * Remove a fileId from pending compaction. Removes the associated compaction operation and rename delta-files that
+ * were generated for that file-id after the compaction operation was scheduled.
*
* This operation MUST be executed with compactions and writer turned OFF.
*
@@ -154,12 +151,11 @@ public class CompactionAdminClient extends AbstractHoodieClient {
* @param skipValidation Skip validation
* @param dryRun Dry Run Mode
*/
- public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId,
- boolean skipValidation, boolean dryRun) throws Exception {
+ public List<RenameOpResult> unscheduleCompactionFileId(HoodieFileGroupId fgId, boolean skipValidation, boolean dryRun)
+ throws Exception {
HoodieTableMetaClient metaClient = createMetaClient(false);
List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
- getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId,
- Option.empty(), skipValidation);
+ getRenamingActionsForUnschedulingCompactionForFileId(metaClient, fgId, Option.empty(), skipValidation);
List<RenameOpResult> res = runRenamingOps(metaClient, renameActions, 1, dryRun);
@@ -167,15 +163,15 @@ public class CompactionAdminClient extends AbstractHoodieClient {
// Ready to remove this file-Id from compaction request
Pair<String, HoodieCompactionOperation> compactionOperationWithInstant =
CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fgId);
- HoodieCompactionPlan plan = CompactionUtils
- .getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
+ HoodieCompactionPlan plan =
+ CompactionUtils.getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
- List<HoodieCompactionOperation> newOps = plan.getOperations().stream()
- .filter(op -> (!op.getFileId().equals(fgId.getFileId()))
- && (!op.getPartitionPath().equals(fgId.getPartitionPath()))).collect(Collectors.toList());
+ List<HoodieCompactionOperation> newOps = plan.getOperations().stream().filter(
+ op -> (!op.getFileId().equals(fgId.getFileId())) && (!op.getPartitionPath().equals(fgId.getPartitionPath())))
+ .collect(Collectors.toList());
HoodieCompactionPlan newPlan =
HoodieCompactionPlan.newBuilder().setOperations(newOps).setExtraMetadata(plan.getExtraMetadata()).build();
- HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION,
- compactionOperationWithInstant.getLeft());
+ HoodieInstant inflight =
+ new HoodieInstant(State.INFLIGHT, COMPACTION_ACTION, compactionOperationWithInstant.getLeft());
Path inflightPath = new Path(metaClient.getMetaPath(), inflight.getFileName());
if (metaClient.getFs().exists(inflightPath)) {
// revert if in inflight state
@@ -189,28 +185,28 @@ public class CompactionAdminClient extends AbstractHoodieClient {
}
/**
- * Renames delta files to make file-slices consistent with the timeline as dictated by Hoodie metadata.
- * Use when compaction unschedule fails partially.
+ * Renames delta files to make file-slices consistent with the timeline as dictated by Hoodie metadata. Use when
+ * compaction unschedule fails partially.
*
* This operation MUST be executed with compactions and writer turned OFF.
*
* @param compactionInstant Compaction Instant to be repaired
* @param dryRun Dry Run Mode
*/
- public List<RenameOpResult> repairCompaction(String compactionInstant,
- int parallelism, boolean dryRun) throws Exception {
+ public List<RenameOpResult> repairCompaction(String compactionInstant, int parallelism, boolean dryRun)
+ throws Exception {
HoodieTableMetaClient metaClient = createMetaClient(false);
- List<ValidationOpResult> validationResults =
- validateCompactionPlan(metaClient, compactionInstant, parallelism);
- List<ValidationOpResult> failed = validationResults.stream()
- .filter(v -> !v.isSuccess()).collect(Collectors.toList());
+ List<ValidationOpResult> validationResults = validateCompactionPlan(metaClient, compactionInstant, parallelism);
+ List<ValidationOpResult> failed =
+ validationResults.stream().filter(v -> !v.isSuccess()).collect(Collectors.toList());
if (failed.isEmpty()) {
return new ArrayList<>();
}
- final HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient,
- metaClient.getCommitsAndCompactionTimeline());
+ final HoodieTableFileSystemView fsView =
+ new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
- List<Pair<HoodieLogFile, HoodieLogFile>> renameActions = failed.stream().flatMap(v ->
- getRenamingActionsToAlignWithCompactionOperation(metaClient, compactionInstant,
+ List<Pair<HoodieLogFile, HoodieLogFile>> renameActions =
+ failed.stream().flatMap(v -> getRenamingActionsToAlignWithCompactionOperation(metaClient, compactionInstant,
v.getOperation(), Option.of(fsView)).stream()).collect(Collectors.toList());
return runRenamingOps(metaClient, renameActions, parallelism, dryRun);
}
@@ -218,11 +214,10 @@ public class CompactionAdminClient extends AbstractHoodieClient {
/**
* Construction Compaction Plan from compaction instant
*/
- private static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient,
- String compactionInstant) throws IOException {
+ private static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
+ throws IOException {
- HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
- metaClient.getActiveTimeline().getInstantAuxiliaryDetails(
- HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
+ HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
+ .getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
return compactionPlan;
}
@@ -238,20 +233,18 @@ public class CompactionAdminClient extends AbstractHoodieClient {
protected static List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsToAlignWithCompactionOperation(
HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation op,
Option<HoodieTableFileSystemView> fsViewOpt) {
- HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() :
- new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
+ HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get()
+ : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get();
FileSlice merged =
fileSystemView.getLatestMergedFileSlicesBeforeOrOn(op.getPartitionPath(), lastInstant.getTimestamp())
.filter(fs -> fs.getFileId().equals(op.getFileId())).findFirst().get();
- final int maxVersion =
- op.getDeltaFilePaths().stream().map(lf -> FSUtils.getFileVersionFromLog(new Path(lf)))
+ final int maxVersion = op.getDeltaFilePaths().stream().map(lf -> FSUtils.getFileVersionFromLog(new Path(lf)))
.reduce((x, y) -> x > y ? x : y).orElse(0);
List<HoodieLogFile> logFilesToBeMoved =
merged.getLogFiles().filter(lf -> lf.getLogVersion() > maxVersion).collect(Collectors.toList());
return logFilesToBeMoved.stream().map(lf -> {
- Preconditions.checkArgument(lf.getLogVersion() - maxVersion > 0,
- "Expect new log version to be sane");
+ Preconditions.checkArgument(lf.getLogVersion() - maxVersion > 0, "Expect new log version to be sane");
HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
compactionInstant, lf.getLogVersion() - maxVersion, HoodieLogFormat.UNKNOWN_WRITE_TOKEN)));
@@ -285,11 +278,10 @@ public class CompactionAdminClient extends AbstractHoodieClient {
* @param operation Compaction Operation
* @param fsViewOpt File System View
*/
- private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient,
- String compactionInstant, CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt)
- throws IOException {
- HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() :
- new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
+ private ValidationOpResult validateCompactionOperation(HoodieTableMetaClient metaClient, String compactionInstant,
+ CompactionOperation operation, Option<HoodieTableFileSystemView> fsViewOpt) throws IOException {
+ HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get()
+ : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
Option<HoodieInstant> lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant();
try {
if (lastInstant.isPresent()) {
@@ -300,16 +292,15 @@ public class CompactionAdminClient extends AbstractHoodieClient {
FileSlice fs = fileSliceOptional.get();
Option<HoodieDataFile> df = fs.getDataFile();
if (operation.getDataFilePath().isPresent()) {
- String expPath = metaClient.getFs().getFileStatus(new Path(operation.getDataFilePath().get())).getPath()
- .toString();
+ String expPath =
+ metaClient.getFs().getFileStatus(new Path(operation.getDataFilePath().get())).getPath().toString();
- Preconditions.checkArgument(df.isPresent(), "Data File must be present. File Slice was : "
- + fs + ", operation :" + operation);
+ Preconditions.checkArgument(df.isPresent(),
+ "Data File must be present. File Slice was : " + fs + ", operation :" + operation);
Preconditions.checkArgument(df.get().getPath().equals(expPath),
"Base Path in operation is specified as " + expPath + " but got path " + df.get().getPath());
}
Set<HoodieLogFile> logFilesInFileSlice = fs.getLogFiles().collect(Collectors.toSet());
- Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFilePaths().stream()
- .map(dp -> {
+ Set<HoodieLogFile> logFilesInCompactionOp = operation.getDeltaFilePaths().stream().map(dp -> {
try {
FileStatus[] fileStatuses = metaClient.getFs().listStatus(new Path(dp));
Preconditions.checkArgument(fileStatuses.length == 1, "Expect only 1 file-status");
@@ -320,25 +311,23 @@ public class CompactionAdminClient extends AbstractHoodieClient {
throw new HoodieIOException(ioe.getMessage(), ioe);
}
}).collect(Collectors.toSet());
- Set<HoodieLogFile> missing =
- logFilesInCompactionOp.stream().filter(lf -> !logFilesInFileSlice.contains(lf))
+ Set<HoodieLogFile> missing = logFilesInCompactionOp.stream().filter(lf -> !logFilesInFileSlice.contains(lf))
.collect(Collectors.toSet());
Preconditions.checkArgument(missing.isEmpty(),
- "All log files specified in compaction operation is not present. Missing :" + missing
- + ", Exp :" + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);
+ "All log files specified in compaction operation is not present. Missing :" + missing + ", Exp :"
+ + logFilesInCompactionOp + ", Got :" + logFilesInFileSlice);
- Set<HoodieLogFile> diff =
- logFilesInFileSlice.stream().filter(lf -> !logFilesInCompactionOp.contains(lf))
+ Set<HoodieLogFile> diff = logFilesInFileSlice.stream().filter(lf -> !logFilesInCompactionOp.contains(lf))
.collect(Collectors.toSet());
Preconditions.checkArgument(diff.stream().allMatch(lf -> lf.getBaseCommitTime().equals(compactionInstant)),
"There are some log-files which are neither specified in compaction plan "
+ "nor present after compaction request instant. Some of these :" + diff);
} else {
- throw new CompactionValidationException("Unable to find file-slice for file-id (" + operation.getFileId()
- + " Compaction operation is invalid.");
+ throw new CompactionValidationException(
+ "Unable to find file-slice for file-id (" + operation.getFileId() + " Compaction operation is invalid.");
}
} else {
- throw new CompactionValidationException("Unable to find any committed instant. Compaction Operation may "
- + "be pointing to stale file-slices");
+ throw new CompactionValidationException(
+ "Unable to find any committed instant. Compaction Operation may " + "be pointing to stale file-slices");
}
} catch (CompactionValidationException | IllegalArgumentException e) {
return new ValidationOpResult(operation, false, Option.of(e));
@@ -374,8 +363,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
}).collect();
} else {
log.info("Dry-Run Mode activated for rename operations");
- return renameActions.parallelStream()
- .map(lfPair -> new RenameOpResult(lfPair, false, false, Option.empty()))
+ return renameActions.parallelStream().map(lfPair -> new RenameOpResult(lfPair, false, false, Option.empty()))
.collect(Collectors.toList());
}
}
@@ -395,18 +383,18 @@ public class CompactionAdminClient extends AbstractHoodieClient {
protected List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionPlan(
HoodieTableMetaClient metaClient, String compactionInstant, int parallelism,
Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
- HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get() :
- new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
+ HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get()
+ : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
if (plan.getOperations() != null) {
- log.info("Number of Compaction Operations :" + plan.getOperations().size()
- + " for instant :" + compactionInstant);
+ log.info(
+ "Number of Compaction Operations :" + plan.getOperations().size() + " for instant :" + compactionInstant);
List<CompactionOperation> ops = plan.getOperations().stream()
.map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
return jsc.parallelize(ops, parallelism).flatMap(op -> {
try {
- return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant,
- op, Option.of(fsView), skipValidation).iterator();
+ return getRenamingActionsForUnschedulingCompactionOperation(metaClient, compactionInstant, op,
+ Option.of(fsView), skipValidation).iterator();
} catch (IOException ioe) {
throw new HoodieIOException(ioe.getMessage(), ioe);
} catch (CompactionValidationException ve) {
@@ -434,8 +422,8 @@ public class CompactionAdminClient extends AbstractHoodieClient {
HoodieTableMetaClient metaClient, String compactionInstant, CompactionOperation operation,
Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
List<Pair<HoodieLogFile, HoodieLogFile>> result = new ArrayList<>();
- HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() :
- new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
+ HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get()
+ : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
if (!skipValidation) {
validateCompactionOperation(metaClient, compactionInstant, operation, Option.of(fileSystemView));
}
@@ -445,13 +433,11 @@ public class CompactionAdminClient extends AbstractHoodieClient {
.filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
List<HoodieLogFile> logFilesToRepair =
merged.getLogFiles().filter(lf -> lf.getBaseCommitTime().equals(compactionInstant))
- .sorted(HoodieLogFile.getLogFileComparator())
- .collect(Collectors.toList());
+ .sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
FileSlice fileSliceForCompaction =
fileSystemView.getLatestFileSlicesBeforeOrOn(operation.getPartitionPath(), operation.getBaseInstantTime(), true)
.filter(fs -> fs.getFileId().equals(operation.getFileId())).findFirst().get();
- int maxUsedVersion =
- fileSliceForCompaction.getLogFiles().findFirst().map(HoodieLogFile::getLogVersion)
+ int maxUsedVersion = fileSliceForCompaction.getLogFiles().findFirst().map(HoodieLogFile::getLogVersion)
.orElse(HoodieLogFile.LOGFILE_BASE_VERSION - 1);
String logExtn = fileSliceForCompaction.getLogFiles().findFirst().map(lf -> "." + lf.getFileExtension())
.orElse(HoodieLogFile.DELTA_EXTENSION);
@@ -479,8 +465,8 @@ public class CompactionAdminClient extends AbstractHoodieClient {
* compaction.
*/
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionForFileId(
- HoodieTableMetaClient metaClient, HoodieFileGroupId fgId,
- Option<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException {
+ HoodieTableMetaClient metaClient, HoodieFileGroupId fgId, Option<HoodieTableFileSystemView> fsViewOpt,
+ boolean skipValidation) throws IOException {
Map<HoodieFileGroupId, Pair<String, HoodieCompactionOperation>> allPendingCompactions =
CompactionUtils.getAllPendingCompactionOperations(metaClient);
if (allPendingCompactions.containsKey(fgId)) {
@@ -496,20 +482,19 @@ public class CompactionAdminClient extends AbstractHoodieClient {
*/
public static class RenameOpResult extends OperationResult<RenameInfo> {
- public RenameOpResult() {
- }
- public RenameOpResult(Pair<HoodieLogFile, HoodieLogFile> op, boolean success,
- Option<Exception> exception) {
- super(new RenameInfo(op.getKey().getFileId(), op.getKey().getPath().toString(),
- op.getRight().getPath().toString()), success, exception);
- }
- public RenameOpResult(
- Pair<HoodieLogFile, HoodieLogFile> op, boolean executed, boolean success,
- Option<Exception> exception) {
- super(new RenameInfo(op.getKey().getFileId(), op.getKey().getPath().toString(),
- op.getRight().getPath().toString()), executed, success, exception);
- }
+ public RenameOpResult() {}
+ public RenameOpResult(Pair<HoodieLogFile, HoodieLogFile> op, boolean success, Option<Exception> exception) {
+ super(
+ new RenameInfo(op.getKey().getFileId(), op.getKey().getPath().toString(), op.getRight().getPath().toString()),
+ success, exception);
+ }
+ public RenameOpResult(Pair<HoodieLogFile, HoodieLogFile> op, boolean executed, boolean success,
+ Option<Exception> exception) {
+ super(
+ new RenameInfo(op.getKey().getFileId(), op.getKey().getPath().toString(), op.getRight().getPath().toString()),
+ executed, success, exception);
+ }
}
@@ -518,11 +503,9 @@ public class CompactionAdminClient extends AbstractHoodieClient {
*/
public static class ValidationOpResult extends OperationResult<CompactionOperation> {
- public ValidationOpResult() {
- }
+ public ValidationOpResult() {}
- public ValidationOpResult(
- CompactionOperation operation, boolean success, Option<Exception> exception) {
+ public ValidationOpResult(CompactionOperation operation, boolean success, Option<Exception> exception) {
super(operation, success, exception);
}
}
@@ -533,8 +516,7 @@ public class CompactionAdminClient extends AbstractHoodieClient {
public String srcPath;
public String destPath;
- public RenameInfo() {
- }
+ public RenameInfo() {}
public RenameInfo(String fileId, String srcPath, String destPath) {
this.fileId = fileId;
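
A hypothetical dry-run walk-through of the admin operations reformatted above (not part of this commit; the Spark master, base path, instant time, and parallelism are made up, and imports are omitted):

    // Sketch only: exercises the unschedule/repair signatures shown in the hunks above.
    static void dryRunCompactionAdmin() throws Exception {
      JavaSparkContext jsc = new JavaSparkContext("local[2]", "compaction-admin-sketch"); // assumed local master
      CompactionAdminClient admin = new CompactionAdminClient(jsc, "/tmp/hoodie/sample-dataset"); // made-up base path
      // Dry-run (last argument) an unschedule of a pending compaction and print the proposed log-file renames.
      List<RenameOpResult> proposedRenames = admin.unscheduleCompactionPlan("20190101010101", false, 2, true);
      proposedRenames.forEach(System.out::println);
      // If an earlier unschedule stopped halfway, repairCompaction realigns leftover delta files (dry-run again).
      List<RenameOpResult> repairs = admin.repairCompaction("20190101010101", 2, true);
      repairs.forEach(System.out::println);
      admin.close();
    }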

View File

@@ -58,9 +58,8 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
private static final Logger logger = LogManager.getLogger(HoodieReadClient.class);
/**
- * TODO: We need to persist the index type into hoodie.properties and be able to access the index
- * just with a simple basepath pointing to the dataset. Until, then just always assume a
- * BloomIndex
+ * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple
+ * basepath pointing to the dataset. Until, then just always assume a BloomIndex
*/
private final transient HoodieIndex<T> index;
private final HoodieTimeline commitTimeline;
@@ -70,13 +69,11 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
/**
* @param basePath path to Hoodie dataset
*/
- public HoodieReadClient(JavaSparkContext jsc, String basePath,
- Option<EmbeddedTimelineService> timelineService) {
+ public HoodieReadClient(JavaSparkContext jsc, String basePath, Option<EmbeddedTimelineService> timelineService) {
this(jsc, HoodieWriteConfig.newBuilder().withPath(basePath)
// by default we use HoodieBloomIndex
- .withIndexConfig(
- HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
- .build(), timelineService);
+ .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(),
+ timelineService);
}
/**
@@ -130,8 +127,7 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
private void assertSqlContext() {
if (!sqlContextOpt.isPresent()) {
- throw new IllegalStateException(
- "SQLContext must be set, when performing dataframe operations");
+ throw new IllegalStateException("SQLContext must be set, when performing dataframe operations");
}
}
@@ -152,17 +148,16 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
*/
public Dataset<Row> readROView(JavaRDD<HoodieKey> hoodieKeys, int parallelism) {
assertSqlContext();
- JavaPairRDD<HoodieKey, Option<Pair<String, String>>> lookupResultRDD = index
- .fetchRecordLocation(hoodieKeys, jsc, hoodieTable);
+ JavaPairRDD<HoodieKey, Option<Pair<String, String>>> lookupResultRDD =
+ index.fetchRecordLocation(hoodieKeys, jsc, hoodieTable);
- JavaPairRDD<HoodieKey, Option<String>> keyToFileRDD = lookupResultRDD
- .mapToPair(r -> new Tuple2<>(r._1, convertToDataFilePath(r._2)));
+ JavaPairRDD<HoodieKey, Option<String>> keyToFileRDD =
+ lookupResultRDD.mapToPair(r -> new Tuple2<>(r._1, convertToDataFilePath(r._2)));
List<String> paths = keyToFileRDD.filter(keyFileTuple -> keyFileTuple._2().isPresent())
.map(keyFileTuple -> keyFileTuple._2().get()).collect();
// record locations might be same for multiple keys, so need a unique list
Set<String> uniquePaths = new HashSet<>(paths);
- Dataset<Row> originalDF = sqlContextOpt.get().read()
- .parquet(uniquePaths.toArray(new String[uniquePaths.size()]));
+ Dataset<Row> originalDF = sqlContextOpt.get().read().parquet(uniquePaths.toArray(new String[uniquePaths.size()]));
StructType schema = originalDF.schema();
JavaPairRDD<HoodieKey, Row> keyRowRDD = originalDF.javaRDD().mapToPair(row -> {
HoodieKey key = new HoodieKey(row.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD),
@@ -176,18 +171,16 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
}
/**
- * Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[FullFilePath]]
- * If the optional FullFilePath value is not present, then the key is not found. If the
- * FullFilePath value is present, it is the path component (without scheme) of the URI underlying
- * file
+ * Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[FullFilePath]] If the optional
+ * FullFilePath value is not present, then the key is not found. If the FullFilePath value is present, it is the path
+ * component (without scheme) of the URI underlying file
*/
public JavaPairRDD<HoodieKey, Option<String>> checkExists(JavaRDD<HoodieKey> hoodieKeys) {
return index.fetchRecordLocation(hoodieKeys, jsc, hoodieTable);
}
/**
- * Filter out HoodieRecords that already exists in the output folder. This is useful in
- * deduplication.
+ * Filter out HoodieRecords that already exists in the output folder. This is useful in deduplication.
*
* @param hoodieRecords Input RDD of Hoodie records.
* @return A subset of hoodieRecords RDD, with existing records filtered out.
@@ -198,27 +191,27 @@ public class HoodieReadClient<T extends HoodieRecordPayload> extends AbstractHoo
}
/**
- * Looks up the index and tags each incoming record with a location of a file that contains the
- * row (if it is actually present). Input RDD should contain no duplicates if needed.
+ * Looks up the index and tags each incoming record with a location of a file that contains the row (if it is actually
+ * present). Input RDD should contain no duplicates if needed.
*
* @param hoodieRecords Input RDD of Hoodie records
* @return Tagged RDD of Hoodie records
*/
- public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> hoodieRecords)
- throws HoodieIndexException {
+ public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> hoodieRecords) throws HoodieIndexException {
return index.tagLocation(hoodieRecords, jsc, hoodieTable);
}
/**
* Return all pending compactions with instant time for clients to decide what to compact next.
*
* @return
*/
public List<Pair<String, HoodieCompactionPlan>> getPendingCompactions() {
- HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
- hoodieTable.getMetaClient().getBasePath(), true);
+ HoodieTableMetaClient metaClient =
+ new HoodieTableMetaClient(jsc.hadoopConfiguration(), hoodieTable.getMetaClient().getBasePath(), true);
return CompactionUtils.getAllPendingCompactionPlans(metaClient).stream()
- .map(instantWorkloadPair ->
- Pair.of(instantWorkloadPair.getKey().getTimestamp(), instantWorkloadPair.getValue()))
+ .map(
+ instantWorkloadPair -> Pair.of(instantWorkloadPair.getKey().getTimestamp(), instantWorkloadPair.getValue()))
.collect(Collectors.toList());
}
}
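
A hypothetical read-path sketch against the methods shown above (not part of this commit; the Spark master, base path, key, and partition values are made up, and imports are omitted):

    // Sketch only: key existence check plus pending-compaction listing via HoodieReadClient.
    static void readClientSketch() throws Exception {
      JavaSparkContext jsc = new JavaSparkContext("local[2]", "read-client-sketch"); // assumed local master
      HoodieReadClient<HoodieAvroPayload> readClient =
          new HoodieReadClient<>(jsc, "/tmp/hoodie/sample-dataset", Option.empty()); // no embedded timeline service
      JavaRDD<HoodieKey> keys =
          jsc.parallelize(Collections.singletonList(new HoodieKey("uuid-0001", "2016/08/02")));
      // Key -> Option<full file path>; an absent value means the key is not in the dataset.
      readClient.checkExists(keys).collect().forEach(System.out::println);
      // Pending compactions, keyed by instant time, for deciding what to compact next.
      readClient.getPendingCompactions().forEach(p -> System.out.println(p.getKey()));
      readClient.close();
    }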

View File

@@ -64,14 +64,11 @@ public class WriteStatus implements Serializable {
}
/**
- * Mark write as success, optionally using given parameters for the purpose of calculating some
- * aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
- * objects are collected in Spark Driver.
+ * Mark write as success, optionally using given parameters for the purpose of calculating some aggregate metrics.
+ * This method is not meant to cache passed arguments, since WriteStatus objects are collected in Spark Driver.
*
- * @param record deflated {@code HoodieRecord} containing information that uniquely identifies
- * it.
- * @param optionalRecordMetadata optional metadata related to data contained in {@link
- * HoodieRecord} before deflation.
+ * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
+ * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
*/
public void markSuccess(HoodieRecord record, Option<Map<String, String>> optionalRecordMetadata) {
if (trackSuccessRecords) {
@@ -81,14 +78,11 @@ public class WriteStatus implements Serializable {
}
/**
- * Mark write as failed, optionally using given parameters for the purpose of calculating some
- * aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
- * objects are collected in Spark Driver.
+ * Mark write as failed, optionally using given parameters for the purpose of calculating some aggregate metrics. This
+ * method is not meant to cache passed arguments, since WriteStatus objects are collected in Spark Driver.
*
- * @param record deflated {@code HoodieRecord} containing information that uniquely identifies
- * it.
- * @param optionalRecordMetadata optional metadata related to data contained in {@link
- * HoodieRecord} before deflation.
+ * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
+ * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
*/
public void markFailure(HoodieRecord record, Throwable t, Option<Map<String, String>> optionalRecordMetadata) {
if (failedRecords.isEmpty() || (random.nextDouble() <= failureFraction)) {
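
A loose sketch of the bookkeeping described in the javadoc above (not part of this commit; the record payload and the (trackSuccessRecords, failureFraction) constructor arguments are assumptions inferred from the fields referenced in this hunk):

    // Sketch only: record one success and one failure on a WriteStatus instance.
    WriteStatus status = new WriteStatus(true, 0.1); // assumed args: trackSuccessRecords, failureFraction
    HoodieRecord record =
        new HoodieRecord<>(new HoodieKey("uuid-0001", "2016/08/02"), new HoodieAvroPayload(Option.empty()));
    status.markSuccess(record, Option.empty()); // no per-record metadata
    status.markFailure(record, new IOException("simulated write error"), Option.empty());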

View File

@@ -40,10 +40,8 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
// Turn on inline compaction - after fw delta commits a inline compaction will be run
public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline";
// Run a compaction every N delta commits
- public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP =
- "hoodie.compact.inline.max" + ".delta.commits";
- public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP =
- "hoodie.cleaner.fileversions" + ".retained";
+ public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max" + ".delta.commits";
+ public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = "hoodie.cleaner.fileversions" + ".retained";
public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained";
public static final String MAX_COMMITS_TO_KEEP_PROP = "hoodie.keep.max.commits";
public static final String MIN_COMMITS_TO_KEEP_PROP = "hoodie.keep.min.commits";
@@ -56,25 +54,21 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
* Configs related to specific table types
**/
// Number of inserts, that will be put each partition/bucket for writing
- public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE =
- "hoodie.copyonwrite.insert" + ".split.size";
+ public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert" + ".split.size";
// The rationale to pick the insert parallelism is the following. Writing out 100MB files,
// with atleast 1kb records, means 100K records per file. we just overprovision to 500K
public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000);
// Config to control whether we control insert split sizes automatically based on average
// record sizes
- public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS =
- "hoodie.copyonwrite.insert" + ".auto.split";
+ public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert" + ".auto.split";
// its off by default
public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(true);
// This value is used as a guessimate for the record size, if we can't determine this from
// previous commits
- public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE =
- "hoodie.copyonwrite" + ".record.size.estimate";
+ public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite" + ".record.size.estimate";
// Used to determine how much more can be packed into a small file, before it exceeds the size
// limit.
- public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String
- .valueOf(1024);
+ public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String.valueOf(1024);
public static final String CLEANER_PARALLELISM = "hoodie.cleaner.parallelism";
public static final String DEFAULT_CLEANER_PARALLELISM = String.valueOf(200);
public static final String TARGET_IO_PER_COMPACTION_IN_MB_PROP = "hoodie.compaction.target.io";
@@ -82,8 +76,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
public static final String DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB = String.valueOf(500 * 1024);
public static final String COMPACTION_STRATEGY_PROP = "hoodie.compaction.strategy";
// 200GB of target IO per compaction
- public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class
- .getName();
+ public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class.getName();
// used to merge records written to log file
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
public static final String PAYLOAD_CLASS_PROP = "hoodie.compaction.payload.class";
@@ -91,15 +84,12 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
// used to choose a trade off between IO vs Memory when performing compaction process
// Depending on outputfile_size and memory provided, choose true to avoid OOM for large file
// size + small memory
- public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP =
- "hoodie.compaction.lazy" + ".block.read";
+ public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = "hoodie.compaction.lazy" + ".block.read";
public static final String DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED = "false";
// used to choose whether to enable reverse log reading (reverse log traversal)
- public static final String COMPACTION_REVERSE_LOG_READ_ENABLED_PROP =
- "hoodie.compaction" + ".reverse.log.read";
+ public static final String COMPACTION_REVERSE_LOG_READ_ENABLED_PROP = "hoodie.compaction" + ".reverse.log.read";
public static final String DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED = "false";
- private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS
- .name();
+ private static final String DEFAULT_CLEANER_POLICY = HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
private static final String DEFAULT_AUTO_CLEAN = "true";
private static final String DEFAULT_INLINE_COMPACT = "false";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "1";
@@ -108,8 +98,8 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = "30";
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = "20";
private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = String.valueOf(10);
- public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP = "hoodie.compaction.daybased.target"
- + ".partitions";
+ public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP =
+ "hoodie.compaction.daybased.target" + ".partitions";
// 500GB of target IO per compaction (both read and write)
public static final String DEFAULT_TARGET_PARTITIONS_PER_DAYBASED_COMPACTION = String.valueOf(10);
@@ -188,14 +178,12 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
} }
public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) { public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) {
props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, String.valueOf(autoTuneInsertSplits));
String.valueOf(autoTuneInsertSplits));
return this; return this;
} }
public Builder approxRecordSize(int recordSizeEstimate) { public Builder approxRecordSize(int recordSizeEstimate) {
props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, String.valueOf(recordSizeEstimate));
String.valueOf(recordSizeEstimate));
return this; return this;
} }
@@ -215,32 +203,27 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
} }
public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) { public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) {
props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP, props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP, String.valueOf(targetIOPerCompactionInMB));
String.valueOf(targetIOPerCompactionInMB));
return this; return this;
} }
public Builder withMaxNumDeltaCommitsBeforeCompaction(int maxNumDeltaCommitsBeforeCompaction) { public Builder withMaxNumDeltaCommitsBeforeCompaction(int maxNumDeltaCommitsBeforeCompaction) {
props.setProperty(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, props.setProperty(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, String.valueOf(maxNumDeltaCommitsBeforeCompaction));
String.valueOf(maxNumDeltaCommitsBeforeCompaction));
return this; return this;
} }
public Builder withCompactionLazyBlockReadEnabled(Boolean compactionLazyBlockReadEnabled) { public Builder withCompactionLazyBlockReadEnabled(Boolean compactionLazyBlockReadEnabled) {
props.setProperty(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, props.setProperty(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, String.valueOf(compactionLazyBlockReadEnabled));
String.valueOf(compactionLazyBlockReadEnabled));
return this; return this;
} }
public Builder withCompactionReverseLogReadEnabled(Boolean compactionReverseLogReadEnabled) { public Builder withCompactionReverseLogReadEnabled(Boolean compactionReverseLogReadEnabled) {
props.setProperty(COMPACTION_REVERSE_LOG_READ_ENABLED_PROP, props.setProperty(COMPACTION_REVERSE_LOG_READ_ENABLED_PROP, String.valueOf(compactionReverseLogReadEnabled));
String.valueOf(compactionReverseLogReadEnabled));
return this; return this;
} }
public Builder withTargetPartitionsPerDayBasedCompaction(int targetPartitionsPerCompaction) { public Builder withTargetPartitionsPerDayBasedCompaction(int targetPartitionsPerCompaction) {
props.setProperty(TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP, props.setProperty(TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP, String.valueOf(targetPartitionsPerCompaction));
String.valueOf(targetPartitionsPerCompaction));
return this; return this;
} }
@@ -251,8 +234,7 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
public HoodieCompactionConfig build() { public HoodieCompactionConfig build() {
HoodieCompactionConfig config = new HoodieCompactionConfig(props); HoodieCompactionConfig config = new HoodieCompactionConfig(props);
setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP), AUTO_CLEAN_PROP, setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP), AUTO_CLEAN_PROP, DEFAULT_AUTO_CLEAN);
DEFAULT_AUTO_CLEAN);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP), INLINE_COMPACT_PROP, setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP), INLINE_COMPACT_PROP,
DEFAULT_INLINE_COMPACT); DEFAULT_INLINE_COMPACT);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP), setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP),
@@ -261,27 +243,25 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
DEFAULT_CLEANER_POLICY); DEFAULT_CLEANER_POLICY);
setDefaultOnCondition(props, !props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP), setDefaultOnCondition(props, !props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP),
CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED); CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP), setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP), CLEANER_COMMITS_RETAINED_PROP,
CLEANER_COMMITS_RETAINED_PROP, DEFAULT_CLEANER_COMMITS_RETAINED); DEFAULT_CLEANER_COMMITS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP_PROP), MAX_COMMITS_TO_KEEP_PROP, setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP_PROP), MAX_COMMITS_TO_KEEP_PROP,
DEFAULT_MAX_COMMITS_TO_KEEP); DEFAULT_MAX_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP_PROP), MIN_COMMITS_TO_KEEP_PROP, setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP_PROP), MIN_COMMITS_TO_KEEP_PROP,
DEFAULT_MIN_COMMITS_TO_KEEP); DEFAULT_MIN_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES), setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES), PARQUET_SMALL_FILE_LIMIT_BYTES,
PARQUET_SMALL_FILE_LIMIT_BYTES, DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES); DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE), setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE),
COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE); COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS), setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS),
COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS); COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE), setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE),
COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE);
DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE);
setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM), CLEANER_PARALLELISM, setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM), CLEANER_PARALLELISM,
DEFAULT_CLEANER_PARALLELISM); DEFAULT_CLEANER_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP), setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP), COMPACTION_STRATEGY_PROP,
COMPACTION_STRATEGY_PROP, DEFAULT_COMPACTION_STRATEGY); DEFAULT_COMPACTION_STRATEGY);
setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS_PROP), setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS_PROP), PAYLOAD_CLASS_PROP, DEFAULT_PAYLOAD_CLASS);
PAYLOAD_CLASS_PROP, DEFAULT_PAYLOAD_CLASS);
setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP), setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP),
TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB); TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB);
setDefaultOnCondition(props, !props.containsKey(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP), setDefaultOnCondition(props, !props.containsKey(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP),
@@ -299,13 +279,15 @@ public class HoodieCompactionConfig extends DefaultHoodieConfig {
// commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull // commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull
int minInstantsToKeep = Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP)); int minInstantsToKeep = Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP));
int maxInstantsToKeep = Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP_PROP)); int maxInstantsToKeep = Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP_PROP));
int cleanerCommitsRetained = Integer int cleanerCommitsRetained =
.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP)); Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP));
Preconditions.checkArgument(maxInstantsToKeep > minInstantsToKeep); Preconditions.checkArgument(maxInstantsToKeep > minInstantsToKeep);
Preconditions.checkArgument(minInstantsToKeep > cleanerCommitsRetained, Preconditions.checkArgument(minInstantsToKeep > cleanerCommitsRetained,
String.format("Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull " String.format(
+ "missing data from few instants.", HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP, "Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull "
minInstantsToKeep, HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, cleanerCommitsRetained)); + "missing data from few instants.",
HoodieCompactionConfig.MIN_COMMITS_TO_KEEP_PROP, minInstantsToKeep,
HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP, cleanerCommitsRetained));
return config; return config;
} }
} }
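
A minimal usage sketch of the Builder shown above, with illustrative values that are not part of this commit; build() falls back to the DEFAULT_* constants for anything left unset, and the Preconditions checks require maxCommitsToKeep > minCommitsToKeep > cleanerCommitsRetained.

HoodieCompactionConfig compactionConfig = HoodieCompactionConfig.newBuilder()
    .withTargetIOPerCompactionInMB(500 * 1024)        // same value as DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB
    .withMaxNumDeltaCommitsBeforeCompaction(5)        // compact after 5 delta commits when inline compaction runs
    .withTargetPartitionsPerDayBasedCompaction(10)
    .autoTuneInsertSplits(true)                       // size insert splits from previous commit metadata
    .approxRecordSize(1024)                           // fallback per-record size estimate, in bytes
    .build();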


@@ -32,8 +32,8 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
public static final String HBASE_GET_BATCH_SIZE_PROP = "hoodie.index.hbase.get.batch.size";
public static final String HBASE_ZK_ZNODEPARENT = "hoodie.index.hbase.zknode.path";
/**
 * Note that if HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP is set to true, this batch size will not be honored for HBase
 * Puts
 */
public static final String HBASE_PUT_BATCH_SIZE_PROP = "hoodie.index.hbase.put.batch.size";
@@ -48,18 +48,16 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
public static final String HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP = "hoodie.index.hbase.put.batch.size.autocompute";
public static final String DEFAULT_HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE = "false";
/**
 * Property to set the fraction of the global share of QPS that should be allocated to this job. Let's say there are 3
 * jobs which have input size in terms of number of rows required for HbaseIndexing as x, 2x, 3x respectively. Then
 * this fraction for the jobs would be (0.17) 1/6, 0.33 (2/6) and 0.5 (3/6) respectively.
 */
public static final String HBASE_QPS_FRACTION_PROP = "hoodie.index.hbase.qps.fraction";
/**
 * Property to set maximum QPS allowed per Region Server. This should be same across various jobs. This is intended to
 * limit the aggregate QPS generated across various jobs to an Hbase Region Server. It is recommended to set this
 * value based on global indexing throughput needs and most importantly, how much the HBase installation in use is
 * able to tolerate without Region Servers going down.
 */
public static String HBASE_MAX_QPS_PER_REGION_SERVER_PROP = "hoodie.index.hbase.max.qps.per.region.server";
/**
@@ -71,8 +69,7 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
 */
public static final int DEFAULT_HBASE_MAX_QPS_PER_REGION_SERVER = 1000;
/**
 * Default is 50%, which means a total of 2 jobs can run using HbaseIndex without overwhelming Region Servers
 */
public static final float DEFAULT_HBASE_QPS_FRACTION = 0.5f;
@@ -218,18 +215,15 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
/**
 * <p>
 * Method to set maximum QPS allowed per Region Server. This should be same across various jobs. This is intended to
 * limit the aggregate QPS generated across various jobs to an Hbase Region Server.
 * </p>
 * <p>
 * It is recommended to set this value based on your global indexing throughput needs and most importantly, how much
 * your HBase installation is able to tolerate without Region Servers going down.
 * </p>
 */
public HoodieHBaseIndexConfig.Builder hbaseIndexMaxQPSPerRegionServer(int maxQPSPerRegionServer) {
// This should be same across various jobs
props.setProperty(HoodieHBaseIndexConfig.HBASE_MAX_QPS_PER_REGION_SERVER_PROP,
    String.valueOf(maxQPSPerRegionServer));
@@ -238,30 +232,30 @@ public class HoodieHBaseIndexConfig extends DefaultHoodieConfig {
public HoodieHBaseIndexConfig build() {
HoodieHBaseIndexConfig config = new HoodieHBaseIndexConfig(props);
setDefaultOnCondition(props, !props.containsKey(HBASE_GET_BATCH_SIZE_PROP), HBASE_GET_BATCH_SIZE_PROP,
    String.valueOf(DEFAULT_HBASE_BATCH_SIZE));
setDefaultOnCondition(props, !props.containsKey(HBASE_PUT_BATCH_SIZE_PROP), HBASE_PUT_BATCH_SIZE_PROP,
    String.valueOf(DEFAULT_HBASE_BATCH_SIZE));
setDefaultOnCondition(props, !props.containsKey(HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP),
    HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE_PROP, String.valueOf(DEFAULT_HBASE_PUT_BATCH_SIZE_AUTO_COMPUTE));
setDefaultOnCondition(props, !props.containsKey(HBASE_QPS_FRACTION_PROP), HBASE_QPS_FRACTION_PROP,
    String.valueOf(DEFAULT_HBASE_QPS_FRACTION));
setDefaultOnCondition(props, !props.containsKey(HBASE_MAX_QPS_PER_REGION_SERVER_PROP),
    HBASE_MAX_QPS_PER_REGION_SERVER_PROP, String.valueOf(DEFAULT_HBASE_MAX_QPS_PER_REGION_SERVER));
setDefaultOnCondition(props, !props.containsKey(HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY),
    HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY, String.valueOf(DEFAULT_HOODIE_INDEX_COMPUTE_QPS_DYNAMICALLY));
setDefaultOnCondition(props, !props.containsKey(HBASE_INDEX_QPS_ALLOCATOR_CLASS), HBASE_INDEX_QPS_ALLOCATOR_CLASS,
    String.valueOf(DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS));
setDefaultOnCondition(props, !props.containsKey(HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS),
    HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS, String.valueOf(DEFAULT_HOODIE_INDEX_DESIRED_PUTS_TIME_IN_SECS));
setDefaultOnCondition(props, !props.containsKey(HBASE_ZK_PATH_QPS_ROOT), HBASE_ZK_PATH_QPS_ROOT,
    String.valueOf(DEFAULT_HBASE_ZK_PATH_QPS_ROOT));
setDefaultOnCondition(props, !props.containsKey(HOODIE_INDEX_HBASE_ZK_SESSION_TIMEOUT_MS),
    HOODIE_INDEX_HBASE_ZK_SESSION_TIMEOUT_MS, String.valueOf(DEFAULT_ZK_SESSION_TIMEOUT_MS));
setDefaultOnCondition(props, !props.containsKey(HOODIE_INDEX_HBASE_ZK_CONNECTION_TIMEOUT_MS),
    HOODIE_INDEX_HBASE_ZK_CONNECTION_TIMEOUT_MS, String.valueOf(DEFAULT_ZK_CONNECTION_TIMEOUT_MS));
setDefaultOnCondition(props, !props.containsKey(HBASE_INDEX_QPS_ALLOCATOR_CLASS), HBASE_INDEX_QPS_ALLOCATOR_CLASS,
    String.valueOf(DEFAULT_HBASE_INDEX_QPS_ALLOCATOR_CLASS));
return config;
}
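
To make the qps.fraction Javadoc above concrete, a small worked example with hypothetical job sizes (illustrative only, not part of this commit):

// Three jobs index x, 2x and 3x rows against the same HBase cluster, so they should be given
// 1/6, 2/6 and 3/6 of the shared QPS budget respectively.
double totalUnits = 1 + 2 + 3;
double fractionSmallJob = 1 / totalUnits;   // ~0.17
double fractionLargeJob = 3 / totalUnits;   // 0.5, the DEFAULT_HBASE_QPS_FRACTION
// With the default 1000 QPS per Region Server, the largest job gets at most 500 QPS per Region Server.
int largeJobQpsPerRegionServer = (int) (fractionLargeJob * DEFAULT_HBASE_MAX_QPS_PER_REGION_SERVER);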


@@ -42,8 +42,7 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism";
// Disable explicit bloom index parallelism setting by default - hoodie auto computes
public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0";
public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by" + ".ranges";
public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
@@ -67,8 +66,7 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
public static final String DEFAULT_HBASE_BATCH_SIZE = "100";
public static final String BLOOM_INDEX_INPUT_STORAGE_LEVEL = "hoodie.bloom.index.input.storage" + ".level";
public static final String DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
private HoodieIndexConfig(Properties props) {
@@ -175,20 +173,18 @@ public class HoodieIndexConfig extends DefaultHoodieConfig {
public HoodieIndexConfig build() {
HoodieIndexConfig config = new HoodieIndexConfig(props);
setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), BLOOM_FILTER_NUM_ENTRIES,
    DEFAULT_BLOOM_FILTER_NUM_ENTRIES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), BLOOM_INDEX_PARALLELISM_PROP,
    DEFAULT_BLOOM_INDEX_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP),
    BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), BLOOM_INDEX_USE_CACHING_PROP,
    DEFAULT_BLOOM_INDEX_USE_CACHING);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_INPUT_STORAGE_LEVEL), BLOOM_INDEX_INPUT_STORAGE_LEVEL,
    DEFAULT_BLOOM_INDEX_INPUT_STORAGE_LEVEL);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_TREE_BASED_FILTER_PROP),
    BLOOM_INDEX_TREE_BASED_FILTER_PROP, DEFAULT_BLOOM_INDEX_TREE_BASED_FILTER);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_BUCKETIZED_CHECKING_PROP),
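
A short sketch of overriding the bloom-index keys declared in this class through the fromProperties path that the write config uses below; the values are illustrative only and java.util.Properties is assumed to be imported.

Properties indexProps = new Properties();
indexProps.setProperty("hoodie.bloom.index.parallelism", "150");             // default "0" lets Hudi auto-compute
indexProps.setProperty("hoodie.bloom.index.prune.by.ranges", "true");
indexProps.setProperty("hoodie.bloom.index.use.caching", "true");
indexProps.setProperty("hoodie.bloom.index.input.storage.level", "MEMORY_AND_DISK_SER");
HoodieIndexConfig indexConfig = HoodieIndexConfig.newBuilder().fromProperties(indexProps).build();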


@@ -41,8 +41,7 @@ public class HoodieMemoryConfig extends DefaultHoodieConfig {
// Default max memory fraction during compaction, excess spills to disk
public static final String DEFAULT_MAX_MEMORY_FRACTION_FOR_COMPACTION = String.valueOf(0.6);
// Default memory size per compaction (used if SparkEnv is absent), excess spills to disk
public static final long DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES = 1024 * 1024 * 1024L; // 1GB
// Property to set the max memory for merge
public static final String MAX_MEMORY_FOR_MERGE_PROP = "hoodie.memory.merge.max.size";
// Property to set the max memory for compaction
@@ -88,20 +87,17 @@ public class HoodieMemoryConfig extends DefaultHoodieConfig {
}
public Builder withMaxMemoryFractionPerPartitionMerge(double maxMemoryFractionPerPartitionMerge) {
props.setProperty(MAX_MEMORY_FRACTION_FOR_MERGE_PROP, String.valueOf(maxMemoryFractionPerPartitionMerge));
return this;
}
public Builder withMaxMemoryFractionPerCompaction(double maxMemoryFractionPerCompaction) {
props.setProperty(MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP, String.valueOf(maxMemoryFractionPerCompaction));
return this;
}
public Builder withMaxDFSStreamBufferSize(int maxStreamBufferSize) {
props.setProperty(MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(maxStreamBufferSize));
return this;
}
@@ -130,19 +126,16 @@ public class HoodieMemoryConfig extends DefaultHoodieConfig {
if (SparkEnv.get() != null) {
// 1 GB is the default conf used by Spark, look at SparkContext.scala
long executorMemoryInBytes = Utils.memoryStringToMb(
    SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_PROP, DEFAULT_SPARK_EXECUTOR_MEMORY_MB)) * 1024 * 1024L;
// 0.6 is the default value used by Spark,
// look at {@link
// https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/SparkConf.scala#L507}
double memoryFraction = Double.valueOf(
    SparkEnv.get().conf().get(SPARK_EXECUTOR_MEMORY_FRACTION_PROP, DEFAULT_SPARK_EXECUTOR_MEMORY_FRACTION));
double maxMemoryFractionForMerge = Double.valueOf(maxMemoryFraction);
double userAvailableMemory = executorMemoryInBytes * (1 - memoryFraction);
long maxMemoryForMerge = (long) Math.floor(userAvailableMemory * maxMemoryFractionForMerge);
return maxMemoryForMerge;
} else {
return DEFAULT_MAX_MEMORY_FOR_SPILLABLE_MAP_IN_BYTES;
@@ -151,29 +144,19 @@ public class HoodieMemoryConfig extends DefaultHoodieConfig {
public HoodieMemoryConfig build() {
HoodieMemoryConfig config = new HoodieMemoryConfig(props);
setDefaultOnCondition(props, !props.containsKey(MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP),
    MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP, DEFAULT_MAX_MEMORY_FRACTION_FOR_COMPACTION);
setDefaultOnCondition(props, !props.containsKey(MAX_MEMORY_FRACTION_FOR_MERGE_PROP),
    MAX_MEMORY_FRACTION_FOR_MERGE_PROP, DEFAULT_MAX_MEMORY_FRACTION_FOR_MERGE);
setDefaultOnCondition(props, !props.containsKey(MAX_MEMORY_FOR_MERGE_PROP), MAX_MEMORY_FOR_MERGE_PROP,
    String.valueOf(getMaxMemoryAllowedForMerge(props.getProperty(MAX_MEMORY_FRACTION_FOR_MERGE_PROP))));
setDefaultOnCondition(props, !props.containsKey(MAX_MEMORY_FOR_COMPACTION_PROP), MAX_MEMORY_FOR_COMPACTION_PROP,
    String.valueOf(getMaxMemoryAllowedForMerge(props.getProperty(MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP))));
setDefaultOnCondition(props, !props.containsKey(MAX_DFS_STREAM_BUFFER_SIZE_PROP), MAX_DFS_STREAM_BUFFER_SIZE_PROP,
    String.valueOf(DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE));
setDefaultOnCondition(props, !props.containsKey(SPILLABLE_MAP_BASE_PATH_PROP), SPILLABLE_MAP_BASE_PATH_PROP,
    DEFAULT_SPILLABLE_MAP_BASE_PATH);
setDefaultOnCondition(props, !props.containsKey(WRITESTATUS_FAILURE_FRACTION_PROP),
    WRITESTATUS_FAILURE_FRACTION_PROP, String.valueOf(DEFAULT_WRITESTATUS_FAILURE_FRACTION));
return config;
}
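
A worked example of the getMaxMemoryAllowedForMerge arithmetic above, using hypothetical Spark settings; the 0.6 merge fraction is chosen purely for illustration, mirroring the compaction default declared in this class.

long executorMemoryInBytes = 4L * 1024 * 1024 * 1024;                              // spark.executor.memory = 4g
double sparkMemoryFraction = 0.6;                                                  // fraction reserved by Spark itself
double mergeFraction = 0.6;                                                        // MAX_MEMORY_FRACTION_FOR_MERGE_PROP, illustrative
double userAvailableMemory = executorMemoryInBytes * (1 - sparkMemoryFraction);    // ~1.6 GB left for user code
long maxMemoryForMerge = (long) Math.floor(userAvailableMemory * mergeFraction);   // ~0.96 GB for the spillable map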


@@ -35,8 +35,7 @@ public class HoodieMetricsConfig extends DefaultHoodieConfig {
public static final String METRICS_ON = METRIC_PREFIX + ".on";
public static final boolean DEFAULT_METRICS_ON = false;
public static final String METRICS_REPORTER_TYPE = METRIC_PREFIX + ".reporter.type";
public static final MetricsReporterType DEFAULT_METRICS_REPORTER_TYPE = MetricsReporterType.GRAPHITE;
// Graphite
public static final String GRAPHITE_PREFIX = METRIC_PREFIX + ".graphite";
@@ -103,8 +102,7 @@ public class HoodieMetricsConfig extends DefaultHoodieConfig {
public HoodieMetricsConfig build() {
HoodieMetricsConfig config = new HoodieMetricsConfig(props);
setDefaultOnCondition(props, !props.containsKey(METRICS_ON), METRICS_ON, String.valueOf(DEFAULT_METRICS_ON));
setDefaultOnCondition(props, !props.containsKey(METRICS_REPORTER_TYPE), METRICS_REPORTER_TYPE,
    DEFAULT_METRICS_REPORTER_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_HOST), GRAPHITE_SERVER_HOST,


@@ -38,8 +38,7 @@ public class HoodieStorageConfig extends DefaultHoodieConfig {
public static final String DEFAULT_PARQUET_PAGE_SIZE_BYTES = String.valueOf(1 * 1024 * 1024);
// used to size log files
public static final String LOGFILE_SIZE_MAX_BYTES = "hoodie.logfile.max.size";
public static final String DEFAULT_LOGFILE_SIZE_MAX_BYTES = String.valueOf(1024 * 1024 * 1024); // 1 GB
// used to size data blocks in log file
public static final String LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES = "hoodie.logfile.data.block.max.size";
public static final String DEFAULT_LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES = String.valueOf(256 * 1024 * 1024); // 256 MB
@@ -122,20 +121,20 @@ public class HoodieStorageConfig extends DefaultHoodieConfig {
public HoodieStorageConfig build() {
HoodieStorageConfig config = new HoodieStorageConfig(props);
setDefaultOnCondition(props, !props.containsKey(PARQUET_FILE_MAX_BYTES), PARQUET_FILE_MAX_BYTES,
    DEFAULT_PARQUET_FILE_MAX_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_BLOCK_SIZE_BYTES), PARQUET_BLOCK_SIZE_BYTES,
    DEFAULT_PARQUET_BLOCK_SIZE_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_PAGE_SIZE_BYTES), PARQUET_PAGE_SIZE_BYTES,
    DEFAULT_PARQUET_PAGE_SIZE_BYTES);
setDefaultOnCondition(props, !props.containsKey(LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES),
    LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES, DEFAULT_LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES);
setDefaultOnCondition(props, !props.containsKey(LOGFILE_SIZE_MAX_BYTES), LOGFILE_SIZE_MAX_BYTES,
    DEFAULT_LOGFILE_SIZE_MAX_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_COMPRESSION_RATIO), PARQUET_COMPRESSION_RATIO,
    DEFAULT_STREAM_COMPRESSION_RATIO);
setDefaultOnCondition(props, !props.containsKey(PARQUET_COMPRESSION_CODEC), PARQUET_COMPRESSION_CODEC,
    DEFAULT_PARQUET_COMPRESSION_CODEC);
setDefaultOnCondition(props, !props.containsKey(LOGFILE_TO_PARQUET_COMPRESSION_RATIO),
    LOGFILE_TO_PARQUET_COMPRESSION_RATIO, DEFAULT_LOGFILE_TO_PARQUET_COMPRESSION_RATIO);
return config;
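
A small sketch of overriding the log-file sizing keys declared above, using the property names visible in this class; values are illustrative and anything left unset keeps its DEFAULT_* value in build().

Properties storageProps = new Properties();
storageProps.setProperty("hoodie.logfile.max.size", String.valueOf(512 * 1024 * 1024));             // 512 MB log files
storageProps.setProperty("hoodie.logfile.data.block.max.size", String.valueOf(128 * 1024 * 1024));  // 128 MB data blocks
HoodieStorageConfig storageConfig = HoodieStorageConfig.newBuilder().fromProperties(storageProps).build();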


@@ -61,8 +61,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit";
private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date" + ".partitioning";
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
@@ -143,8 +142,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public int getWriteBufferLimitBytes() {
return Integer.parseInt(props.getProperty(WRITE_BUFFER_LIMIT_BYTES, DEFAULT_WRITE_BUFFER_LIMIT_BYTES));
}
public boolean shouldCombineBeforeInsert() {
@@ -191,18 +189,15 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
 * compaction properties
 **/
public HoodieCleaningPolicy getCleanerPolicy() {
return HoodieCleaningPolicy.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP));
}
public int getCleanerFileVersionsRetained() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED_PROP));
}
public int getCleanerCommitsRetained() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP));
}
public int getMaxCommitsToKeep() {
@@ -214,23 +209,19 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public int getParquetSmallFileLimit() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT_BYTES));
}
public int getCopyOnWriteInsertSplitSize() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE));
}
public int getCopyOnWriteRecordSizeEstimate() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE));
}
public boolean shouldAutoTuneInsertSplits() {
return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS));
}
public int getCleanerParallelism() {
@@ -246,28 +237,23 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public int getInlineCompactDeltaCommitMax() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP));
}
public CompactionStrategy getCompactionStrategy() {
return ReflectionUtils.loadClass(props.getProperty(HoodieCompactionConfig.COMPACTION_STRATEGY_PROP));
}
public Long getTargetIOPerCompactionInMB() {
return Long.parseLong(props.getProperty(HoodieCompactionConfig.TARGET_IO_PER_COMPACTION_IN_MB_PROP));
}
public Boolean getCompactionLazyBlockReadEnabled() {
return Boolean.valueOf(props.getProperty(HoodieCompactionConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP));
}
public Boolean getCompactionReverseLogReadEnabled() {
return Boolean.valueOf(props.getProperty(HoodieCompactionConfig.COMPACTION_REVERSE_LOG_READ_ENABLED_PROP));
}
public String getPayloadClass() {
@@ -275,13 +261,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public int getTargetPartitionsPerDayBasedCompaction() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP));
}
public int getCommitArchivalBatchSize() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.COMMITS_ARCHIVAL_BATCH_SIZE_PROP));
}
/**
@@ -352,9 +336,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
/**
 * Fraction of the global share of QPS that should be allocated to this job. Let's say there are 3 jobs which have
 * input size in terms of number of rows required for HbaseIndexing as x, 2x, 3x respectively. Then this fraction for
 * the jobs would be (0.17) 1/6, 0.33 (2/6) and 0.5 (3/6) respectively.
 */
public float getHbaseIndexQPSFraction() {
@@ -370,8 +353,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
/**
 * This should be same across various jobs. This is intended to limit the aggregate QPS generated across various
 * Hoodie jobs to an Hbase Region Server
 */
public int getHbaseIndexMaxQPSPerRegionServer() {
return Integer.parseInt(props.getProperty(HoodieHBaseIndexConfig.HBASE_MAX_QPS_PER_REGION_SERVER_PROP));
@@ -382,8 +365,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public boolean getBloomIndexPruneByRanges() {
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PRUNE_BY_RANGES_PROP));
}
public boolean getBloomIndexUseCaching() {
@@ -403,8 +385,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public StorageLevel getBloomIndexInputStorageLevel() {
return StorageLevel.fromString(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_INPUT_STORAGE_LEVEL));
}
/**
@@ -423,8 +404,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public int getLogFileDataBlockMaxSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.LOGFILE_DATA_BLOCK_SIZE_MAX_BYTES));
}
public int getLogFileMaxSize() {
@@ -451,8 +431,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public MetricsReporterType getMetricsReporterType() {
return MetricsReporterType.valueOf(props.getProperty(HoodieMetricsConfig.METRICS_REPORTER_TYPE));
}
public String getGraphiteServerHost() {
@@ -475,9 +454,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public Double getMaxMemoryFractionPerCompaction() {
return Double.valueOf(props.getProperty(HoodieMemoryConfig.MAX_MEMORY_FRACTION_FOR_COMPACTION_PROP));
}
public Long getMaxMemoryPerPartitionMerge() {
@@ -637,8 +614,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
}
public Builder withAssumeDatePartitioning(boolean assumeDatePartitioning) {
props.setProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP, String.valueOf(assumeDatePartitioning));
return this;
}
@@ -671,48 +647,42 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
public HoodieWriteConfig build() {
// Check for mandatory properties
setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM, DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BULKINSERT_PARALLELISM), BULKINSERT_PARALLELISM,
    DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM, DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP), COMBINE_BEFORE_INSERT_PROP,
    DEFAULT_COMBINE_BEFORE_INSERT);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP), COMBINE_BEFORE_UPSERT_PROP,
    DEFAULT_COMBINE_BEFORE_UPSERT);
setDefaultOnCondition(props, !props.containsKey(WRITE_STATUS_STORAGE_LEVEL), WRITE_STATUS_STORAGE_LEVEL,
    DEFAULT_WRITE_STATUS_STORAGE_LEVEL);
setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP), HOODIE_AUTO_COMMIT_PROP,
    DEFAULT_HOODIE_AUTO_COMMIT);
setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP),
    HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP), HOODIE_WRITE_STATUS_CLASS_PROP,
    DEFAULT_HOODIE_WRITE_STATUS_CLASS);
setDefaultOnCondition(props, !props.containsKey(FINALIZE_WRITE_PARALLELISM), FINALIZE_WRITE_PARALLELISM,
    DEFAULT_FINALIZE_WRITE_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(EMBEDDED_TIMELINE_SERVER_ENABLED),
    EMBEDDED_TIMELINE_SERVER_ENABLED, DEFAULT_EMBEDDED_TIMELINE_SERVER_ENABLED);
setDefaultOnCondition(props, !props.containsKey(INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
    INITIAL_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_INITIAL_CONSISTENCY_CHECK_INTERVAL_MS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP),
    MAX_CONSISTENCY_CHECK_INTERVAL_MS_PROP, String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECK_INTERVAL_MS));
setDefaultOnCondition(props, !props.containsKey(MAX_CONSISTENCY_CHECKS_PROP), MAX_CONSISTENCY_CHECKS_PROP,
    String.valueOf(DEFAULT_MAX_CONSISTENCY_CHECKS));
setDefaultOnCondition(props, !props.containsKey(FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP),
    FAIL_ON_TIMELINE_ARCHIVING_ENABLED_PROP, DEFAULT_FAIL_ON_TIMELINE_ARCHIVING_ENABLED);
// Make sure the props is propagated
setDefaultOnCondition(props, !isIndexConfigSet, HoodieIndexConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isStorageConfigSet, HoodieStorageConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isCompactionConfigSet,
    HoodieCompactionConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isMetricsConfigSet, HoodieMetricsConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isMemoryConfigSet, HoodieMemoryConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isViewConfigSet,
    FileSystemViewStorageConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isConsistencyGuardSet,
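
For context, a sketch of how these pieces typically come together. HoodieWriteConfig.newBuilder() is assumed here by analogy with the other config classes in this diff and is not itself shown above.

HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()   // assumed factory method
    .withAssumeDatePartitioning(false)                           // shown above: sets hoodie.assume.date.partitioning
    .build();
// build() back-fills every unset key with its DEFAULT_* value and, when the is*ConfigSet flags are
// false, pulls in the index, storage, compaction, metrics, memory and view configs through their
// own fromProperties(props) builders, so one Properties object carries the whole configuration.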


@@ -19,8 +19,9 @@
package org.apache.hudi.exception;
/**
 * <p>
 * Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a delta commit
 * </p>
 */
public class HoodieAppendException extends HoodieException {

View File

@@ -19,7 +19,8 @@
package org.apache.hudi.exception;
/**
- * <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit
+ * <p>
+ * Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit
* </p>
*/
public class HoodieCommitException extends HoodieException {

View File

@@ -20,7 +20,9 @@ package org.apache.hudi.exception;
/**
- * <p> Exception thrown when dependent system is not available </p>
+ * <p>
+ * Exception thrown when dependent system is not available
+ * </p>
*/
public class HoodieDependentSystemUnavailableException extends HoodieException {

View File

@@ -19,8 +19,9 @@
package org.apache.hudi.exception;
/**
- * <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk
- * insert </p>
+ * <p>
+ * Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk insert
+ * </p>
*/
public class HoodieInsertException extends HoodieException {

View File

@@ -19,8 +19,9 @@
package org.apache.hudi.exception;
/**
- * <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a
- * incremental upsert </p>
+ * <p>
+ * Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a incremental upsert
+ * </p>
*/
public class HoodieUpsertException extends HoodieException {

View File

@@ -31,16 +31,16 @@ import org.apache.spark.api.java.function.Function2;
/**
* Map function that handles a sorted stream of HoodieRecords
*/
- public class BulkInsertMapFunction<T extends HoodieRecordPayload> implements
- Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
+ public class BulkInsertMapFunction<T extends HoodieRecordPayload>
+ implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
private String commitTime;
private HoodieWriteConfig config;
private HoodieTable<T> hoodieTable;
private List<String> fileIDPrefixes;
- public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config,
- HoodieTable<T> hoodieTable, List<String> fileIDPrefixes) {
+ public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config, HoodieTable<T> hoodieTable,
+ List<String> fileIDPrefixes) {
this.commitTime = commitTime;
this.config = config;
this.hoodieTable = hoodieTable;

View File

@@ -37,11 +37,10 @@ import org.apache.hudi.io.HoodieWriteHandle;
import org.apache.hudi.table.HoodieTable;
/**
- * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
- * files.
+ * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new files.
*/
- public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extends
- LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
+ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload>
+ extends LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
protected final HoodieWriteConfig hoodieConfig;
protected final String commitTime;
@@ -80,25 +79,23 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
* Transformer function to help transform a HoodieRecord. This transformer is used by BufferedIterator to offload some
* expensive operations of transformation to the reader thread.
*/
- static <T extends HoodieRecordPayload> Function<HoodieRecord<T>,
- HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(Schema schema) {
+ static <T extends HoodieRecordPayload> Function<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>> getTransformFunction(
+ Schema schema) {
return hoodieRecord -> new HoodieInsertValueGenResult(hoodieRecord, schema);
}
@Override
- protected void start() {
- }
+ protected void start() {}
@Override
protected List<WriteStatus> computeNext() {
// Executor service used for launching writer thread.
- BoundedInMemoryExecutor<HoodieRecord<T>,
- HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> bufferedIteratorExecutor = null;
+ BoundedInMemoryExecutor<HoodieRecord<T>, HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> bufferedIteratorExecutor =
+ null;
try {
final Schema schema = new Schema.Parser().parse(hoodieConfig.getSchema());
bufferedIteratorExecutor =
- new SparkBoundedInMemoryExecutor<>(hoodieConfig, inputItr,
- getInsertHandler(), getTransformFunction(schema));
+ new SparkBoundedInMemoryExecutor<>(hoodieConfig, inputItr, getInsertHandler(), getTransformFunction(schema));
final List<WriteStatus> result = bufferedIteratorExecutor.execute();
assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining();
return result;
@@ -112,8 +109,7 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
}
@Override
- protected void end() {
- }
+ protected void end() {}
protected String getNextFileId(String idPfx) {
return String.format("%s-%d", idPfx, numFilesWritten++);
@@ -124,11 +120,10 @@ public class CopyOnWriteLazyInsertIterable<T extends HoodieRecordPayload> extend
}
/**
- * Consumes stream of hoodie records from in-memory queue and
- * writes to one or more create-handles
+ * Consumes stream of hoodie records from in-memory queue and writes to one or more create-handles
*/
- protected class CopyOnWriteInsertHandler extends
- BoundedInMemoryQueueConsumer<HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> {
+ protected class CopyOnWriteInsertHandler
+ extends BoundedInMemoryQueueConsumer<HoodieInsertValueGenResult<HoodieRecord>, List<WriteStatus>> {
protected final List<WriteStatus> statuses = new ArrayList<>();
protected HoodieWriteHandle handle;

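The iterable above hands each incoming record to a bounded in-memory queue so that the reader thread absorbs the expensive record transformation before a consumer writes the result. The stand-alone sketch below shows that producer/consumer hand-off with plain java.util.concurrent types; the class name, queue capacity and transform are illustrative assumptions, not Hudi's BoundedInMemoryExecutor API.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;

// Illustrative bounded hand-off: the producer thread applies the (potentially
// expensive) transform, the consumer only drains results, mirroring how the
// iterable above offloads work to the reader thread.
public class BoundedTransformPipeline<I, O> {

  private final BlockingQueue<O> queue;
  private final Function<I, O> transform;

  public BoundedTransformPipeline(int capacity, Function<I, O> transform) {
    this.queue = new ArrayBlockingQueue<>(capacity);
    this.transform = transform;
  }

  public List<O> run(Iterator<I> input) throws InterruptedException {
    List<O> results = new ArrayList<>();
    Thread producer = new Thread(() -> {
      while (input.hasNext()) {
        try {
          queue.put(transform.apply(input.next())); // blocks when the queue is full
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          return;
        }
      }
    });
    producer.start();
    // Consumer side: drain until the producer is done and nothing is left queued.
    while (producer.isAlive() || !queue.isEmpty()) {
      O item = queue.poll(10, TimeUnit.MILLISECONDS);
      if (item != null) {
        results.add(item); // a Hudi write handle would persist the record here
      }
    }
    producer.join();
    return results;
  }
}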
View File

@@ -21,16 +21,15 @@ package org.apache.hudi.func;
import java.util.Iterator;
/**
- * (NOTE: Adapted from Apache SystemML) This class is a generic base class for lazy, single pass
- * inputItr classes in order to simplify the implementation of lazy iterators for mapPartitions use
- * cases. Note [SPARK-3369], which gives the reasons for backwards compatibility with regard to the
- * iterable API despite Spark's single pass nature.
+ * (NOTE: Adapted from Apache SystemML) This class is a generic base class for lazy, single pass inputItr classes in
+ * order to simplify the implementation of lazy iterators for mapPartitions use cases. Note [SPARK-3369], which gives
+ * the reasons for backwards compatibility with regard to the iterable API despite Spark's single pass nature.
* <p>
* Provide a way to obtain a inputItr of type O (output), out of an inputItr of type I (input)
* <p>
- * Things to remember: - Assumes Spark calls hasNext() to check for elements, before calling next()
- * to obtain them - Assumes hasNext() gets called atleast once. - Concrete Implementation is
- * responsible for calling inputIterator.next() and doing the processing in computeNext()
+ * Things to remember: - Assumes Spark calls hasNext() to check for elements, before calling next() to obtain them -
+ * Assumes hasNext() gets called atleast once. - Concrete Implementation is responsible for calling inputIterator.next()
+ * and doing the processing in computeNext()
*/
public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterator<O> {

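The contract described in that javadoc (hasNext() consulted before next(), concrete classes doing their work in computeNext(), a single pass over the input) can be illustrated with a minimal stand-alone version. The class below is a simplified sketch, not the Hudi LazyIterableIterator itself.

import java.util.Iterator;

// Simplified sketch of a lazy, single-pass transforming iterator: output-side
// hasNext()/next() pull from the input iterator on demand.
public abstract class SimpleLazyIterator<I, O> implements Iterator<O> {

  protected final Iterator<I> inputItr;

  protected SimpleLazyIterator(Iterator<I> inputItr) {
    this.inputItr = inputItr;
  }

  /** Concrete implementations consume inputItr.next() and produce one output element. */
  protected abstract O computeNext();

  @Override
  public boolean hasNext() {
    return inputItr.hasNext();
  }

  @Override
  public O next() {
    // Assumes hasNext() was called first, as the javadoc above notes for Spark.
    return computeNext();
  }

  // Example usage: an iterator that upper-cases strings lazily.
  public static SimpleLazyIterator<String, String> upperCasing(Iterator<String> in) {
    return new SimpleLazyIterator<String, String>(in) {
      @Override
      protected String computeNext() {
        return inputItr.next().toUpperCase();
      }
    };
  }
}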
View File

@@ -29,11 +29,9 @@ import org.apache.hudi.io.HoodieAppendHandle;
import org.apache.hudi.table.HoodieTable;
/**
- * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
- * log files.
+ * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new log files.
*/
- public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extends
- CopyOnWriteLazyInsertIterable<T> {
+ public class MergeOnReadLazyInsertIterable<T extends HoodieRecordPayload> extends CopyOnWriteLazyInsertIterable<T> {
public MergeOnReadLazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
String commitTime, HoodieTable<T> hoodieTable, String idPfx) {

View File

@@ -32,8 +32,7 @@ public class OperationResult<T> implements Serializable {
private boolean success;
private Option<Exception> exception;
- public OperationResult() {
- }
+ public OperationResult() {}
public OperationResult(T operation, boolean success, Option<Exception> exception) {
this.operation = operation;
@@ -67,11 +66,7 @@ public class OperationResult<T> implements Serializable {
@Override
public String toString() {
- return "OperationResult{"
- + "operation=" + operation
- + ", executed=" + executed
- + ", success=" + success
- + ", exception=" + exception
- + '}';
+ return "OperationResult{" + "operation=" + operation + ", executed=" + executed + ", success=" + success
+ + ", exception=" + exception + '}';
}
}

View File

@@ -25,8 +25,8 @@ import org.apache.hudi.exception.HoodieIOException;
import org.apache.parquet.hadoop.ParquetReader;
/**
- * This class wraps a parquet reader and provides an iterator based api to
- * read from a parquet file. This is used in {@link BoundedInMemoryQueue}
+ * This class wraps a parquet reader and provides an iterator based api to read from a parquet file. This is used in
+ * {@link BoundedInMemoryQueue}
*/
public class ParquetReaderIterator<T> implements Iterator<T> {

View File

@@ -36,17 +36,13 @@ public class SparkBoundedInMemoryExecutor<I, O, E> extends BoundedInMemoryExecut
final TaskContext sparkThreadTaskContext;
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, final Iterator<I> inputItr,
- BoundedInMemoryQueueConsumer<O, E> consumer,
- Function<I, O> bufferedIteratorTransform) {
+ BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
this(hoodieConfig, new IteratorBasedQueueProducer<>(inputItr), consumer, bufferedIteratorTransform);
}
- public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig,
- BoundedInMemoryQueueProducer<I> producer,
- BoundedInMemoryQueueConsumer<O, E> consumer,
- Function<I, O> bufferedIteratorTransform) {
- super(hoodieConfig.getWriteBufferLimitBytes(), producer,
- Option.of(consumer), bufferedIteratorTransform);
+ public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
+ BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
+ super(hoodieConfig.getWriteBufferLimitBytes(), producer, Option.of(consumer), bufferedIteratorTransform);
this.sparkThreadTaskContext = TaskContext.get();
}

View File

@@ -65,18 +65,18 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
}
/**
- * Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[partitionPath, fileID]]
- * If the optional is empty, then the key is not found.
+ * Checks if the given [Keys] exists in the hoodie table and returns [Key, Option[partitionPath, fileID]] If the
+ * optional is empty, then the key is not found.
*/
public abstract JavaPairRDD<HoodieKey, Option<Pair<String, String>>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, final JavaSparkContext jsc, HoodieTable<T> hoodieTable);
/**
- * Looks up the index and tags each incoming record with a location of a file that contains the
- * row (if it is actually present)
+ * Looks up the index and tags each incoming record with a location of a file that contains the row (if it is actually
+ * present)
*/
- public abstract JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
- JavaSparkContext jsc, HoodieTable<T> hoodieTable) throws HoodieIndexException;
+ public abstract JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, JavaSparkContext jsc,
+ HoodieTable<T> hoodieTable) throws HoodieIndexException;
/**
* Extracts the location of written records, and updates the index.
@@ -84,8 +84,7 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
* TODO(vc): We may need to propagate the record as well in a WriteStatus class
*/
public abstract JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD, JavaSparkContext jsc,
- HoodieTable<T> hoodieTable)
- throws HoodieIndexException;
+ HoodieTable<T> hoodieTable) throws HoodieIndexException;
/**
* Rollback the efffects of the commit made at commitTime.
@@ -93,17 +92,17 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
public abstract boolean rollbackCommit(String commitTime);
/**
- * An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the
- * `partitionPath`. Such an implementation is able to obtain the same mapping, for two hoodie keys
- * with same `recordKey` but different `partitionPath`
+ * An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the `partitionPath`. Such an
+ * implementation is able to obtain the same mapping, for two hoodie keys with same `recordKey` but different
+ * `partitionPath`
*
* @return whether or not, the index implementation is global in nature
*/
public abstract boolean isGlobal();
/**
- * This is used by storage to determine, if its safe to send inserts, straight to the log, i.e
- * having a {@link FileSlice}, with no data file.
+ * This is used by storage to determine, if its safe to send inserts, straight to the log, i.e having a
+ * {@link FileSlice}, with no data file.
*
* @return Returns true/false depending on whether the impl has this capability
*/
@@ -111,8 +110,8 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
/**
- * An index is "implicit" with respect to storage, if just writing new data to a file slice,
- * updates the index as well. This is used by storage, to save memory footprint in certain cases.
+ * An index is "implicit" with respect to storage, if just writing new data to a file slice, updates the index as
+ * well. This is used by storage, to save memory footprint in certain cases.
*/
public abstract boolean isImplicitWithStorage();

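To make the index contract above concrete, here is a toy, non-Spark illustration of the same ideas: fetch a known location, tag incoming keys as inserts or updates, record the location after a write, and "global" meaning the mapping ignores partitionPath. The class name and the map-based storage are assumptions for the sketch only; the real implementations operate on JavaRDDs.

import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

// Toy, in-memory stand-in for the index contract described above.
public class ToyGlobalIndex {

  // recordKey -> "partitionPath/fileId"; keyed by record key only, so it is "global".
  private final Map<String, String> keyToLocation = new HashMap<>();

  /** Looks up the index: an empty result means the key was never written. */
  public Optional<String> fetchRecordLocation(String recordKey) {
    return Optional.ofNullable(keyToLocation.get(recordKey));
  }

  /** Tags an incoming record as an update (known location) or an insert (unknown). */
  public String tagLocation(String recordKey) {
    return keyToLocation.containsKey(recordKey) ? "UPDATE@" + keyToLocation.get(recordKey) : "INSERT";
  }

  /** Called after writing, so later commits can find the record again. */
  public void updateLocation(String recordKey, String partitionPath, String fileId) {
    keyToLocation.put(recordKey, partitionPath + "/" + fileId);
  }

  /** Global: the mapping does not depend on partitionPath. */
  public boolean isGlobal() {
    return true;
  }
}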
View File

@@ -40,7 +40,9 @@ import org.apache.spark.api.java.function.Function2;
/**
- * Hoodie Index implementation backed by an in-memory Hash map. <p> ONLY USE FOR LOCAL TESTING
+ * Hoodie Index implementation backed by an in-memory Hash map.
+ * <p>
+ * ONLY USE FOR LOCAL TESTING
*/
public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
@@ -122,12 +124,10 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
- class LocationTagFunction implements
- Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
+ class LocationTagFunction implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
@Override
- public Iterator<HoodieRecord<T>> call(Integer partitionNum,
- Iterator<HoodieRecord<T>> hoodieRecordIterator) {
+ public Iterator<HoodieRecord<T>> call(Integer partitionNum, Iterator<HoodieRecord<T>> hoodieRecordIterator) {
List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
while (hoodieRecordIterator.hasNext()) {
HoodieRecord<T> rec = hoodieRecordIterator.next();

View File

@@ -35,6 +35,7 @@ import org.apache.spark.Partitioner;
* Partitions bloom filter checks by spreading out comparisons across buckets of work.
*
* Each bucket incurs the following cost
+ *
* <pre>
* 1) Read bloom filter from file footer
* 2) Check keys against bloom filter
@@ -47,6 +48,7 @@ import org.apache.spark.Partitioner;
* could bound the amount of skew to std_dev(numberOfBucketsPerPartition) * cost of (3), lower than sort partitioning.
*
* Approach has two goals :
+ *
* <pre>
* 1) Pack as many buckets from same file group into same partition, to amortize cost of (1) and (2) further
* 2) Spread buckets across partitions evenly to achieve skew reduction
@@ -76,8 +78,7 @@ public class BucketizedBloomCheckPartitioner extends Partitioner {
Map<String, Integer> bucketsPerFileGroup = new HashMap<>();
// Compute the buckets needed per file group, using simple uniform distribution
- fileGroupToComparisons.forEach((f, c) ->
- bucketsPerFileGroup.put(f, (int) Math.ceil((c * 1.0) / keysPerBucket)));
+ fileGroupToComparisons.forEach((f, c) -> bucketsPerFileGroup.put(f, (int) Math.ceil((c * 1.0) / keysPerBucket)));
int totalBuckets = bucketsPerFileGroup.values().stream().mapToInt(i -> i).sum();
// If totalBuckets > targetPartitions, no need to have extra partitions
this.partitions = Math.min(targetPartitions, totalBuckets);

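The hunk above sizes each file group at ceil(comparisons / keysPerBucket) buckets and caps the partition count at the total number of buckets. A self-contained sketch of that sizing arithmetic, with made-up input numbers:

import java.util.HashMap;
import java.util.Map;

// Sketch of the bucket sizing logic described above: each file group gets
// ceil(comparisons / keysPerBucket) buckets, and the number of partitions is
// capped at the total bucket count.
public class BucketSizingExample {

  public static void main(String[] args) {
    Map<String, Long> fileGroupToComparisons = new HashMap<>();
    fileGroupToComparisons.put("fg-1", 250L);
    fileGroupToComparisons.put("fg-2", 40L);
    int keysPerBucket = 100;
    int targetPartitions = 10;

    Map<String, Integer> bucketsPerFileGroup = new HashMap<>();
    fileGroupToComparisons.forEach(
        (f, c) -> bucketsPerFileGroup.put(f, (int) Math.ceil((c * 1.0) / keysPerBucket)));

    int totalBuckets = bucketsPerFileGroup.values().stream().mapToInt(i -> i).sum();
    int partitions = Math.min(targetPartitions, totalBuckets);

    // fg-1 -> 3 buckets, fg-2 -> 1 bucket, 4 buckets total, 4 partitions used.
    System.out.println(bucketsPerFileGroup + " totalBuckets=" + totalBuckets + " partitions=" + partitions);
  }
}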
View File

@@ -78,12 +78,12 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
}
// Step 1: Extract out thinner JavaPairRDD of (partitionPath, recordKey)
- JavaPairRDD<String, String> partitionRecordKeyPairRDD = recordRDD
- .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey()));
+ JavaPairRDD<String, String> partitionRecordKeyPairRDD =
+ recordRDD.mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey()));
// Lookup indexes for all the partition/recordkey pair
- JavaPairRDD<HoodieKey, HoodieRecordLocation> keyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD, jsc,
- hoodieTable);
+ JavaPairRDD<HoodieKey, HoodieRecordLocation> keyFilenamePairRDD =
+ lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable);
// Cache the result, for subsequent stages.
if (config.getBloomIndexUseCaching()) {
@@ -96,8 +96,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
// Step 4: Tag the incoming records, as inserts or updates, by joining with existing record keys
// Cost: 4 sec.
- JavaRDD<HoodieRecord<T>> taggedRecordRDD = tagLocationBacktoRecords(keyFilenamePairRDD,
- recordRDD);
+ JavaRDD<HoodieRecord<T>> taggedRecordRDD = tagLocationBacktoRecords(keyFilenamePairRDD, recordRDD);
if (config.getBloomIndexUseCaching()) {
recordRDD.unpersist(); // unpersist the input Record RDD
@@ -108,8 +107,8 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
}
/**
- * Returns an RDD mapping each HoodieKey with a partitionPath/fileID which contains it. Option.Empty if the key is
- * not found.
+ * Returns an RDD mapping each HoodieKey with a partitionPath/fileID which contains it. Option.Empty if the key is not
+ * found.
*
* @param hoodieKeys keys to lookup
* @param jsc spark context
@@ -118,12 +117,12 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
@Override
public JavaPairRDD<HoodieKey, Option<Pair<String, String>>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys,
JavaSparkContext jsc, HoodieTable<T> hoodieTable) {
- JavaPairRDD<String, String> partitionRecordKeyPairRDD = hoodieKeys
- .mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey()));
+ JavaPairRDD<String, String> partitionRecordKeyPairRDD =
+ hoodieKeys.mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey()));
// Lookup indexes for all the partition/recordkey pair
- JavaPairRDD<HoodieKey, HoodieRecordLocation> recordKeyLocationRDD = lookupIndex(partitionRecordKeyPairRDD, jsc,
- hoodieTable);
+ JavaPairRDD<HoodieKey, HoodieRecordLocation> recordKeyLocationRDD =
+ lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable);
JavaPairRDD<HoodieKey, String> keyHoodieKeyPairRDD = hoodieKeys.mapToPair(key -> new Tuple2<>(key, null));
return keyHoodieKeyPairRDD.leftOuterJoin(recordKeyLocationRDD).mapToPair(keyLoc -> {
@@ -149,19 +148,19 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
List<String> affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet());
// Step 2: Load all involved files as <Partition, filename> pairs
- List<Tuple2<String, BloomIndexFileInfo>> fileInfoList = loadInvolvedFiles(affectedPartitionPathList, jsc,
- hoodieTable);
- final Map<String, List<BloomIndexFileInfo>> partitionToFileInfo = fileInfoList.stream()
- .collect(groupingBy(Tuple2::_1, mapping(Tuple2::_2, toList())));
+ List<Tuple2<String, BloomIndexFileInfo>> fileInfoList =
+ loadInvolvedFiles(affectedPartitionPathList, jsc, hoodieTable);
+ final Map<String, List<BloomIndexFileInfo>> partitionToFileInfo =
+ fileInfoList.stream().collect(groupingBy(Tuple2::_1, mapping(Tuple2::_2, toList())));
// Step 3: Obtain a RDD, for each incoming record, that already exists, with the file id,
// that contains it.
- Map<String, Long> comparisonsPerFileGroup = computeComparisonsPerFileGroup(recordsPerPartition, partitionToFileInfo,
- partitionRecordKeyPairRDD);
+ Map<String, Long> comparisonsPerFileGroup =
+ computeComparisonsPerFileGroup(recordsPerPartition, partitionToFileInfo, partitionRecordKeyPairRDD);
int safeParallelism = computeSafeParallelism(recordsPerPartition, comparisonsPerFileGroup);
int joinParallelism = determineParallelism(partitionRecordKeyPairRDD.partitions().size(), safeParallelism);
- return findMatchingFilesForRecordKeys(partitionToFileInfo, partitionRecordKeyPairRDD, joinParallelism,
- hoodieTable, comparisonsPerFileGroup);
+ return findMatchingFilesForRecordKeys(partitionToFileInfo, partitionRecordKeyPairRDD, joinParallelism, hoodieTable,
+ comparisonsPerFileGroup);
}
/**
@@ -175,8 +174,8 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
if (config.getBloomIndexPruneByRanges()) {
// we will just try exploding the input and then count to determine comparisons
// FIX(vc): Only do sampling here and extrapolate?
- fileToComparisons = explodeRecordRDDWithFileComparisons(partitionToFileInfo,
- partitionRecordKeyPairRDD).mapToPair(t -> t).countByKey();
+ fileToComparisons = explodeRecordRDDWithFileComparisons(partitionToFileInfo, partitionRecordKeyPairRDD)
+ .mapToPair(t -> t).countByKey();
} else {
fileToComparisons = new HashMap<>();
partitionToFileInfo.entrySet().stream().forEach(e -> {
@@ -191,34 +190,41 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
/**
* Compute the minimum parallelism needed to play well with the spark 2GB limitation.. The index lookup can be skewed
- * in three dimensions : #files, #partitions, #records <p> To be able to smoothly handle skews, we need to compute how
- * to split each partitions into subpartitions. We do it here, in a way that keeps the amount of each Spark join
- * partition to < 2GB. <p> If {@link HoodieIndexConfig#BLOOM_INDEX_PARALLELISM_PROP} is
- * specified as a NON-zero number, then that is used explicitly.
+ * in three dimensions : #files, #partitions, #records
+ * <p>
+ * To be able to smoothly handle skews, we need to compute how to split each partitions into subpartitions. We do it
+ * here, in a way that keeps the amount of each Spark join partition to < 2GB.
+ * <p>
+ * If {@link HoodieIndexConfig#BLOOM_INDEX_PARALLELISM_PROP} is specified as a NON-zero number, then that is used
+ * explicitly.
*/
int computeSafeParallelism(Map<String, Long> recordsPerPartition, Map<String, Long> comparisonsPerFileGroup) {
long totalComparisons = comparisonsPerFileGroup.values().stream().mapToLong(Long::longValue).sum();
long totalFiles = comparisonsPerFileGroup.size();
long totalRecords = recordsPerPartition.values().stream().mapToLong(Long::longValue).sum();
int parallelism = (int) (totalComparisons / MAX_ITEMS_PER_SHUFFLE_PARTITION + 1);
- logger.info(String.format("TotalRecords %d, TotalFiles %d, TotalAffectedPartitions %d, TotalComparisons %d, "
- + "SafeParallelism %d", totalRecords, totalFiles, recordsPerPartition.size(), totalComparisons, parallelism));
+ logger.info(String.format(
+ "TotalRecords %d, TotalFiles %d, TotalAffectedPartitions %d, TotalComparisons %d, " + "SafeParallelism %d",
+ totalRecords, totalFiles, recordsPerPartition.size(), totalComparisons, parallelism));
return parallelism;
}
/**
- * Its crucial to pick the right parallelism. <p> totalSubPartitions : this is deemed safe limit, to be nice with
- * Spark. inputParallelism : typically number of input file splits <p> We pick the max such that, we are always safe,
- * but go higher if say a there are a lot of input files. (otherwise, we will fallback to number of partitions in
- * input and end up with slow performance)
+ * Its crucial to pick the right parallelism.
+ * <p>
+ * totalSubPartitions : this is deemed safe limit, to be nice with Spark. inputParallelism : typically number of input
+ * file splits
+ * <p>
+ * We pick the max such that, we are always safe, but go higher if say a there are a lot of input files. (otherwise,
+ * we will fallback to number of partitions in input and end up with slow performance)
*/
private int determineParallelism(int inputParallelism, int totalSubPartitions) {
// If bloom index parallelism is set, use it to to check against the input parallelism and
// take the max
int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism());
int joinParallelism = Math.max(totalSubPartitions, indexParallelism);
- logger.info("InputParallelism: ${" + inputParallelism + "}, " + "IndexParallelism: ${" + config
- .getBloomIndexParallelism() + "}, " + "TotalSubParts: ${" + totalSubPartitions + "}, "
+ logger.info("InputParallelism: ${" + inputParallelism + "}, " + "IndexParallelism: ${"
+ + config.getBloomIndexParallelism() + "}, " + "TotalSubParts: ${" + totalSubPartitions + "}, "
+ "Join Parallelism set to : " + joinParallelism);
return joinParallelism;
}
@@ -231,11 +237,10 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
final HoodieTable hoodieTable) {
// Obtain the latest data files from all the partitions.
- List<Pair<String, String>> partitionPathFileIDList = jsc
- .parallelize(partitions, Math.max(partitions.size(), 1))
- .flatMap(partitionPath -> {
- Option<HoodieInstant> latestCommitTime = hoodieTable.getMetaClient().getCommitsTimeline()
- .filterCompletedInstants().lastInstant();
+ List<Pair<String, String>> partitionPathFileIDList =
+ jsc.parallelize(partitions, Math.max(partitions.size(), 1)).flatMap(partitionPath -> {
+ Option<HoodieInstant> latestCommitTime =
+ hoodieTable.getMetaClient().getCommitsTimeline().filterCompletedInstants().lastInstant();
List<Pair<String, String>> filteredFiles = new ArrayList<>();
if (latestCommitTime.isPresent()) {
filteredFiles = hoodieTable.getROFileSystemView()
@@ -259,8 +264,7 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
}).collect();
} else {
return partitionPathFileIDList.stream()
- .map(pf -> new Tuple2<>(pf.getKey(), new BloomIndexFileInfo(pf.getValue())))
- .collect(toList());
+ .map(pf -> new Tuple2<>(pf.getKey(), new BloomIndexFileInfo(pf.getValue()))).collect(toList());
}
}
@@ -307,8 +311,8 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
JavaRDD<Tuple2<String, HoodieKey>> explodeRecordRDDWithFileComparisons(
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo,
JavaPairRDD<String, String> partitionRecordKeyPairRDD) {
- IndexFileFilter indexFileFilter = config.useBloomIndexTreebasedFilter()
- ? new IntervalTreeBasedIndexFileFilter(partitionToFileIndexInfo)
+ IndexFileFilter indexFileFilter =
+ config.useBloomIndexTreebasedFilter() ? new IntervalTreeBasedIndexFileFilter(partitionToFileIndexInfo)
: new ListBasedIndexFileFilter(partitionToFileIndexInfo);
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
@@ -322,10 +326,12 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
}
/**
- * Find out <RowKey, filename> pair. All workload grouped by file-level. <p> Join PairRDD(PartitionPath, RecordKey)
- * and PairRDD(PartitionPath, File) & then repartition such that each RDD partition is a file, then for each file, we
- * do (1) load bloom filter, (2) load rowKeys, (3) Tag rowKey <p> Make sure the parallelism is atleast the groupby
- * parallelism for tagging location
+ * Find out <RowKey, filename> pair. All workload grouped by file-level.
+ * <p>
+ * Join PairRDD(PartitionPath, RecordKey) and PairRDD(PartitionPath, File) & then repartition such that each RDD
+ * partition is a file, then for each file, we do (1) load bloom filter, (2) load rowKeys, (3) Tag rowKey
+ * <p>
+ * Make sure the parallelism is atleast the groupby parallelism for tagging location
*/
@VisibleForTesting
JavaPairRDD<HoodieKey, HoodieRecordLocation> findMatchingFilesForRecordKeys(
@@ -336,33 +342,24 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD);
if (config.useBloomIndexBucketizedChecking()) {
- Partitioner partitioner = new BucketizedBloomCheckPartitioner(
- shuffleParallelism,
- fileGroupToComparisons,
- config.getBloomIndexKeysPerBucket()
- );
- fileComparisonsRDD = fileComparisonsRDD
- .mapToPair(t -> new Tuple2<>(Pair.of(t._1, t._2.getRecordKey()), t))
- .repartitionAndSortWithinPartitions(partitioner)
- .map(Tuple2::_2);
+ Partitioner partitioner = new BucketizedBloomCheckPartitioner(shuffleParallelism, fileGroupToComparisons,
+ config.getBloomIndexKeysPerBucket());
+ fileComparisonsRDD = fileComparisonsRDD.mapToPair(t -> new Tuple2<>(Pair.of(t._1, t._2.getRecordKey()), t))
+ .repartitionAndSortWithinPartitions(partitioner).map(Tuple2::_2);
} else {
fileComparisonsRDD = fileComparisonsRDD.sortBy(Tuple2::_1, true, shuffleParallelism);
}
- return fileComparisonsRDD
- .mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(hoodieTable, config), true)
- .flatMap(List::iterator)
- .filter(lr -> lr.getMatchingRecordKeys().size() > 0)
+ return fileComparisonsRDD.mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(hoodieTable, config), true)
+ .flatMap(List::iterator).filter(lr -> lr.getMatchingRecordKeys().size() > 0)
.flatMapToPair(lookupResult -> lookupResult.getMatchingRecordKeys().stream()
.map(recordKey -> new Tuple2<>(new HoodieKey(recordKey, lookupResult.getPartitionPath()),
new HoodieRecordLocation(lookupResult.getBaseInstantTime(), lookupResult.getFileId())))
- .collect(Collectors.toList())
- .iterator());
+ .collect(Collectors.toList()).iterator());
}
- HoodieRecord<T> getTaggedRecord(HoodieRecord<T> inputRecord,
- Option<HoodieRecordLocation> location) {
+ HoodieRecord<T> getTaggedRecord(HoodieRecord<T> inputRecord, Option<HoodieRecordLocation> location) {
HoodieRecord<T> record = inputRecord;
if (location.isPresent()) {
// When you have a record in multiple files in the same partition, then rowKeyRecordPairRDD
@@ -383,12 +380,12 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
*/
protected JavaRDD<HoodieRecord<T>> tagLocationBacktoRecords(
JavaPairRDD<HoodieKey, HoodieRecordLocation> keyFilenamePairRDD, JavaRDD<HoodieRecord<T>> recordRDD) {
- JavaPairRDD<HoodieKey, HoodieRecord<T>> keyRecordPairRDD = recordRDD
- .mapToPair(record -> new Tuple2<>(record.getKey(), record));
+ JavaPairRDD<HoodieKey, HoodieRecord<T>> keyRecordPairRDD =
+ recordRDD.mapToPair(record -> new Tuple2<>(record.getKey(), record));
// Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null),
// so we do left outer join.
- return keyRecordPairRDD.leftOuterJoin(keyFilenamePairRDD).values().map(
- v1 -> getTaggedRecord(v1._1, Option.ofNullable(v1._2.orNull())));
+ return keyRecordPairRDD.leftOuterJoin(keyFilenamePairRDD).values()
+ .map(v1 -> getTaggedRecord(v1._1, Option.ofNullable(v1._2.orNull())));
}
@Override

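The parallelism rules documented in the javadocs above reduce to two max/divide steps: a safe parallelism from the total comparison count, and a join parallelism taken as the max of input, configured and safe values. A stand-alone sketch, with an assumed per-partition item limit (the real constant lives in HoodieBloomIndex):

// Sketch of the parallelism arithmetic described above; the limit value here is
// an assumption for illustration only.
public class BloomIndexParallelismExample {

  private static final long MAX_ITEMS_PER_SHUFFLE_PARTITION = 40_000_000L; // assumed

  /** Minimum parallelism that keeps each shuffle partition under the item limit. */
  static int computeSafeParallelism(long totalComparisons) {
    return (int) (totalComparisons / MAX_ITEMS_PER_SHUFFLE_PARTITION + 1);
  }

  /** Max of input parallelism, configured index parallelism and the safe sub-partition count. */
  static int determineParallelism(int inputParallelism, int configuredIndexParallelism, int totalSubPartitions) {
    int indexParallelism = Math.max(inputParallelism, configuredIndexParallelism);
    return Math.max(totalSubPartitions, indexParallelism);
  }

  public static void main(String[] args) {
    long totalComparisons = 120_000_000L;
    int safe = computeSafeParallelism(totalComparisons); // 4
    int join = determineParallelism(200, 0, safe);       // 200: input parallelism dominates here
    System.out.println("safeParallelism=" + safe + ", joinParallelism=" + join);
  }
}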
View File

@@ -34,11 +34,10 @@ import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;
/**
- * Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the
- * actual files
+ * Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the actual files
*/
- public class HoodieBloomIndexCheckFunction implements
- Function2<Integer, Iterator<Tuple2<String, HoodieKey>>, Iterator<List<KeyLookupResult>>> {
+ public class HoodieBloomIndexCheckFunction
+ implements Function2<Integer, Iterator<Tuple2<String, HoodieKey>>, Iterator<List<KeyLookupResult>>> {
private final HoodieTable hoodieTable;
@@ -59,14 +58,12 @@ public class HoodieBloomIndexCheckFunction implements
private HoodieKeyLookupHandle keyLookupHandle;
- LazyKeyCheckIterator(
- Iterator<Tuple2<String, HoodieKey>> filePartitionRecordKeyTripletItr) {
+ LazyKeyCheckIterator(Iterator<Tuple2<String, HoodieKey>> filePartitionRecordKeyTripletItr) {
super(filePartitionRecordKeyTripletItr);
}
@Override
- protected void start() {
- }
+ protected void start() {}
@Override
protected List<HoodieKeyLookupHandle.KeyLookupResult> computeNext() {
@@ -113,7 +110,6 @@ public class HoodieBloomIndexCheckFunction implements
}
@Override
- protected void end() {
- }
+ protected void end() {}
}
}

View File

@@ -59,8 +59,7 @@ public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends Hoodi
final HoodieTable hoodieTable) {
HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
try {
- List<String> allPartitionPaths = FSUtils
- .getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
+ List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
config.shouldAssumeDatePartitioning());
return super.loadInvolvedFiles(allPartitionPaths, jsc, hoodieTable);
} catch (IOException e) {
@@ -88,8 +87,8 @@ public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends Hoodi
entry.getValue().forEach(indexFile -> indexToPartitionMap.put(indexFile.getFileId(), entry.getKey()));
}
- IndexFileFilter indexFileFilter = config.getBloomIndexPruneByRanges()
- ? new IntervalTreeBasedGlobalIndexFileFilter(partitionToFileIndexInfo)
+ IndexFileFilter indexFileFilter =
+ config.getBloomIndexPruneByRanges() ? new IntervalTreeBasedGlobalIndexFileFilter(partitionToFileIndexInfo)
: new ListBasedGlobalIndexFileFilter(partitionToFileIndexInfo);
return partitionRecordKeyPairRDD.map(partitionRecordKeyPair -> {
@@ -109,8 +108,8 @@ public class HoodieGlobalBloomIndex<T extends HoodieRecordPayload> extends Hoodi
@Override
protected JavaRDD<HoodieRecord<T>> tagLocationBacktoRecords(
JavaPairRDD<HoodieKey, HoodieRecordLocation> keyFilenamePairRDD, JavaRDD<HoodieRecord<T>> recordRDD) {
- JavaPairRDD<String, HoodieRecord<T>> rowKeyRecordPairRDD = recordRDD
- .mapToPair(record -> new Tuple2<>(record.getRecordKey(), record));
+ JavaPairRDD<String, HoodieRecord<T>> rowKeyRecordPairRDD =
+ recordRDD.mapToPair(record -> new Tuple2<>(record.getRecordKey(), record));
// Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null),
// so we do left outer join.

View File

@@ -41,16 +41,16 @@ class IntervalTreeBasedGlobalIndexFileFilter implements IndexFileFilter {
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}s
*/
IntervalTreeBasedGlobalIndexFileFilter(final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
- List<BloomIndexFileInfo> allIndexFiles = partitionToFileIndexInfo.values().stream().flatMap(Collection::stream)
- .collect(Collectors.toList());
+ List<BloomIndexFileInfo> allIndexFiles =
+ partitionToFileIndexInfo.values().stream().flatMap(Collection::stream).collect(Collectors.toList());
// Note that the interval tree implementation doesn't have auto-balancing to ensure logN search time.
// So, we are shuffling the input here hoping the tree will not have any skewness. If not, the tree could be skewed
// which could result in N search time instead of NlogN.
Collections.shuffle(allIndexFiles);
allIndexFiles.forEach(indexFile -> {
if (indexFile.hasKeyRanges()) {
- indexLookUpTree.insert(new KeyRangeNode(indexFile.getMinRecordKey(),
- indexFile.getMaxRecordKey(), indexFile.getFileId()));
+ indexLookUpTree
+ .insert(new KeyRangeNode(indexFile.getMinRecordKey(), indexFile.getMaxRecordKey(), indexFile.getFileId()));
} else {
filesWithNoRanges.add(indexFile.getFileId());
}

View File

@@ -48,8 +48,8 @@ class IntervalTreeBasedIndexFileFilter implements IndexFileFilter {
KeyRangeLookupTree lookUpTree = new KeyRangeLookupTree();
bloomIndexFiles.forEach(indexFileInfo -> {
if (indexFileInfo.hasKeyRanges()) {
- lookUpTree.insert(new KeyRangeNode(indexFileInfo.getMinRecordKey(),
- indexFileInfo.getMaxRecordKey(), indexFileInfo.getFileId()));
+ lookUpTree.insert(new KeyRangeNode(indexFileInfo.getMinRecordKey(), indexFileInfo.getMaxRecordKey(),
+ indexFileInfo.getFileId()));
} else {
if (!partitionToFilesWithNoRanges.containsKey(partition)) {
partitionToFilesWithNoRanges.put(partition, new HashSet<>());

View File

@@ -50,25 +50,16 @@ class KeyRangeLookupTree implements Serializable {
*
* If no root exists, make {@code newNode} as the root and return the new root.
*
- * If current root and newNode matches with min record key and max record key,
- * merge two nodes. In other words, add files from {@code newNode} to current root.
- * Return current root.
+ * If current root and newNode matches with min record key and max record key, merge two nodes. In other words, add
+ * files from {@code newNode} to current root. Return current root.
*
- * If current root is < newNode
- * if current root has no right sub tree
- * update current root's right sub tree max and min
- * set newNode as right sub tree
- * else
- * update root's right sub tree min and max with newNode's min and max record key as applicable
- * recursively call insert() with root's right subtree as new root
+ * If current root is < newNode if current root has no right sub tree update current root's right sub tree max and min
+ * set newNode as right sub tree else update root's right sub tree min and max with newNode's min and max record key
+ * as applicable recursively call insert() with root's right subtree as new root
*
- * else // current root is >= newNode
- * if current root has no left sub tree
- * update current root's left sub tree max and min
- * set newNode as left sub tree
- * else
- * update root's left sub tree min and max with newNode's min and max record key as applicable
- * recursively call insert() with root's left subtree as new root
+ * else // current root is >= newNode if current root has no left sub tree update current root's left sub tree max and
+ * min set newNode as left sub tree else update root's left sub tree min and max with newNode's min and max record key
+ * as applicable recursively call insert() with root's left subtree as new root
*
* @param root refers to the current root of the look up tree
* @param newNode newNode the new {@link KeyRangeNode} to be inserted

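The insert() rules spelled out in that javadoc reduce to: merge equal ranges, send larger ranges to the right sub tree and smaller ranges to the left. A compact sketch with String keys follows; it is a simplified illustration, and it omits the file-list merging and the sub-tree min/max bookkeeping that the Hudi tree also maintains for lookup pruning.

// Simplified sketch of the range-tree insert described above.
public class SimpleKeyRangeTree {

  static class Node {
    String min;
    String max;
    Node left;
    Node right;

    Node(String min, String max) {
      this.min = min;
      this.max = max;
    }

    // Compare by min record key first, then by max record key.
    int compareTo(Node that) {
      int byMin = this.min.compareTo(that.min);
      return byMin != 0 ? byMin : this.max.compareTo(that.max);
    }
  }

  private Node root;

  public void insert(String minKey, String maxKey) {
    root = insert(root, new Node(minKey, maxKey));
  }

  private Node insert(Node node, Node newNode) {
    if (node == null) {
      return newNode; // no root exists: the new node becomes the root of this sub tree
    }
    int cmp = node.compareTo(newNode);
    if (cmp == 0) {
      // Same min/max range: in Hudi this is where the file lists would be merged.
      return node;
    }
    if (cmp < 0) {
      node.right = insert(node.right, newNode); // current root is < newNode
    } else {
      node.left = insert(node.left, newNode);   // current root is >= newNode
    }
    return node;
  }
}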
View File

@@ -62,15 +62,10 @@ class KeyRangeNode implements Comparable<KeyRangeNode>, Serializable {
@Override
public String toString() {
- return "KeyRangeNode{"
- + "minRecordKey='" + minRecordKey + '\''
- + ", maxRecordKey='" + maxRecordKey + '\''
- + ", fileNameList=" + fileNameList
- + ", rightSubTreeMax='" + rightSubTreeMax + '\''
- + ", leftSubTreeMax='" + leftSubTreeMax + '\''
- + ", rightSubTreeMin='" + rightSubTreeMin + '\''
- + ", leftSubTreeMin='" + leftSubTreeMin + '\''
- + '}';
+ return "KeyRangeNode{" + "minRecordKey='" + minRecordKey + '\'' + ", maxRecordKey='" + maxRecordKey + '\''
+ + ", fileNameList=" + fileNameList + ", rightSubTreeMax='" + rightSubTreeMax + '\'' + ", leftSubTreeMax='"
+ + leftSubTreeMax + '\'' + ", rightSubTreeMin='" + rightSubTreeMin + '\'' + ", leftSubTreeMin='" + leftSubTreeMin
+ + '\'' + '}';
}
/**
@@ -78,8 +73,8 @@ class KeyRangeNode implements Comparable<KeyRangeNode>, Serializable {
*
* @param that the {@link KeyRangeNode} to be compared with
* @return the result of comparison. 0 if both min and max are equal in both. 1 if this {@link KeyRangeNode} is
- * greater than the {@code that} keyRangeNode. -1 if {@code that} keyRangeNode is greater than this {@link
- * KeyRangeNode}
+ * greater than the {@code that} keyRangeNode. -1 if {@code that} keyRangeNode is greater than this
+ * {@link KeyRangeNode}
*/
@Override
public int compareTo(KeyRangeNode that) {

View File

@@ -30,8 +30,7 @@ class ListBasedGlobalIndexFileFilter extends ListBasedIndexFileFilter {
*
* @param partitionToFileIndexInfo Map of partition to List of {@link BloomIndexFileInfo}
*/
- ListBasedGlobalIndexFileFilter(
- Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
+ ListBasedGlobalIndexFileFilter(Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo) {
super(partitionToFileIndexInfo);
}

View File

@@ -68,10 +68,8 @@ import scala.Tuple2;
*/
public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
- public static final String DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME =
- "spark.executor.instances";
- public static final String DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME =
- "spark.dynamicAllocation.enabled";
+ public static final String DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME = "spark.executor.instances";
+ public static final String DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME = "spark.dynamicAllocation.enabled";
public static final String DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME =
"spark.dynamicAllocation.maxExecutors";
@@ -114,9 +112,8 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
public HBaseIndexQPSResourceAllocator createQPSResourceAllocator(HoodieWriteConfig config) {
try {
logger.info("createQPSResourceAllocator :" + config.getHBaseQPSResourceAllocatorClass());
- final HBaseIndexQPSResourceAllocator resourceAllocator =
- (HBaseIndexQPSResourceAllocator) ReflectionUtils.loadClass(
- config.getHBaseQPSResourceAllocatorClass(), config);
+ final HBaseIndexQPSResourceAllocator resourceAllocator = (HBaseIndexQPSResourceAllocator) ReflectionUtils
+ .loadClass(config.getHBaseQPSResourceAllocatorClass(), config);
return resourceAllocator;
} catch (Exception e) {
logger.warn("error while instantiating HBaseIndexQPSResourceAllocator", e);
@@ -143,14 +140,14 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
try {
return ConnectionFactory.createConnection(hbaseConfig);
} catch (IOException e) {
- throw new HoodieDependentSystemUnavailableException(
- HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
+ throw new HoodieDependentSystemUnavailableException(HoodieDependentSystemUnavailableException.HBASE,
+ quorum + ":" + port);
}
}
/**
- * Since we are sharing the HbaseConnection across tasks in a JVM, make sure the HbaseConnectio is
- * closed when JVM exits
+ * Since we are sharing the HbaseConnection across tasks in a JVM, make sure the HbaseConnectio is closed when JVM
+ * exits
*/
private void addShutDownHook() {
Runtime.getRuntime().addShutdownHook(new Thread() {
@@ -172,31 +169,28 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
}
private Get generateStatement(String key) throws IOException {
- return new Get(Bytes.toBytes(key)).setMaxVersions(1)
- .addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
- .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
- .addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN);
+ return new Get(Bytes.toBytes(key)).setMaxVersions(1).addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
+ .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN).addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN);
}
private boolean checkIfValidCommit(HoodieTableMetaClient metaClient, String commitTs) {
HoodieTimeline commitTimeline = metaClient.getActiveTimeline().filterCompletedInstants();
// Check if the last commit ts for this row is 1) present in the timeline or
// 2) is less than the first commit ts in the timeline
- return !commitTimeline.empty() && (commitTimeline
- .containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))
- || HoodieTimeline
- .compareTimestamps(commitTimeline.firstInstant().get().getTimestamp(), commitTs,
+ return !commitTimeline.empty()
+ && (commitTimeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))
+ || HoodieTimeline.compareTimestamps(commitTimeline.firstInstant().get().getTimestamp(), commitTs,
HoodieTimeline.GREATER));
}
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
- private Function2<Integer, Iterator<HoodieRecord<T>>,
- Iterator<HoodieRecord<T>>> locationTagFunction(HoodieTableMetaClient metaClient) {
- return (Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>>)
- (partitionNum, hoodieRecordIterator) -> {
+ private Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> locationTagFunction(
+ HoodieTableMetaClient metaClient) {
+ return (Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>>) (partitionNum,
+ hoodieRecordIterator) -> {
Integer multiGetBatchSize = config.getHbaseIndexGetBatchSize();
@@ -228,16 +222,12 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
HoodieRecord currentRecord = currentBatchOfRecords.remove(0);
if (result.getRow() != null) {
String keyFromResult = Bytes.toString(result.getRow());
- String commitTs = Bytes
- .toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
- String fileId = Bytes
- .toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
- String partitionPath = Bytes
- .toString(result.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
+ String commitTs = Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
+ String fileId = Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
+ String partitionPath = Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
if (checkIfValidCommit(metaClient, commitTs)) {
- currentRecord = new HoodieRecord(
- new HoodieKey(currentRecord.getRecordKey(), partitionPath),
+ currentRecord = new HoodieRecord(new HoodieKey(currentRecord.getRecordKey(), partitionPath),
currentRecord.getData());
currentRecord.unseal();
currentRecord.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
@@ -255,8 +245,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
}
}
} catch (IOException e) {
- throw new HoodieIndexException(
- "Failed to Tag indexed locations because of exception with HBase Client", e);
+ throw new HoodieIndexException("Failed to Tag indexed locations because of exception with HBase Client", e);
} finally {
if (hTable != null) {
try {
@@ -310,12 +299,9 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
continue;
}
Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
- put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
- Bytes.toBytes(loc.get().getInstantTime()));
- put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
- Bytes.toBytes(loc.get().getFileId()));
- put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
- Bytes.toBytes(rec.getPartitionPath()));
+ put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, Bytes.toBytes(loc.get().getInstantTime()));
+ put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, Bytes.toBytes(loc.get().getFileId()));
+ put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, Bytes.toBytes(rec.getPartitionPath()));
puts.add(put);
} else {
// Delete existing index for a deleted record
@@ -338,8 +324,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
writeStatusList.add(writeStatus); writeStatusList.add(writeStatus);
} }
} catch (IOException e) { } catch (IOException e) {
throw new HoodieIndexException( throw new HoodieIndexException("Failed to Update Index locations because of exception with HBase Client", e);
"Failed to Update Index locations because of exception with HBase Client", e);
} finally { } finally {
if (hTable != null) { if (hTable != null) {
try { try {
@@ -356,8 +341,7 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
/** /**
* Helper method to facilitate performing puts and deletes in Hbase * Helper method to facilitate performing puts and deletes in Hbase
*/ */
private void doPutsAndDeletes(HTable hTable, List<Put> puts, List<Delete> deletes) private void doPutsAndDeletes(HTable hTable, List<Put> puts, List<Delete> deletes) throws IOException {
throws IOException {
if (puts.size() > 0) { if (puts.size() > 0) {
hTable.put(puts); hTable.put(puts);
} }
@@ -385,58 +369,49 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
final HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = createQPSResourceAllocator(this.config); final HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator = createQPSResourceAllocator(this.config);
setPutBatchSize(writeStatusRDD, hBaseIndexQPSResourceAllocator, jsc); setPutBatchSize(writeStatusRDD, hBaseIndexQPSResourceAllocator, jsc);
logger.info("multiPutBatchSize: before hbase puts" + multiPutBatchSize); logger.info("multiPutBatchSize: before hbase puts" + multiPutBatchSize);
JavaRDD<WriteStatus> writeStatusJavaRDD = writeStatusRDD.mapPartitionsWithIndex( JavaRDD<WriteStatus> writeStatusJavaRDD = writeStatusRDD.mapPartitionsWithIndex(updateLocationFunction(), true);
updateLocationFunction(), true);
// caching the index updated status RDD // caching the index updated status RDD
writeStatusJavaRDD = writeStatusJavaRDD.persist(config.getWriteStatusStorageLevel()); writeStatusJavaRDD = writeStatusJavaRDD.persist(config.getWriteStatusStorageLevel());
return writeStatusJavaRDD; return writeStatusJavaRDD;
} }
private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD, private void setPutBatchSize(JavaRDD<WriteStatus> writeStatusRDD,
HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator, HBaseIndexQPSResourceAllocator hBaseIndexQPSResourceAllocator, final JavaSparkContext jsc) {
final JavaSparkContext jsc) {
if (config.getHbaseIndexPutBatchSizeAutoCompute()) { if (config.getHbaseIndexPutBatchSizeAutoCompute()) {
SparkConf conf = jsc.getConf(); SparkConf conf = jsc.getConf();
int maxExecutors = conf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1); int maxExecutors = conf.getInt(DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME, 1);
if (conf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) { if (conf.getBoolean(DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME, false)) {
maxExecutors = Math.max(maxExecutors, conf.getInt( maxExecutors =
DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1)); Math.max(maxExecutors, conf.getInt(DEFAULT_SPARK_DYNAMIC_ALLOCATION_MAX_EXECUTORS_CONFIG_NAME, 1));
} }
/* /*
Each writeStatus represents status information from a write done in one of the IOHandles. * Each writeStatus represents status information from a write done in one of the IOHandles. If a writeStatus has
If a writeStatus has any insert, it implies that the corresponding task contacts HBase for * any insert, it implies that the corresponding task contacts HBase for doing puts, since we only do puts for
doing puts, since we only do puts for inserts from HBaseIndex. * inserts from HBaseIndex.
*/ */
final Tuple2<Long, Integer> numPutsParallelismTuple = getHBasePutAccessParallelism(writeStatusRDD); final Tuple2<Long, Integer> numPutsParallelismTuple = getHBasePutAccessParallelism(writeStatusRDD);
final long numPuts = numPutsParallelismTuple._1; final long numPuts = numPutsParallelismTuple._1;
final int hbasePutsParallelism = numPutsParallelismTuple._2; final int hbasePutsParallelism = numPutsParallelismTuple._2;
this.numRegionServersForTable = getNumRegionServersAliveForTable(); this.numRegionServersForTable = getNumRegionServersAliveForTable();
final float desiredQPSFraction = hBaseIndexQPSResourceAllocator final float desiredQPSFraction =
.calculateQPSFractionForPutsTime(numPuts, this.numRegionServersForTable); hBaseIndexQPSResourceAllocator.calculateQPSFractionForPutsTime(numPuts, this.numRegionServersForTable);
logger.info("Desired QPSFraction :" + desiredQPSFraction); logger.info("Desired QPSFraction :" + desiredQPSFraction);
logger.info("Number HBase puts :" + numPuts); logger.info("Number HBase puts :" + numPuts);
logger.info("Hbase Puts Parallelism :" + hbasePutsParallelism); logger.info("Hbase Puts Parallelism :" + hbasePutsParallelism);
final float availableQpsFraction = hBaseIndexQPSResourceAllocator final float availableQpsFraction =
.acquireQPSResources(desiredQPSFraction, numPuts); hBaseIndexQPSResourceAllocator.acquireQPSResources(desiredQPSFraction, numPuts);
logger.info("Allocated QPS Fraction :" + availableQpsFraction); logger.info("Allocated QPS Fraction :" + availableQpsFraction);
multiPutBatchSize = putBatchSizeCalculator multiPutBatchSize = putBatchSizeCalculator.getBatchSize(numRegionServersForTable, maxQpsPerRegionServer,
.getBatchSize( hbasePutsParallelism, maxExecutors, SLEEP_TIME_MILLISECONDS, availableQpsFraction);
numRegionServersForTable,
maxQpsPerRegionServer,
hbasePutsParallelism,
maxExecutors,
SLEEP_TIME_MILLISECONDS,
availableQpsFraction);
logger.info("multiPutBatchSize :" + multiPutBatchSize); logger.info("multiPutBatchSize :" + multiPutBatchSize);
} }
} }
@VisibleForTesting @VisibleForTesting
public Tuple2<Long, Integer> getHBasePutAccessParallelism(final JavaRDD<WriteStatus> writeStatusRDD) { public Tuple2<Long, Integer> getHBasePutAccessParallelism(final JavaRDD<WriteStatus> writeStatusRDD) {
final JavaPairRDD<Long, Integer> insertOnlyWriteStatusRDD = final JavaPairRDD<Long, Integer> insertOnlyWriteStatusRDD = writeStatusRDD
writeStatusRDD.filter(w -> w.getStat().getNumInserts() > 0) .filter(w -> w.getStat().getNumInserts() > 0).mapToPair(w -> new Tuple2<>(w.getStat().getNumInserts(), 1));
.mapToPair(w -> new Tuple2<>(w.getStat().getNumInserts(), 1));
return insertOnlyWriteStatusRDD.fold(new Tuple2<>(0L, 0), (w, c) -> new Tuple2<>(w._1 + c._1, w._2 + c._2)); return insertOnlyWriteStatusRDD.fold(new Tuple2<>(0L, 0), (w, c) -> new Tuple2<>(w._1 + c._1, w._2 + c._2));
} }
@@ -460,21 +435,25 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
* 16000. We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 requests which * 16000. We assume requests get distributed to Region Servers uniformly, so each RS gets 1600 requests which
* happens to be 10% of 16667 (maxQPSPerRegionServer), as expected. * happens to be 10% of 16667 (maxQPSPerRegionServer), as expected.
* </p> * </p>
* <p> Assumptions made here <li> In a batch, writes get evenly distributed to each RS for that * <p>
* table. Since we do writes only in the case of inserts and not updates, for this assumption to fail, inserts would * Assumptions made here
* have to be skewed towards few RS, likelihood of which is less if Hbase table is pre-split and rowKeys are UUIDs * <li>In a batch, writes get evenly distributed to each RS for that table. Since we do writes only in the case of
* (random strings). If this assumption fails, then it is possible for some RS to receive more than * inserts and not updates, for this assumption to fail, inserts would have to be skewed towards few RS, likelihood
* maxQpsPerRegionServer QPS, but for simplicity, we are going ahead with this model, since this is meant to be a * of which is less if Hbase table is pre-split and rowKeys are UUIDs (random strings). If this assumption fails,
* lightweight distributed throttling mechanism without maintaining a global context. So if this assumption breaks, * then it is possible for some RS to receive more than maxQpsPerRegionServer QPS, but for simplicity, we are going
* we are hoping the HBase Master relocates hot-spot regions to new Region Servers. * ahead with this model, since this is meant to be a lightweight distributed throttling mechanism without
* maintaining a global context. So if this assumption breaks, we are hoping the HBase Master relocates hot-spot
* regions to new Region Servers.
* *
* </li> <li> For Region Server stability, throttling at a second level granularity is fine. * </li>
* Although, within a second, the sum of queries might be within maxQpsPerRegionServer, there could be peaks at some * <li>For Region Server stability, throttling at a second level granularity is fine. Although, within a second, the
* sub second intervals. So, the assumption is that these peaks are tolerated by the Region Server (which at max can * sum of queries might be within maxQpsPerRegionServer, there could be peaks at some sub second intervals. So, the
* be maxQpsPerRegionServer). </li> </p> * assumption is that these peaks are tolerated by the Region Server (which at max can be maxQpsPerRegionServer).
* </li>
* </p>
*/ */
public int getBatchSize(int numRegionServersForTable, int maxQpsPerRegionServer, public int getBatchSize(int numRegionServersForTable, int maxQpsPerRegionServer, int numTasksDuringPut,
int numTasksDuringPut, int maxExecutors, int sleepTimeMs, float qpsFraction) { int maxExecutors, int sleepTimeMs, float qpsFraction) {
int numRSAlive = numRegionServersForTable; int numRSAlive = numRegionServersForTable;
int maxReqPerSec = (int) (qpsFraction * numRSAlive * maxQpsPerRegionServer); int maxReqPerSec = (int) (qpsFraction * numRSAlive * maxQpsPerRegionServer);
int numTasks = numTasksDuringPut; int numTasks = numTasksDuringPut;
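
As a rough illustration of the throttling budget described in the comment above, with assumed inputs (only the maxReqPerSec line mirrors the code in this hunk; the remaining steps of getBatchSize fall outside the shown context):

// Assumed example values, not taken from this patch
int numRegionServers = 10;
int maxQpsPerRegionServer = 16667;
float qpsFraction = 0.1f;   // let this index job use ~10% of each region server's capacity
int maxReqPerSec = (int) (qpsFraction * numRegionServers * maxQpsPerRegionServer);
// maxReqPerSec is about 16667 requests/sec for the whole table, i.e. roughly 1666 requests/sec
// per region server, which is the per-second budget the batch size is then derived from.
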
@@ -499,11 +478,9 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
// from the driver, so ok to use a local connection variable. // from the driver, so ok to use a local connection variable.
if (numRegionServersForTable == null) { if (numRegionServersForTable == null) {
try (Connection conn = getHBaseConnection()) { try (Connection conn = getHBaseConnection()) {
RegionLocator regionLocator = conn RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf(tableName));
.getRegionLocator(TableName.valueOf(tableName)); numRegionServersForTable = Math
numRegionServersForTable = Math.toIntExact( .toIntExact(regionLocator.getAllRegionLocations().stream().map(e -> e.getServerName()).distinct().count());
regionLocator.getAllRegionLocations().stream().map(e -> e.getServerName()).distinct()
.count());
return numRegionServersForTable; return numRegionServersForTable;
} catch (IOException e) { } catch (IOException e) {
logger.error(e); logger.error(e);
@@ -26,8 +26,8 @@ import java.io.Serializable;
public interface HBaseIndexQPSResourceAllocator extends Serializable { public interface HBaseIndexQPSResourceAllocator extends Serializable {
/** /**
* This method returns the QPS Fraction value that needs to be acquired such that the respective * This method returns the QPS Fraction value that needs to be acquired such that the respective HBase index operation
* HBase index operation can be completed in desiredPutsTime. * can be completed in desiredPutsTime.
* *
* @param numPuts Number of inserts to be written to HBase index * @param numPuts Number of inserts to be written to HBase index
* @param desiredPutsTimeInSecs Total expected time for the HBase inserts operation * @param desiredPutsTimeInSecs Total expected time for the HBase inserts operation
@@ -96,8 +96,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
// Total number of new records inserted into the delta file // Total number of new records inserted into the delta file
private long insertRecordsWritten = 0; private long insertRecordsWritten = 0;
public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable, public HoodieAppendHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable, String fileId,
String fileId, Iterator<HoodieRecord<T>> recordItr) { Iterator<HoodieRecord<T>> recordItr) {
super(config, commitTime, fileId, hoodieTable); super(config, commitTime, fileId, hoodieTable);
writeStatus.setStat(new HoodieDeltaWriteStat()); writeStatus.setStat(new HoodieDeltaWriteStat());
this.fileId = fileId; this.fileId = fileId;
@@ -137,10 +137,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
} catch (Exception e) { } catch (Exception e) {
logger.error("Error in update task at commit " + instantTime, e); logger.error("Error in update task at commit " + instantTime, e);
writeStatus.setGlobalError(e); writeStatus.setGlobalError(e);
throw new HoodieUpsertException( throw new HoodieUpsertException("Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit "
"Failed to initialize HoodieAppendHandle for FileId: " + fileId + " on commit " + instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath() + partitionPath, e);
+ instantTime + " on HDFS path " + hoodieTable.getMetaClient().getBasePath()
+ partitionPath, e);
} }
Path path = new Path(partitionPath, writer.getLogFile().getFileName()); Path path = new Path(partitionPath, writer.getLogFile().getFileName());
writeStatus.getStat().setPath(path.toString()); writeStatus.getStat().setPath(path.toString());
@@ -155,13 +153,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
if (avroRecord.isPresent()) { if (avroRecord.isPresent()) {
// Convert GenericRecord to GenericRecord with hoodie commit metadata in schema // Convert GenericRecord to GenericRecord with hoodie commit metadata in schema
avroRecord = Option.of(rewriteRecord((GenericRecord) avroRecord.get())); avroRecord = Option.of(rewriteRecord((GenericRecord) avroRecord.get()));
String seqId = HoodieRecord.generateSequenceId(instantTime, TaskContext.getPartitionId(), String seqId =
recordIndex.getAndIncrement()); HoodieRecord.generateSequenceId(instantTime, TaskContext.getPartitionId(), recordIndex.getAndIncrement());
HoodieAvroUtils HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
hoodieRecord.getPartitionPath(), fileId); hoodieRecord.getPartitionPath(), fileId);
HoodieAvroUtils HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), instantTime, seqId);
.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), instantTime, seqId);
// If currentLocation is present, then this is an update // If currentLocation is present, then this is an update
if (hoodieRecord.getCurrentLocation() != null) { if (hoodieRecord.getCurrentLocation() != null) {
updatedRecordsWritten++; updatedRecordsWritten++;
@@ -208,20 +204,18 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
recordList.clear(); recordList.clear();
} }
if (keysToDelete.size() > 0) { if (keysToDelete.size() > 0) {
writer = writer.appendBlock( writer = writer.appendBlock(new HoodieDeleteBlock(keysToDelete.stream().toArray(HoodieKey[]::new), header));
new HoodieDeleteBlock(keysToDelete.stream().toArray(HoodieKey[]::new), header));
keysToDelete.clear(); keysToDelete.clear();
} }
} catch (Exception e) { } catch (Exception e) {
throw new HoodieAppendException( throw new HoodieAppendException("Failed while appending records to " + currentLogFile.getPath(), e);
"Failed while appending records to " + currentLogFile.getPath(), e);
} }
} }
@Override @Override
public boolean canWrite(HoodieRecord record) { public boolean canWrite(HoodieRecord record) {
return config.getParquetMaxFileSize() >= estimatedNumberOfBytesWritten * config return config.getParquetMaxFileSize() >= estimatedNumberOfBytesWritten
.getLogFileToParquetCompressionRatio(); * config.getLogFileToParquetCompressionRatio();
} }
@Override @Override
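
The canWrite check above converts the bytes appended to the log file into an estimated parquet footprint before comparing it against the configured base-file limit. A rough worked example with assumed values (none of the numbers come from this commit):

long parquetMaxFileSize = 120L * 1024 * 1024;     // assumed 120 MB base-file limit
double logToParquetRatio = 0.35;                  // assumed log-to-parquet compression ratio
long estimatedBytesWritten = 200L * 1024 * 1024;  // log bytes appended so far

boolean canWrite = parquetMaxFileSize >= estimatedBytesWritten * logToParquetRatio;
// 125829120 >= 73400320.0 -> true: the handle keeps accepting records; once the log grows
// past roughly 343 MB (120 MB / 0.35) the estimate crosses the limit and the handle is full.
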
@@ -262,8 +256,8 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
runtimeStats.setTotalUpsertTime(timer.endTimer()); runtimeStats.setTotalUpsertTime(timer.endTimer());
stat.setRuntimeStats(runtimeStats); stat.setRuntimeStats(runtimeStats);
logger.info(String.format("AppendHandle for partitionPath %s fileID %s, took %d ms.", logger.info(String.format("AppendHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(),
stat.getPartitionPath(), stat.getFileId(), runtimeStats.getTotalUpsertTime())); stat.getFileId(), runtimeStats.getTotalUpsertTime()));
return writeStatus; return writeStatus;
} catch (IOException e) { } catch (IOException e) {
@@ -282,13 +276,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieWri
return HoodieLogFormat.newWriterBuilder() return HoodieLogFormat.newWriterBuilder()
.onParentPath(FSUtils.getPartitionPath(hoodieTable.getMetaClient().getBasePath(), partitionPath)) .onParentPath(FSUtils.getPartitionPath(hoodieTable.getMetaClient().getBasePath(), partitionPath))
.withFileId(fileId).overBaseCommit(baseCommitTime).withLogVersion( .withFileId(fileId).overBaseCommit(baseCommitTime)
latestLogFile.map(HoodieLogFile::getLogVersion).orElse(HoodieLogFile.LOGFILE_BASE_VERSION)) .withLogVersion(latestLogFile.map(HoodieLogFile::getLogVersion).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
.withSizeThreshold(config.getLogFileMaxSize()).withFs(fs) .withSizeThreshold(config.getLogFileMaxSize()).withFs(fs)
.withLogWriteToken( .withLogWriteToken(latestLogFile.map(x -> FSUtils.getWriteTokenFromLogPath(x.getPath())).orElse(writeToken))
latestLogFile.map(x -> FSUtils.getWriteTokenFromLogPath(x.getPath())).orElse(writeToken)) .withRolloverLogWriteToken(writeToken).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
.withRolloverLogWriteToken(writeToken)
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
} }
private void writeToBuffer(HoodieRecord<T> record) { private void writeToBuffer(HoodieRecord<T> record) {
@@ -45,9 +45,12 @@ import org.apache.log4j.Logger;
/** /**
* Cleaner is responsible for garbage collecting older files in a given partition path, such that * Cleaner is responsible for garbage collecting older files in a given partition path, such that
* <p> 1) It provides sufficient time for existing queries running on older versions, to close <p> * <p>
* 2) It bounds the growth of the files in the file system <p> TODO: Should all cleaning be done * 1) It provides sufficient time for existing queries running on older versions, to close
* based on {@link HoodieCommitMetadata} * <p>
* 2) It bounds the growth of the files in the file system
* <p>
* TODO: Should all cleaning be done based on {@link HoodieCommitMetadata}
*/ */
public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> { public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
@@ -66,22 +69,21 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
this.config = config; this.config = config;
this.fgIdToPendingCompactionOperations = this.fgIdToPendingCompactionOperations =
((SyncableFileSystemView) hoodieTable.getRTFileSystemView()).getPendingCompactionOperations() ((SyncableFileSystemView) hoodieTable.getRTFileSystemView()).getPendingCompactionOperations()
.map(entry -> Pair.of(new HoodieFileGroupId(entry.getValue().getPartitionPath(), .map(entry -> Pair.of(
entry.getValue().getFileId()), entry.getValue())) new HoodieFileGroupId(entry.getValue().getPartitionPath(), entry.getValue().getFileId()),
entry.getValue()))
.collect(Collectors.toMap(Pair::getKey, Pair::getValue)); .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
} }
/** /**
* Selects the older versions of files for cleaning, such that it bounds the number of versions of * Selects the older versions of files for cleaning, such that it bounds the number of versions of each file. This
* each file. This policy is useful, if you are simply interested in querying the table, and you * policy is useful, if you are simply interested in querying the table, and you don't want too many versions for a
* don't want too many versions for a single file (i.e run it with versionsRetained = 1) * single file (i.e run it with versionsRetained = 1)
*/ */
private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath) private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath) throws IOException {
throws IOException { logger.info("Cleaning " + partitionPath + ", retaining latest " + config.getCleanerFileVersionsRetained()
logger.info("Cleaning " + partitionPath + ", retaining latest " + config + " file versions. ");
.getCleanerFileVersionsRetained() + " file versions. "); List<HoodieFileGroup> fileGroups = fileSystemView.getAllFileGroups(partitionPath).collect(Collectors.toList());
List<HoodieFileGroup> fileGroups = fileSystemView.getAllFileGroups(partitionPath)
.collect(Collectors.toList());
List<String> deletePaths = new ArrayList<>(); List<String> deletePaths = new ArrayList<>();
// Collect all the datafiles savepointed by all the savepoints // Collect all the datafiles savepointed by all the savepoints
List<String> savepointedFiles = hoodieTable.getSavepoints().stream() List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
@@ -90,8 +92,8 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
for (HoodieFileGroup fileGroup : fileGroups) { for (HoodieFileGroup fileGroup : fileGroups) {
int keepVersions = config.getCleanerFileVersionsRetained(); int keepVersions = config.getCleanerFileVersionsRetained();
// do not cleanup slice required for pending compaction // do not cleanup slice required for pending compaction
Iterator<FileSlice> fileSliceIterator = fileGroup.getAllFileSlices() Iterator<FileSlice> fileSliceIterator =
.filter(fs -> !isFileSliceNeededForPendingCompaction(fs)).iterator(); fileGroup.getAllFileSlices().filter(fs -> !isFileSliceNeededForPendingCompaction(fs)).iterator();
if (isFileGroupInPendingCompaction(fileGroup)) { if (isFileGroupInPendingCompaction(fileGroup)) {
// We have already saved the last version of file-groups for pending compaction Id // We have already saved the last version of file-groups for pending compaction Id
keepVersions--; keepVersions--;
@@ -116,8 +118,8 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
} }
if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) { if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well // If merge on read, then clean the log files for the commits as well
deletePaths.addAll(nextSlice.getLogFiles().map(file -> file.getPath().toString()) deletePaths
.collect(Collectors.toList())); .addAll(nextSlice.getLogFiles().map(file -> file.getPath().toString()).collect(Collectors.toList()));
} }
} }
} }
@@ -126,21 +128,21 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
/** /**
* Selects the versions for file for cleaning, such that it <p> - Leaves the latest version of the * Selects the versions for file for cleaning, such that it
* file untouched - For older versions, - It leaves all the commits untouched which has occured in * <p>
* last <code>config.getCleanerCommitsRetained()</code> commits - It leaves ONE commit before this * - Leaves the latest version of the file untouched - For older versions, - It leaves all the commits untouched which
* window. We assume that the max(query execution time) == commit_batch_time * * has occured in last <code>config.getCleanerCommitsRetained()</code> commits - It leaves ONE commit before this
* config.getCleanerCommitsRetained(). This is 12 hours by default. This is essential to leave the * window. We assume that the max(query execution time) == commit_batch_time * config.getCleanerCommitsRetained().
* file used by the query thats running for the max time. <p> This provides the effect of having * This is 12 hours by default. This is essential to leave the file used by the query thats running for the max time.
* lookback into all changes that happened in the last X commits. (eg: if you retain 24 commits, * <p>
* and commit batch time is 30 mins, then you have 12 hrs of lookback) <p> This policy is the * This provides the effect of having lookback into all changes that happened in the last X commits. (eg: if you
* default. * retain 24 commits, and commit batch time is 30 mins, then you have 12 hrs of lookback)
* <p>
* This policy is the default.
*/ */
private List<String> getFilesToCleanKeepingLatestCommits(String partitionPath) private List<String> getFilesToCleanKeepingLatestCommits(String partitionPath) throws IOException {
throws IOException {
int commitsRetained = config.getCleanerCommitsRetained(); int commitsRetained = config.getCleanerCommitsRetained();
logger logger.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
List<String> deletePaths = new ArrayList<>(); List<String> deletePaths = new ArrayList<>();
// Collect all the datafiles savepointed by all the savepoints // Collect all the datafiles savepointed by all the savepoints
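
Both retention policies documented above are driven by the writer configuration rather than by this class directly. A minimal sketch of how such a config might be built (builder and enum names are assumed from the org.apache.hudi.config / org.apache.hudi.common.model packages and are not part of this patch):

import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;

// Keep files referenced by the last 24 commits (the default KEEP_LATEST_COMMITS policy)
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hoodie/sample_table")                       // hypothetical base path
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
        .retainCommits(24)                                      // ~12h of lookback at 30-minute commit batches
        .build())
    .build();

// Alternative: keep only the latest versions of each file group
// HoodieCompactionConfig.newBuilder()
//     .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
//     .retainFileVersions(1).build();
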
@@ -150,8 +152,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
// determine if we have enough commits, to start cleaning. // determine if we have enough commits, to start cleaning.
if (commitTimeline.countInstants() > commitsRetained) { if (commitTimeline.countInstants() > commitsRetained) {
HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get(); HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get();
List<HoodieFileGroup> fileGroups = fileSystemView.getAllFileGroups(partitionPath) List<HoodieFileGroup> fileGroups = fileSystemView.getAllFileGroups(partitionPath).collect(Collectors.toList());
.collect(Collectors.toList());
for (HoodieFileGroup fileGroup : fileGroups) { for (HoodieFileGroup fileGroup : fileGroups) {
List<FileSlice> fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList()); List<FileSlice> fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList());
@@ -160,8 +161,8 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
} }
String lastVersion = fileSliceList.get(0).getBaseInstantTime(); String lastVersion = fileSliceList.get(0).getBaseInstantTime();
String lastVersionBeforeEarliestCommitToRetain = getLatestVersionBeforeCommit(fileSliceList, String lastVersionBeforeEarliestCommitToRetain =
earliestCommitToRetain); getLatestVersionBeforeCommit(fileSliceList, earliestCommitToRetain);
// Ensure there are more than 1 version of the file (we only clean old files from updates) // Ensure there are more than 1 version of the file (we only clean old files from updates)
// i.e always spare the last commit. // i.e always spare the last commit.
@@ -183,16 +184,14 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
} }
// Always keep the last commit // Always keep the last commit
if (!isFileSliceNeededForPendingCompaction(aSlice) if (!isFileSliceNeededForPendingCompaction(aSlice) && HoodieTimeline
&& HoodieTimeline .compareTimestamps(earliestCommitToRetain.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
.compareTimestamps(earliestCommitToRetain.getTimestamp(), fileCommitTime,
HoodieTimeline.GREATER)) {
// this is a commit, that should be cleaned. // this is a commit, that should be cleaned.
aFile.ifPresent(hoodieDataFile -> deletePaths.add(hoodieDataFile.getPath())); aFile.ifPresent(hoodieDataFile -> deletePaths.add(hoodieDataFile.getPath()));
if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) { if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well // If merge on read, then clean the log files for the commits as well
deletePaths.addAll(aSlice.getLogFiles().map(file -> file.getPath().toString()) deletePaths
.collect(Collectors.toList())); .addAll(aSlice.getLogFiles().map(file -> file.getPath().toString()).collect(Collectors.toList()));
} }
} }
} }
@@ -205,12 +204,10 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
/** /**
* Gets the latest version < commitTime. This version file could still be used by queries. * Gets the latest version < commitTime. This version file could still be used by queries.
*/ */
private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList, HoodieInstant commitTime) {
HoodieInstant commitTime) {
for (FileSlice file : fileSliceList) { for (FileSlice file : fileSliceList) {
String fileCommitTime = file.getBaseInstantTime(); String fileCommitTime = file.getBaseInstantTime();
if (HoodieTimeline if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the // fileList is sorted on the reverse, so the first commit we find <= commitTime is the
// one we want // one we want
return fileCommitTime; return fileCommitTime;
@@ -246,14 +243,14 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
int commitsRetained = config.getCleanerCommitsRetained(); int commitsRetained = config.getCleanerCommitsRetained();
if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS
&& commitTimeline.countInstants() > commitsRetained) { && commitTimeline.countInstants() > commitsRetained) {
earliestCommitToRetain = commitTimeline earliestCommitToRetain = commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained);
.nthInstant(commitTimeline.countInstants() - commitsRetained);
} }
return earliestCommitToRetain; return earliestCommitToRetain;
} }
/** /**
* Determine if file slice needed to be preserved for pending compaction * Determine if file slice needed to be preserved for pending compaction
*
* @param fileSlice File Slice * @param fileSlice File Slice
* @return true if file slice needs to be preserved, false otherwise. * @return true if file slice needs to be preserved, false otherwise.
*/ */
@@ -83,9 +83,8 @@ public class HoodieCommitArchiveLog {
try { try {
if (this.writer == null) { if (this.writer == null) {
return HoodieLogFormat.newWriterBuilder().onParentPath(archiveFilePath.getParent()) return HoodieLogFormat.newWriterBuilder().onParentPath(archiveFilePath.getParent())
.withFileId(archiveFilePath.getName()) .withFileId(archiveFilePath.getName()).withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFs(metaClient.getFs()) .withFs(metaClient.getFs()).overBaseCommit("").build();
.overBaseCommit("").build();
} else { } else {
return this.writer; return this.writer;
} }
@@ -137,8 +136,7 @@ public class HoodieCommitArchiveLog {
// TODO: Handle ROLLBACK_ACTION in future // TODO: Handle ROLLBACK_ACTION in future
// ROLLBACK_ACTION is currently not defined in HoodieActiveTimeline // ROLLBACK_ACTION is currently not defined in HoodieActiveTimeline
HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline() HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION)) .getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION)).filterCompletedInstants();
.filterCompletedInstants();
Stream<HoodieInstant> instants = cleanAndRollbackTimeline.getInstants() Stream<HoodieInstant> instants = cleanAndRollbackTimeline.getInstants()
.collect(Collectors.groupingBy(s -> s.getAction())).entrySet().stream().map(i -> { .collect(Collectors.groupingBy(s -> s.getAction())).entrySet().stream().map(i -> {
if (i.getValue().size() > maxCommitsToKeep) { if (i.getValue().size() > maxCommitsToKeep) {
@@ -159,20 +157,16 @@ public class HoodieCommitArchiveLog {
Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant(); Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) { if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) {
// Actually do the commits // Actually do the commits
instants = Stream.concat(instants, commitTimeline.getInstants() instants = Stream.concat(instants, commitTimeline.getInstants().filter(s -> {
.filter(s -> {
// if no savepoint present, then dont filter // if no savepoint present, then dont filter
return !(firstSavepoint.isPresent() && HoodieTimeline return !(firstSavepoint.isPresent() && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(),
.compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(), s.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL));
HoodieTimeline.LESSER_OR_EQUAL)); }).filter(s -> {
})
.filter(s -> {
// Ensure commits >= oldest pending compaction commit is retained // Ensure commits >= oldest pending compaction commit is retained
return oldestPendingCompactionInstant.map(instant -> { return oldestPendingCompactionInstant.map(instant -> {
return HoodieTimeline.compareTimestamps(instant.getTimestamp(), s.getTimestamp(), HoodieTimeline.GREATER); return HoodieTimeline.compareTimestamps(instant.getTimestamp(), s.getTimestamp(), HoodieTimeline.GREATER);
}).orElse(true); }).orElse(true);
}) }).limit(commitTimeline.countInstants() - minCommitsToKeep));
.limit(commitTimeline.countInstants() - minCommitsToKeep));
} }
return instants; return instants;
@@ -194,12 +188,9 @@ public class HoodieCommitArchiveLog {
} }
// Remove older meta-data from auxiliary path too // Remove older meta-data from auxiliary path too
Option<HoodieInstant> latestCommitted = Option<HoodieInstant> latestCommitted = Option.fromJavaOptional(archivedInstants.stream().filter(i -> {
Option.fromJavaOptional(archivedInstants.stream() return i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
.filter(i -> { || (i.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)));
return i.isCompleted()
&& (i.getAction().equals(HoodieTimeline.COMMIT_ACTION) || (i.getAction().equals(
HoodieTimeline.DELTA_COMMIT_ACTION)));
}).max(Comparator.comparing(HoodieInstant::getTimestamp))); }).max(Comparator.comparing(HoodieInstant::getTimestamp)));
if (latestCommitted.isPresent()) { if (latestCommitted.isPresent()) {
success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get()); success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get());
@@ -214,12 +205,9 @@ public class HoodieCommitArchiveLog {
* @return success if all eligible file deleted successfully * @return success if all eligible file deleted successfully
* @throws IOException in case of error * @throws IOException in case of error
*/ */
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
throws IOException { List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
List<HoodieInstant> instants = new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()),
HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
List<HoodieInstant> instantsToBeDeleted = List<HoodieInstant> instantsToBeDeleted =
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(), instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
@@ -239,8 +227,7 @@ public class HoodieCommitArchiveLog {
public void archive(List<HoodieInstant> instants) throws HoodieCommitException { public void archive(List<HoodieInstant> instants) throws HoodieCommitException {
try { try {
HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getAllCommitsTimeline() HoodieTimeline commitTimeline = metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants();
.filterCompletedInstants();
Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema(); Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema();
log.info("Wrapper schema " + wrapperSchema.toString()); log.info("Wrapper schema " + wrapperSchema.toString());
List<IndexedRecord> records = new ArrayList<>(); List<IndexedRecord> records = new ArrayList<>();
@@ -277,15 +264,14 @@ public class HoodieCommitArchiveLog {
} }
} }
private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline, private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline, HoodieInstant hoodieInstant)
HoodieInstant hoodieInstant) throws IOException { throws IOException {
HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry(); HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp()); archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
switch (hoodieInstant.getAction()) { switch (hoodieInstant.getAction()) {
case HoodieTimeline.CLEAN_ACTION: { case HoodieTimeline.CLEAN_ACTION: {
archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class));
HoodieCleanMetadata.class));
archivedMetaWrapper.setActionType(ActionType.clean.name()); archivedMetaWrapper.setActionType(ActionType.clean.name());
break; break;
} }
@@ -297,16 +283,14 @@ public class HoodieCommitArchiveLog {
break; break;
} }
case HoodieTimeline.ROLLBACK_ACTION: { case HoodieTimeline.ROLLBACK_ACTION: {
archivedMetaWrapper.setHoodieRollbackMetadata(AvroUtils archivedMetaWrapper.setHoodieRollbackMetadata(AvroUtils.deserializeAvroMetadata(
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class));
HoodieRollbackMetadata.class));
archivedMetaWrapper.setActionType(ActionType.rollback.name()); archivedMetaWrapper.setActionType(ActionType.rollback.name());
break; break;
} }
case HoodieTimeline.SAVEPOINT_ACTION: { case HoodieTimeline.SAVEPOINT_ACTION: {
archivedMetaWrapper.setHoodieSavePointMetadata(AvroUtils archivedMetaWrapper.setHoodieSavePointMetadata(AvroUtils.deserializeAvroMetadata(
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class));
HoodieSavepointMetadata.class));
archivedMetaWrapper.setActionType(ActionType.savepoint.name()); archivedMetaWrapper.setActionType(ActionType.savepoint.name());
break; break;
} }
@@ -328,8 +312,8 @@ public class HoodieCommitArchiveLog {
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
// Need this to ignore other public get() methods // Need this to ignore other public get() methods
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
org.apache.hudi.avro.model.HoodieCommitMetadata avroMetaData = mapper org.apache.hudi.avro.model.HoodieCommitMetadata avroMetaData =
.convertValue(hoodieCommitMetadata, org.apache.hudi.avro.model.HoodieCommitMetadata.class); mapper.convertValue(hoodieCommitMetadata, org.apache.hudi.avro.model.HoodieCommitMetadata.class);
// Do not archive Rolling Stats, cannot set to null since AVRO will throw null pointer // Do not archive Rolling Stats, cannot set to null since AVRO will throw null pointer
avroMetaData.getExtraMetadata().put(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY, ""); avroMetaData.getExtraMetadata().put(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY, "");
return avroMetaData; return avroMetaData;
@@ -66,11 +66,10 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
partitionMetadata.trySave(TaskContext.getPartitionId()); partitionMetadata.trySave(TaskContext.getPartitionId());
createMarkerFile(partitionPath); createMarkerFile(partitionPath);
this.storageWriter = HoodieStorageWriterFactory this.storageWriter =
.getStorageWriter(commitTime, path, hoodieTable, config, writerSchema); HoodieStorageWriterFactory.getStorageWriter(commitTime, path, hoodieTable, config, writerSchema);
} catch (IOException e) { } catch (IOException e) {
throw new HoodieInsertException( throw new HoodieInsertException("Failed to initialize HoodieStorageWriter for path " + path, e);
"Failed to initialize HoodieStorageWriter for path " + path, e);
} }
logger.info("New CreateHandle for partition :" + partitionPath + " with fileId " + fileId); logger.info("New CreateHandle for partition :" + partitionPath + " with fileId " + fileId);
} }
@@ -136,8 +135,7 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
} }
} }
} catch (IOException io) { } catch (IOException io) {
throw new HoodieInsertException( throw new HoodieInsertException("Failed to insert records for path " + path, io);
"Failed to insert records for path " + path, io);
} }
} }
@@ -151,8 +149,8 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
*/ */
@Override @Override
public WriteStatus close() { public WriteStatus close() {
logger.info("Closing the file " + writeStatus.getFileId() + " as we are done with all the records " logger
+ recordsWritten); .info("Closing the file " + writeStatus.getFileId() + " as we are done with all the records " + recordsWritten);
try { try {
storageWriter.close(); storageWriter.close();
@@ -174,8 +172,8 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieWri
stat.setRuntimeStats(runtimeStats); stat.setRuntimeStats(runtimeStats);
writeStatus.setStat(stat); writeStatus.setStat(stat);
logger.info(String.format("CreateHandle for partitionPath %s fileID %s, took %d ms.", logger.info(String.format("CreateHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(),
stat.getPartitionPath(), stat.getFileId(), runtimeStats.getTotalCreateTime())); stat.getFileId(), runtimeStats.getTotalCreateTime()));
return writeStatus; return writeStatus;
} catch (IOException e) { } catch (IOException e) {
@@ -67,15 +67,15 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
/** /**
* Given a list of row keys and one file, return only row keys existing in that file. * Given a list of row keys and one file, return only row keys existing in that file.
*/ */
public static List<String> checkCandidatesAgainstFile(Configuration configuration, public static List<String> checkCandidatesAgainstFile(Configuration configuration, List<String> candidateRecordKeys,
List<String> candidateRecordKeys, Path filePath) throws HoodieIndexException { Path filePath) throws HoodieIndexException {
List<String> foundRecordKeys = new ArrayList<>(); List<String> foundRecordKeys = new ArrayList<>();
try { try {
// Load all rowKeys from the file, to double-confirm // Load all rowKeys from the file, to double-confirm
if (!candidateRecordKeys.isEmpty()) { if (!candidateRecordKeys.isEmpty()) {
HoodieTimer timer = new HoodieTimer().startTimer(); HoodieTimer timer = new HoodieTimer().startTimer();
Set<String> fileRowKeys = ParquetUtils.filterParquetRowKeys(configuration, filePath, Set<String> fileRowKeys =
new HashSet<>(candidateRecordKeys)); ParquetUtils.filterParquetRowKeys(configuration, filePath, new HashSet<>(candidateRecordKeys));
foundRecordKeys.addAll(fileRowKeys); foundRecordKeys.addAll(fileRowKeys);
logger.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath, logger.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath,
timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size())); timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size()));
@@ -112,11 +112,11 @@ public class HoodieKeyLookupHandle<T extends HoodieRecordPayload> extends Hoodie
} }
HoodieDataFile dataFile = getLatestDataFile(); HoodieDataFile dataFile = getLatestDataFile();
List<String> matchingKeys = checkCandidatesAgainstFile(hoodieTable.getHadoopConf(), candidateRecordKeys, List<String> matchingKeys =
new Path(dataFile.getPath())); checkCandidatesAgainstFile(hoodieTable.getHadoopConf(), candidateRecordKeys, new Path(dataFile.getPath()));
logger.info(String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)", logger.info(
totalKeysChecked, candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(), String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)", totalKeysChecked,
matchingKeys.size())); candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(), matchingKeys.size()));
return new KeyLookupResult(partitionPathFilePair.getRight(), partitionPathFilePair.getLeft(), return new KeyLookupResult(partitionPathFilePair.getRight(), partitionPathFilePair.getLeft(),
dataFile.getCommitTime(), matchingKeys); dataFile.getCommitTime(), matchingKeys);
} }
@@ -71,8 +71,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
Iterator<HoodieRecord<T>> recordItr, String fileId) { Iterator<HoodieRecord<T>> recordItr, String fileId) {
super(config, commitTime, fileId, hoodieTable); super(config, commitTime, fileId, hoodieTable);
String partitionPath = init(fileId, recordItr); String partitionPath = init(fileId, recordItr);
init(fileId, partitionPath, init(fileId, partitionPath, hoodieTable.getROFileSystemView().getLatestDataFile(partitionPath, fileId).get());
hoodieTable.getROFileSystemView().getLatestDataFile(partitionPath, fileId).get());
} }
/** /**
@@ -83,8 +82,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
super(config, commitTime, fileId, hoodieTable); super(config, commitTime, fileId, hoodieTable);
this.keyToNewRecords = keyToNewRecords; this.keyToNewRecords = keyToNewRecords;
this.useWriterSchema = true; this.useWriterSchema = true;
init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get()) init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get()).getPartitionPath(),
.getPartitionPath(), dataFileToBeMerged); dataFileToBeMerged);
} }
@@ -160,14 +159,12 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath)); new Path(config.getBasePath()), FSUtils.getPartitionPath(config.getBasePath(), partitionPath));
partitionMetadata.trySave(TaskContext.getPartitionId()); partitionMetadata.trySave(TaskContext.getPartitionId());
oldFilePath = new Path( oldFilePath = new Path(config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/") String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/")
+ FSUtils.makeDataFileName(instantTime, writeToken, fileId)).toString(); + FSUtils.makeDataFileName(instantTime, writeToken, fileId)).toString();
newFilePath = new Path(config.getBasePath(), relativePath); newFilePath = new Path(config.getBasePath(), relativePath);
logger.info(String logger.info(String.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(),
.format("Merging new data into oldPath %s, as newPath %s", oldFilePath.toString(),
newFilePath.toString())); newFilePath.toString()));
// file name is same for all records, in this bunch // file name is same for all records, in this bunch
writeStatus.setFileId(fileId); writeStatus.setFileId(fileId);
@@ -180,13 +177,12 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
createMarkerFile(partitionPath); createMarkerFile(partitionPath);
// Create the writer for writing the new version file // Create the writer for writing the new version file
storageWriter = HoodieStorageWriterFactory storageWriter =
.getStorageWriter(instantTime, newFilePath, hoodieTable, config, writerSchema); HoodieStorageWriterFactory.getStorageWriter(instantTime, newFilePath, hoodieTable, config, writerSchema);
} catch (IOException io) { } catch (IOException io) {
logger.error("Error in update task at commit " + instantTime, io); logger.error("Error in update task at commit " + instantTime, io);
writeStatus.setGlobalError(io); writeStatus.setGlobalError(io);
throw new HoodieUpsertException( throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
+ instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io); + instantTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
} }
} }
@@ -217,10 +213,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
logger.info("Number of entries in MemoryBasedMap => " logger.info("Number of entries in MemoryBasedMap => "
+ ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries() + ((ExternalSpillableMap) keyToNewRecords).getInMemoryMapNumEntries()
+ "Total size in bytes of MemoryBasedMap => " + "Total size in bytes of MemoryBasedMap => "
+ ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize() + ((ExternalSpillableMap) keyToNewRecords).getCurrentInMemoryMapSize() + "Number of entries in DiskBasedMap => "
+ "Number of entries in DiskBasedMap => " + ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries() + "Size of file spilled to disk => "
+ ((ExternalSpillableMap) keyToNewRecords).getDiskBasedMapNumEntries()
+ "Size of file spilled to disk => "
+ ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes()); + ((ExternalSpillableMap) keyToNewRecords).getSizeOfFileOnDiskInBytes());
return partitionPath; return partitionPath;
} }
@@ -258,8 +252,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
} }
/** /**
* Go through an old record. Here if we detect a newer version shows up, we write the new one to * Go through an old record. Here if we detect a newer version shows up, we write the new one to the file.
* the file.
*/ */
public void write(GenericRecord oldRecord) { public void write(GenericRecord oldRecord) {
String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
@@ -269,12 +262,12 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
// writing the first record. So make a copy of the record to be merged // writing the first record. So make a copy of the record to be merged
HoodieRecord<T> hoodieRecord = new HoodieRecord<>(keyToNewRecords.get(key)); HoodieRecord<T> hoodieRecord = new HoodieRecord<>(keyToNewRecords.get(key));
try { try {
Option<IndexedRecord> combinedAvroRecord = hoodieRecord.getData() Option<IndexedRecord> combinedAvroRecord =
.combineAndGetUpdateValue(oldRecord, useWriterSchema ? writerSchema : originalSchema); hoodieRecord.getData().combineAndGetUpdateValue(oldRecord, useWriterSchema ? writerSchema : originalSchema);
if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) { if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) {
/* ONLY WHEN /*
* 1) we have an update for this key AND * ONLY WHEN 1) we have an update for this key AND 2) We are able to successfully write the the combined new
* 2) We are able to successfully write the the combined new value * value
* *
* We no longer need to copy the old record over. * We no longer need to copy the old record over.
*/ */
@@ -282,26 +275,24 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
} }
writtenRecordKeys.add(key); writtenRecordKeys.add(key);
} catch (Exception e) { } catch (Exception e) {
throw new HoodieUpsertException( throw new HoodieUpsertException("Failed to combine/merge new record with old value in storage, for new record {"
"Failed to combine/merge new record with old value in storage, for new record {"
+ keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e); + keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e);
} }
} }
if (copyOldRecord) { if (copyOldRecord) {
// this should work as it is, since this is an existing record // this should work as it is, since this is an existing record
String errMsg = "Failed to merge old record into new file for key " + key + " from old file " String errMsg = "Failed to merge old record into new file for key " + key + " from old file " + getOldFilePath()
+ getOldFilePath() + " to new file " + newFilePath; + " to new file " + newFilePath;
try { try {
storageWriter.writeAvro(key, oldRecord); storageWriter.writeAvro(key, oldRecord);
} catch (ClassCastException e) { } catch (ClassCastException e) {
logger.error("Schema mismatch when rewriting old record " + oldRecord + " from file " logger.error("Schema mismatch when rewriting old record " + oldRecord + " from file " + getOldFilePath()
+ getOldFilePath() + " to file " + newFilePath + " with writerSchema " + writerSchema + " to file " + newFilePath + " with writerSchema " + writerSchema.toString(true));
.toString(true));
throw new HoodieUpsertException(errMsg, e); throw new HoodieUpsertException(errMsg, e);
} catch (IOException e) { } catch (IOException e) {
logger.error("Failed to merge old record into new file for key " + key + " from old file " logger.error("Failed to merge old record into new file for key " + key + " from old file " + getOldFilePath()
+ getOldFilePath() + " to new file " + newFilePath, e); + " to new file " + newFilePath, e);
throw new HoodieUpsertException(errMsg, e); throw new HoodieUpsertException(errMsg, e);
} }
recordsWritten++; recordsWritten++;
@@ -344,8 +335,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieWrit
runtimeStats.setTotalUpsertTime(timer.endTimer()); runtimeStats.setTotalUpsertTime(timer.endTimer());
stat.setRuntimeStats(runtimeStats); stat.setRuntimeStats(runtimeStats);
logger.info(String.format("MergeHandle for partitionPath %s fileID %s, took %d ms.", logger.info(String.format("MergeHandle for partitionPath %s fileID %s, took %d ms.", stat.getPartitionPath(),
stat.getPartitionPath(), stat.getFileId(), runtimeStats.getTotalUpsertTime())); stat.getFileId(), runtimeStats.getTotalUpsertTime()));
return writeStatus; return writeStatus;
} catch (IOException e) { } catch (IOException e) {
View File
@@ -61,8 +61,7 @@ public abstract class HoodieWriteHandle<T extends HoodieRecordPayload> extends H
this.writerSchema = createHoodieWriteSchema(originalSchema); this.writerSchema = createHoodieWriteSchema(originalSchema);
this.timer = new HoodieTimer().startTimer(); this.timer = new HoodieTimer().startTimer();
this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(), this.writeStatus = (WriteStatus) ReflectionUtils.loadClass(config.getWriteStatusClassName(),
!hoodieTable.getIndex().isImplicitWithStorage(), !hoodieTable.getIndex().isImplicitWithStorage(), config.getWriteStatusFailureFraction());
config.getWriteStatusFailureFraction());
} }
/** /**
View File
@@ -45,15 +45,12 @@ public interface HoodieCompactor extends Serializable {
* @return Compaction Plan * @return Compaction Plan
* @throws IOException when encountering errors * @throws IOException when encountering errors
*/ */
HoodieCompactionPlan generateCompactionPlan(JavaSparkContext jsc, HoodieCompactionPlan generateCompactionPlan(JavaSparkContext jsc, HoodieTable hoodieTable, HoodieWriteConfig config,
HoodieTable hoodieTable, HoodieWriteConfig config, String compactionCommitTime, String compactionCommitTime, Set<HoodieFileGroupId> fgIdsInPendingCompactions) throws IOException;
Set<HoodieFileGroupId> fgIdsInPendingCompactions)
throws IOException;
/** /**
* Execute compaction operations and report back status * Execute compaction operations and report back status
*/ */
JavaRDD<WriteStatus> compact(JavaSparkContext jsc, JavaRDD<WriteStatus> compact(JavaSparkContext jsc, HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable,
HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config, HoodieWriteConfig config, String compactionInstantTime) throws IOException;
String compactionInstantTime) throws IOException;
} }
View File
@@ -63,9 +63,9 @@ import org.apache.spark.util.AccumulatorV2;
import org.apache.spark.util.LongAccumulator; import org.apache.spark.util.LongAccumulator;
/** /**
* HoodieRealtimeTableCompactor compacts a hoodie table with merge on read storage. Computes all * HoodieRealtimeTableCompactor compacts a hoodie table with merge on read storage. Computes all possible compactions,
* possible compactions, passes it through a CompactionFilter and executes all the compactions and * passes it through a CompactionFilter and executes all the compactions and writes a new version of base files and make
* writes a new version of base files and make a normal commit * a normal commit
* *
* @see HoodieCompactor * @see HoodieCompactor
*/ */
@@ -78,9 +78,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
private AccumulatorV2<Long, Long> totalFileSlices; private AccumulatorV2<Long, Long> totalFileSlices;
@Override @Override
public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, public JavaRDD<WriteStatus> compact(JavaSparkContext jsc, HoodieCompactionPlan compactionPlan,
HoodieCompactionPlan compactionPlan, HoodieTable hoodieTable, HoodieWriteConfig config, HoodieTable hoodieTable, HoodieWriteConfig config, String compactionInstantTime) throws IOException {
String compactionInstantTime) throws IOException {
if (compactionPlan == null || (compactionPlan.getOperations() == null) if (compactionPlan == null || (compactionPlan.getOperations() == null)
|| (compactionPlan.getOperations().isEmpty())) { || (compactionPlan.getOperations().isEmpty())) {
return jsc.emptyRDD(); return jsc.emptyRDD();
@@ -88,41 +87,36 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
HoodieTableMetaClient metaClient = hoodieTable.getMetaClient(); HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
// Compacting is very similar to applying updates to existing file // Compacting is very similar to applying updates to existing file
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc);
List<CompactionOperation> operations = compactionPlan.getOperations().stream().map( List<CompactionOperation> operations = compactionPlan.getOperations().stream()
CompactionOperation::convertFromAvroRecordInstance).collect(toList()); .map(CompactionOperation::convertFromAvroRecordInstance).collect(toList());
log.info("Compactor compacting " + operations + " files"); log.info("Compactor compacting " + operations + " files");
return jsc.parallelize(operations, operations.size()) return jsc.parallelize(operations, operations.size())
.map(s -> compact(table, metaClient, config, s, compactionInstantTime)) .map(s -> compact(table, metaClient, config, s, compactionInstantTime)).flatMap(List::iterator);
.flatMap(List::iterator);
} }
private List<WriteStatus> compact(HoodieCopyOnWriteTable hoodieCopyOnWriteTable, HoodieTableMetaClient metaClient, private List<WriteStatus> compact(HoodieCopyOnWriteTable hoodieCopyOnWriteTable, HoodieTableMetaClient metaClient,
HoodieWriteConfig config, HoodieWriteConfig config, CompactionOperation operation, String commitTime) throws IOException {
CompactionOperation operation, String commitTime) throws IOException {
FileSystem fs = metaClient.getFs(); FileSystem fs = metaClient.getFs();
Schema readerSchema = HoodieAvroUtils Schema readerSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
log.info("Compacting base " + operation.getDataFilePath() + " with delta files " + operation log.info("Compacting base " + operation.getDataFilePath() + " with delta files " + operation.getDeltaFilePaths()
.getDeltaFilePaths() + " for commit " + commitTime); + " for commit " + commitTime);
// TODO - FIX THIS // TODO - FIX THIS
// Reads the entire avro file. Always only specific blocks should be read from the avro file // Reads the entire avro file. Always only specific blocks should be read from the avro file
// (failure recover). // (failure recover).
// Load all the delta commits since the last compaction commit and get all the blocks to be // Load all the delta commits since the last compaction commit and get all the blocks to be
// loaded and load it using CompositeAvroLogReader // loaded and load it using CompositeAvroLogReader
// Since a DeltaCommit is not defined yet, reading all the records. revisit this soon. // Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.
String maxInstantTime = metaClient.getActiveTimeline() String maxInstantTime = metaClient
.getTimelineOfActions( .getActiveTimeline().getTimelineOfActions(Sets.newHashSet(HoodieTimeline.COMMIT_ACTION,
Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
HoodieTimeline.DELTA_COMMIT_ACTION))
.filterCompletedInstants().lastInstant().get().getTimestamp(); .filterCompletedInstants().lastInstant().get().getTimestamp();
log.info("MaxMemoryPerCompaction => " + config.getMaxMemoryPerCompaction()); log.info("MaxMemoryPerCompaction => " + config.getMaxMemoryPerCompaction());
HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, metaClient.getBasePath(),
metaClient.getBasePath(), operation.getDeltaFilePaths(), readerSchema, maxInstantTime, operation.getDeltaFilePaths(), readerSchema, maxInstantTime, config.getMaxMemoryPerCompaction(),
config.getMaxMemoryPerCompaction(), config.getCompactionLazyBlockReadEnabled(), config.getCompactionLazyBlockReadEnabled(), config.getCompactionReverseLogReadEnabled(),
config.getCompactionReverseLogReadEnabled(), config.getMaxDFSStreamBufferSize(), config.getMaxDFSStreamBufferSize(), config.getSpillableMapBasePath());
config.getSpillableMapBasePath());
if (!scanner.iterator().hasNext()) { if (!scanner.iterator().hasNext()) {
return Lists.<WriteStatus>newArrayList(); return Lists.<WriteStatus>newArrayList();
} }
@@ -134,21 +128,20 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
// If the dataFile is present, there is a base parquet file present, perform updates else perform inserts into a // If the dataFile is present, there is a base parquet file present, perform updates else perform inserts into a
// new base parquet file. // new base parquet file.
if (oldDataFileOpt.isPresent()) { if (oldDataFileOpt.isPresent()) {
result = hoodieCopyOnWriteTable result = hoodieCopyOnWriteTable.handleUpdate(commitTime, operation.getFileId(), scanner.getRecords(),
.handleUpdate(commitTime, operation.getFileId(), scanner.getRecords(), oldDataFileOpt.get()); oldDataFileOpt.get());
} else { } else {
result = hoodieCopyOnWriteTable result = hoodieCopyOnWriteTable.handleInsert(commitTime, operation.getPartitionPath(), operation.getFileId(),
.handleInsert(commitTime, operation.getPartitionPath(), operation.getFileId(), scanner.iterator()); scanner.iterator());
} }
Iterable<List<WriteStatus>> resultIterable = () -> result; Iterable<List<WriteStatus>> resultIterable = () -> result;
return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream) return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream).peek(s -> {
.peek(s -> {
s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog()); s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog());
s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles()); s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles());
s.getStat().setTotalLogRecords(scanner.getTotalLogRecords()); s.getStat().setTotalLogRecords(scanner.getTotalLogRecords());
s.getStat().setPartitionPath(operation.getPartitionPath()); s.getStat().setPartitionPath(operation.getPartitionPath());
s.getStat().setTotalLogSizeCompacted(operation.getMetrics().get( s.getStat()
CompactionStrategy.TOTAL_LOG_FILE_SIZE).longValue()); .setTotalLogSizeCompacted(operation.getMetrics().get(CompactionStrategy.TOTAL_LOG_FILE_SIZE).longValue());
s.getStat().setTotalLogBlocks(scanner.getTotalLogBlocks()); s.getStat().setTotalLogBlocks(scanner.getTotalLogBlocks());
s.getStat().setTotalCorruptLogBlock(scanner.getTotalCorruptBlocks()); s.getStat().setTotalCorruptLogBlock(scanner.getTotalCorruptBlocks());
s.getStat().setTotalRollbackBlocks(scanner.getTotalRollbacks()); s.getStat().setTotalRollbackBlocks(scanner.getTotalRollbacks());
@@ -159,27 +152,24 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
} }
@Override @Override
public HoodieCompactionPlan generateCompactionPlan(JavaSparkContext jsc, public HoodieCompactionPlan generateCompactionPlan(JavaSparkContext jsc, HoodieTable hoodieTable,
HoodieTable hoodieTable, HoodieWriteConfig config, String compactionCommitTime, HoodieWriteConfig config, String compactionCommitTime, Set<HoodieFileGroupId> fgIdsInPendingCompactions)
Set<HoodieFileGroupId> fgIdsInPendingCompactions) throws IOException { throws IOException {
totalLogFiles = new LongAccumulator(); totalLogFiles = new LongAccumulator();
totalFileSlices = new LongAccumulator(); totalFileSlices = new LongAccumulator();
jsc.sc().register(totalLogFiles); jsc.sc().register(totalLogFiles);
jsc.sc().register(totalFileSlices); jsc.sc().register(totalFileSlices);
Preconditions Preconditions.checkArgument(hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ,
.checkArgument(hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ, "HoodieRealtimeTableCompactor can only compact table of type " + HoodieTableType.MERGE_ON_READ + " and not "
"HoodieRealtimeTableCompactor can only compact table of type " + hoodieTable.getMetaClient().getTableType().name());
+ HoodieTableType.MERGE_ON_READ + " and not " + hoodieTable.getMetaClient()
.getTableType().name());
// TODO : check if maxMemory is not greater than JVM or spark.executor memory // TODO : check if maxMemory is not greater than JVM or spark.executor memory
// TODO - rollback any compactions in flight // TODO - rollback any compactions in flight
HoodieTableMetaClient metaClient = hoodieTable.getMetaClient(); HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
log.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommitTime); log.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommitTime);
List<String> partitionPaths = FSUtils List<String> partitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
config.shouldAssumeDatePartitioning()); config.shouldAssumeDatePartitioning());
// filter the partition paths if needed to reduce list status // filter the partition paths if needed to reduce list status
@@ -192,16 +182,12 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
RealtimeView fileSystemView = hoodieTable.getRTFileSystemView(); RealtimeView fileSystemView = hoodieTable.getRTFileSystemView();
log.info("Compaction looking for files to compact in " + partitionPaths + " partitions"); log.info("Compaction looking for files to compact in " + partitionPaths + " partitions");
List<HoodieCompactionOperation> operations = List<HoodieCompactionOperation> operations = jsc.parallelize(partitionPaths, partitionPaths.size())
jsc.parallelize(partitionPaths, partitionPaths.size())
.flatMap((FlatMapFunction<String, CompactionOperation>) partitionPath -> fileSystemView .flatMap((FlatMapFunction<String, CompactionOperation>) partitionPath -> fileSystemView
.getLatestFileSlices(partitionPath) .getLatestFileSlices(partitionPath)
.filter(slice -> .filter(slice -> !fgIdsInPendingCompactions.contains(slice.getFileGroupId())).map(s -> {
!fgIdsInPendingCompactions.contains(slice.getFileGroupId())) List<HoodieLogFile> logFiles =
.map( s.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList());
s -> {
List<HoodieLogFile> logFiles = s.getLogFiles().sorted(HoodieLogFile
.getLogFileComparator()).collect(Collectors.toList());
totalLogFiles.add((long) logFiles.size()); totalLogFiles.add((long) logFiles.size());
totalFileSlices.add(1L); totalFileSlices.add(1L);
// Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO // Avro generated classes are not inheriting Serializable. Using CompactionOperation POJO
@@ -210,10 +196,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
Option<HoodieDataFile> dataFile = s.getDataFile(); Option<HoodieDataFile> dataFile = s.getDataFile();
return new CompactionOperation(dataFile, partitionPath, logFiles, return new CompactionOperation(dataFile, partitionPath, logFiles,
config.getCompactionStrategy().captureMetrics(config, dataFile, partitionPath, logFiles)); config.getCompactionStrategy().captureMetrics(config, dataFile, partitionPath, logFiles));
}) }).filter(c -> !c.getDeltaFilePaths().isEmpty()).collect(toList()).iterator())
.filter(c -> !c.getDeltaFilePaths().isEmpty()) .collect().stream().map(CompactionUtils::buildHoodieCompactionOperation).collect(toList());
.collect(toList()).iterator()).collect().stream().map(CompactionUtils::buildHoodieCompactionOperation)
.collect(toList());
log.info("Total of " + operations.size() + " compactions are retrieved"); log.info("Total of " + operations.size() + " compactions are retrieved");
log.info("Total number of latest files slices " + totalFileSlices.value()); log.info("Total number of latest files slices " + totalFileSlices.value());
log.info("Total number of log files " + totalLogFiles.value()); log.info("Total number of log files " + totalLogFiles.value());
@@ -222,11 +206,11 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
// compactions only // compactions only
HoodieCompactionPlan compactionPlan = config.getCompactionStrategy().generateCompactionPlan(config, operations, HoodieCompactionPlan compactionPlan = config.getCompactionStrategy().generateCompactionPlan(config, operations,
CompactionUtils.getAllPendingCompactionPlans(metaClient).stream().map(Pair::getValue).collect(toList())); CompactionUtils.getAllPendingCompactionPlans(metaClient).stream().map(Pair::getValue).collect(toList()));
Preconditions.checkArgument(compactionPlan.getOperations().stream().noneMatch( Preconditions.checkArgument(
compactionPlan.getOperations().stream().noneMatch(
op -> fgIdsInPendingCompactions.contains(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()))), op -> fgIdsInPendingCompactions.contains(new HoodieFileGroupId(op.getPartitionPath(), op.getFileId()))),
"Bad Compaction Plan. FileId MUST NOT have multiple pending compactions. " "Bad Compaction Plan. FileId MUST NOT have multiple pending compactions. "
+ "Please fix your strategy implementation." + "Please fix your strategy implementation." + "FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions
+ "FileIdsWithPendingCompactions :" + fgIdsInPendingCompactions
+ ", Selected workload :" + compactionPlan); + ", Selected workload :" + compactionPlan);
if (compactionPlan.getOperations().isEmpty()) { if (compactionPlan.getOperations().isEmpty()) {
log.warn("After filtering, Nothing to compact for " + metaClient.getBasePath()); log.warn("After filtering, Nothing to compact for " + metaClient.getBasePath());
View File
@@ -25,8 +25,8 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
/** /**
* CompactionStrategy which looks at total IO to be done for the compaction (read + write) and * CompactionStrategy which looks at total IO to be done for the compaction (read + write) and limits the list of
* limits the list of compactions to be under a configured limit on the IO * compactions to be under a configured limit on the IO
* *
* @see CompactionStrategy * @see CompactionStrategy
*/ */
View File
@@ -32,11 +32,10 @@ import org.apache.hudi.config.HoodieWriteConfig;
/** /**
* This strategy ensures that the last N partitions are picked up even if there are later partitions created for the * This strategy ensures that the last N partitions are picked up even if there are later partitions created for the
* dataset. lastNPartitions is defined as the N partitions before the currentDate. * dataset. lastNPartitions is defined as the N partitions before the currentDate. currentDay = 2018/01/01 The dataset
* currentDay = 2018/01/01 * has partitions for 2018/02/02 and 2018/03/03 beyond the currentDay This strategy will pick up the following
* The dataset has partitions for 2018/02/02 and 2018/03/03 beyond the currentDay * partitions for compaction : (2018/01/01, allPartitionsInRange[(2018/01/01 - lastNPartitions) to 2018/01/01),
* This strategy will pick up the following partitions for compaction : * 2018/02/02, 2018/03/03)
* (2018/01/01, allPartitionsInRange[(2018/01/01 - lastNPartitions) to 2018/01/01), 2018/02/02, 2018/03/03)
*/ */
public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionStrategy { public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionStrategy {
@@ -46,15 +45,14 @@ public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionS
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig, public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) { List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
// The earliest partition to compact - current day minus the target partitions limit // The earliest partition to compact - current day minus the target partitions limit
String earliestPartitionPathToCompact = dateFormat.format( String earliestPartitionPathToCompact =
getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction())); dateFormat.format(getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction()));
// Filter out all partitions greater than earliestPartitionPathToCompact // Filter out all partitions greater than earliestPartitionPathToCompact
List<HoodieCompactionOperation> eligibleCompactionOperations = operations.stream() List<HoodieCompactionOperation> eligibleCompactionOperations =
.collect(Collectors.groupingBy(HoodieCompactionOperation::getPartitionPath)).entrySet().stream() operations.stream().collect(Collectors.groupingBy(HoodieCompactionOperation::getPartitionPath)).entrySet()
.sorted(Map.Entry.comparingByKey(comparator)) .stream().sorted(Map.Entry.comparingByKey(comparator))
.filter(e -> comparator.compare(earliestPartitionPathToCompact, e.getKey()) >= 0) .filter(e -> comparator.compare(earliestPartitionPathToCompact, e.getKey()) >= 0)
.flatMap(e -> e.getValue().stream()) .flatMap(e -> e.getValue().stream()).collect(Collectors.toList());
.collect(Collectors.toList());
return eligibleCompactionOperations; return eligibleCompactionOperations;
} }
@@ -62,13 +60,12 @@ public class BoundedPartitionAwareCompactionStrategy extends DayBasedCompactionS
@Override @Override
public List<String> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitionPaths) { public List<String> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitionPaths) {
// The earliest partition to compact - current day minus the target partitions limit // The earliest partition to compact - current day minus the target partitions limit
String earliestPartitionPathToCompact = dateFormat.format( String earliestPartitionPathToCompact =
getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction())); dateFormat.format(getDateAtOffsetFromToday(-1 * writeConfig.getTargetPartitionsPerDayBasedCompaction()));
// Get all partitions and sort them // Get all partitions and sort them
List<String> filteredPartitionPaths = partitionPaths.stream().map(partition -> partition.replace("/", "-")) List<String> filteredPartitionPaths = partitionPaths.stream().map(partition -> partition.replace("/", "-"))
.sorted(Comparator.reverseOrder()).map(partitionPath -> partitionPath.replace("-", "/")) .sorted(Comparator.reverseOrder()).map(partitionPath -> partitionPath.replace("-", "/"))
.filter(e -> comparator.compare(earliestPartitionPathToCompact, e) >= 0) .filter(e -> comparator.compare(earliestPartitionPathToCompact, e) >= 0).collect(Collectors.toList());
.collect(Collectors.toList());
return filteredPartitionPaths; return filteredPartitionPaths;
} }
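The two hunks above only re-wrap the partition-window logic of BoundedPartitionAwareCompactionStrategy without changing behavior. For readers skimming the diff, here is a minimal standalone sketch of that logic; the class name, sample partitions, and hard-coded cutoff date are illustrative stand-ins, not part of the commit.

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

public class PartitionWindowSketch {

  // Same "yyyy/MM/dd" partition layout used by the day-based strategies in this diff.
  private static final String DATE_PARTITION_FORMAT = "yyyy/MM/dd";

  // Newest partition sorts first; parse failures surface as runtime exceptions, as in the hunk above.
  private static final Comparator<String> NEWEST_FIRST = (left, right) -> {
    try {
      Date l = new SimpleDateFormat(DATE_PARTITION_FORMAT, Locale.ENGLISH).parse(left);
      Date r = new SimpleDateFormat(DATE_PARTITION_FORMAT, Locale.ENGLISH).parse(right);
      return l.after(r) ? -1 : r.after(l) ? 1 : 0;
    } catch (ParseException e) {
      throw new IllegalArgumentException("Invalid partition date format", e);
    }
  };

  public static void main(String[] args) {
    // Stand-in for "today minus getTargetPartitionsPerDayBasedCompaction() days".
    String earliestPartitionToCompact = "2018/03/01";
    List<String> partitions = Arrays.asList("2018/01/01", "2018/02/15", "2018/03/01", "2018/04/01");

    // Keep only partitions at or after the cutoff, newest first (mirrors the filter in the hunk).
    List<String> eligible = partitions.stream()
        .sorted(NEWEST_FIRST)
        .filter(p -> NEWEST_FIRST.compare(earliestPartitionToCompact, p) >= 0)
        .collect(Collectors.toList());

    System.out.println("Eligible for compaction: " + eligible); // [2018/04/01, 2018/03/01]
  }
}

Running it prints the partitions at or after the cutoff, newest first, which is the same ordering and filtering the re-wrapped stream pipeline performs.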
View File
@@ -32,11 +32,10 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.compact.HoodieRealtimeTableCompactor; import org.apache.hudi.io.compact.HoodieRealtimeTableCompactor;
/** /**
* Strategy for compaction. Pluggable implementation to define how compaction should be done. The * Strategy for compaction. Pluggable implementation to define how compaction should be done. The over-ridden
* over-ridden implementations of this abstract class can capture the relevant metrics to order * implementations of this abstract class can capture the relevant metrics to order and filter the final list of
* and filter the final list of compaction operation to run in a single compaction. * compaction operation to run in a single compaction. Implementation of CompactionStrategy cannot hold any state.
* Implementation of CompactionStrategy cannot hold any state. Difference instantiations can be * Difference instantiations can be passed in every time
* passed in every time
* *
* @see HoodieRealtimeTableCompactor * @see HoodieRealtimeTableCompactor
*/ */
@@ -49,8 +48,8 @@ public abstract class CompactionStrategy implements Serializable {
public static final String TOTAL_LOG_FILES = "TOTAL_LOG_FILES"; public static final String TOTAL_LOG_FILES = "TOTAL_LOG_FILES";
/** /**
* Callback hook when a HoodieCompactionOperation is created. Individual strategies can capture the * Callback hook when a HoodieCompactionOperation is created. Individual strategies can capture the metrics they need
* metrics they need to decide on the priority. * to decide on the priority.
* *
* @param dataFile - Base file to compact * @param dataFile - Base file to compact
* @param partitionPath - Partition path * @param partitionPath - Partition path
@@ -65,11 +64,11 @@ public abstract class CompactionStrategy implements Serializable {
Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(size -> size >= 0) Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(size -> size >= 0)
.reduce((size1, size2) -> size1 + size2).orElse(0L); .reduce((size1, size2) -> size1 + size2).orElse(0L);
// Total read will be the base file + all the log files // Total read will be the base file + all the log files
Long totalIORead = FSUtils.getSizeInMB((dataFile.isPresent() ? dataFile.get().getFileSize() : 0L) Long totalIORead =
+ totalLogFileSize); FSUtils.getSizeInMB((dataFile.isPresent() ? dataFile.get().getFileSize() : 0L) + totalLogFileSize);
// Total write will be similar to the size of the base file // Total write will be similar to the size of the base file
Long totalIOWrite = FSUtils Long totalIOWrite =
.getSizeInMB(dataFile.isPresent() ? dataFile.get().getFileSize() : defaultMaxParquetFileSize); FSUtils.getSizeInMB(dataFile.isPresent() ? dataFile.get().getFileSize() : defaultMaxParquetFileSize);
// Total IO will the the IO for read + write // Total IO will the the IO for read + write
Long totalIO = totalIORead + totalIOWrite; Long totalIO = totalIORead + totalIOWrite;
// Save these metrics and we will use during the filter // Save these metrics and we will use during the filter
@@ -95,8 +94,7 @@ public abstract class CompactionStrategy implements Serializable {
List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) { List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
// Strategy implementation can overload this method to set specific compactor-id // Strategy implementation can overload this method to set specific compactor-id
return HoodieCompactionPlan.newBuilder() return HoodieCompactionPlan.newBuilder()
.setOperations(orderAndFilter(writeConfig, operations, pendingCompactionPlans)) .setOperations(orderAndFilter(writeConfig, operations, pendingCompactionPlans)).build();
.build();
} }
/** /**
@@ -109,13 +107,13 @@ public abstract class CompactionStrategy implements Serializable {
* @return list of compactions to perform in this run * @return list of compactions to perform in this run
*/ */
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig, public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<HoodieCompactionOperation> operations, List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
List<HoodieCompactionPlan> pendingCompactionPlans) {
return operations; return operations;
} }
/** /**
* Filter the partition paths based on compaction strategy * Filter the partition paths based on compaction strategy
*
* @param writeConfig * @param writeConfig
* @param allPartitionPaths * @param allPartitionPaths
* @return * @return
View File
@@ -34,21 +34,18 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieException;
/** /**
* This strategy orders compactions in reverse order of creation of Hive Partitions. It helps to * This strategy orders compactions in reverse order of creation of Hive Partitions. It helps to compact data in latest
* compact data in latest partitions first and then older capped at the Total_IO allowed. * partitions first and then older capped at the Total_IO allowed.
*/ */
public class DayBasedCompactionStrategy extends CompactionStrategy { public class DayBasedCompactionStrategy extends CompactionStrategy {
// For now, use SimpleDateFormat as default partition format // For now, use SimpleDateFormat as default partition format
protected static String datePartitionFormat = "yyyy/MM/dd"; protected static String datePartitionFormat = "yyyy/MM/dd";
// Sorts compaction in LastInFirstCompacted order // Sorts compaction in LastInFirstCompacted order
protected static Comparator<String> comparator = (String leftPartition, protected static Comparator<String> comparator = (String leftPartition, String rightPartition) -> {
String rightPartition) -> {
try { try {
Date left = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH) Date left = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH).parse(leftPartition);
.parse(leftPartition); Date right = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH).parse(rightPartition);
Date right = new SimpleDateFormat(datePartitionFormat, Locale.ENGLISH)
.parse(rightPartition);
return left.after(right) ? -1 : right.after(left) ? 1 : 0; return left.after(right) ? -1 : right.after(left) ? 1 : 0;
} catch (ParseException e) { } catch (ParseException e) {
throw new HoodieException("Invalid Partition Date Format", e); throw new HoodieException("Invalid Partition Date Format", e);
@@ -68,8 +65,7 @@ public class DayBasedCompactionStrategy extends CompactionStrategy {
List<HoodieCompactionOperation> filteredList = operations.stream() List<HoodieCompactionOperation> filteredList = operations.stream()
.collect(Collectors.groupingBy(HoodieCompactionOperation::getPartitionPath)).entrySet().stream() .collect(Collectors.groupingBy(HoodieCompactionOperation::getPartitionPath)).entrySet().stream()
.sorted(Map.Entry.comparingByKey(comparator)).limit(writeConfig.getTargetPartitionsPerDayBasedCompaction()) .sorted(Map.Entry.comparingByKey(comparator)).limit(writeConfig.getTargetPartitionsPerDayBasedCompaction())
.flatMap(e -> e.getValue().stream()) .flatMap(e -> e.getValue().stream()).collect(Collectors.toList());
.collect(Collectors.toList());
return filteredList; return filteredList;
} }
View File
@@ -30,14 +30,14 @@ import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
/** /**
* LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and * LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and limits the
* limits the compactions within a configured IO bound * compactions within a configured IO bound
* *
* @see BoundedIOCompactionStrategy * @see BoundedIOCompactionStrategy
* @see CompactionStrategy * @see CompactionStrategy
*/ */
public class LogFileSizeBasedCompactionStrategy extends BoundedIOCompactionStrategy implements public class LogFileSizeBasedCompactionStrategy extends BoundedIOCompactionStrategy
Comparator<HoodieCompactionOperation> { implements Comparator<HoodieCompactionOperation> {
private static final String TOTAL_LOG_FILE_SIZE = "TOTAL_LOG_FILE_SIZE"; private static final String TOTAL_LOG_FILE_SIZE = "TOTAL_LOG_FILE_SIZE";
@@ -47,9 +47,8 @@ public class LogFileSizeBasedCompactionStrategy extends BoundedIOCompactionStrat
Map<String, Double> metrics = super.captureMetrics(config, dataFile, partitionPath, logFiles); Map<String, Double> metrics = super.captureMetrics(config, dataFile, partitionPath, logFiles);
// Total size of all the log files // Total size of all the log files
Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize) Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(size -> size >= 0)
.filter(size -> size >= 0).reduce((size1, size2) -> size1 + size2) .reduce((size1, size2) -> size1 + size2).orElse(0L);
.orElse(0L);
// save the metrics needed during the order // save the metrics needed during the order
metrics.put(TOTAL_LOG_FILE_SIZE, totalLogFileSize.doubleValue()); metrics.put(TOTAL_LOG_FILE_SIZE, totalLogFileSize.doubleValue());
return metrics; return metrics;
@@ -59,9 +58,8 @@ public class LogFileSizeBasedCompactionStrategy extends BoundedIOCompactionStrat
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig, public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) { List<HoodieCompactionOperation> operations, List<HoodieCompactionPlan> pendingCompactionPlans) {
// Order the operations based on the reverse size of the logs and limit them by the IO // Order the operations based on the reverse size of the logs and limit them by the IO
return super return super.orderAndFilter(writeConfig, operations.stream().sorted(this).collect(Collectors.toList()),
.orderAndFilter(writeConfig, pendingCompactionPlans);
operations.stream().sorted(this).collect(Collectors.toList()), pendingCompactionPlans);
} }
@Override @Override
View File
@@ -24,9 +24,8 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
/** /**
* UnBoundedCompactionStrategy will not change ordering or filter any compaction. It is a * UnBoundedCompactionStrategy will not change ordering or filter any compaction. It is a pass-through and will compact
* pass-through and will compact all the base files which has a log file. This usually means * all the base files which has a log file. This usually means no-intelligence on compaction.
* no-intelligence on compaction.
* *
* @see CompactionStrategy * @see CompactionStrategy
*/ */
View File
@@ -27,12 +27,11 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.HoodieWriteConfig;
/** /**
* UnBoundedPartitionAwareCompactionStrategy is a custom UnBounded Strategy. * UnBoundedPartitionAwareCompactionStrategy is a custom UnBounded Strategy. This will filter all the partitions that
* This will filter all the partitions that are eligible to be compacted by a * are eligible to be compacted by a {@link BoundedPartitionAwareCompactionStrategy} and return the result. This is done
* {@link BoundedPartitionAwareCompactionStrategy} and return the result. * so that a long running UnBoundedPartitionAwareCompactionStrategy does not step over partitions in a shorter running
* This is done so that a long running UnBoundedPartitionAwareCompactionStrategy does not step over partitions * BoundedPartitionAwareCompactionStrategy. Essentially, this is an inverse of the partitions chosen in
* in a shorter running BoundedPartitionAwareCompactionStrategy. Essentially, this is an inverse of the * BoundedPartitionAwareCompactionStrategy
* partitions chosen in BoundedPartitionAwareCompactionStrategy
* *
* @see CompactionStrategy * @see CompactionStrategy
*/ */
@@ -41,10 +40,10 @@ public class UnBoundedPartitionAwareCompactionStrategy extends CompactionStrateg
@Override @Override
public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig config, public List<HoodieCompactionOperation> orderAndFilter(HoodieWriteConfig config,
final List<HoodieCompactionOperation> operations, final List<HoodieCompactionPlan> pendingCompactionWorkloads) { final List<HoodieCompactionOperation> operations, final List<HoodieCompactionPlan> pendingCompactionWorkloads) {
BoundedPartitionAwareCompactionStrategy boundedPartitionAwareCompactionStrategy BoundedPartitionAwareCompactionStrategy boundedPartitionAwareCompactionStrategy =
= new BoundedPartitionAwareCompactionStrategy(); new BoundedPartitionAwareCompactionStrategy();
List<HoodieCompactionOperation> operationsToExclude = boundedPartitionAwareCompactionStrategy List<HoodieCompactionOperation> operationsToExclude =
.orderAndFilter(config, operations, pendingCompactionWorkloads); boundedPartitionAwareCompactionStrategy.orderAndFilter(config, operations, pendingCompactionWorkloads);
List<HoodieCompactionOperation> allOperations = new ArrayList<>(operations); List<HoodieCompactionOperation> allOperations = new ArrayList<>(operations);
allOperations.removeAll(operationsToExclude); allOperations.removeAll(operationsToExclude);
return allOperations; return allOperations;
@@ -52,13 +51,13 @@ public class UnBoundedPartitionAwareCompactionStrategy extends CompactionStrateg
@Override @Override
public List<String> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitionPaths) { public List<String> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitionPaths) {
List<String> allPartitionPaths = partitionPaths.stream().map(partition -> partition.replace("/", "-")) List<String> allPartitionPaths =
.sorted(Comparator.reverseOrder()).map(partitionPath -> partitionPath.replace("-", "/")) partitionPaths.stream().map(partition -> partition.replace("/", "-")).sorted(Comparator.reverseOrder())
.collect(Collectors.toList()); .map(partitionPath -> partitionPath.replace("-", "/")).collect(Collectors.toList());
BoundedPartitionAwareCompactionStrategy boundedPartitionAwareCompactionStrategy BoundedPartitionAwareCompactionStrategy boundedPartitionAwareCompactionStrategy =
= new BoundedPartitionAwareCompactionStrategy(); new BoundedPartitionAwareCompactionStrategy();
List<String> partitionsToExclude = boundedPartitionAwareCompactionStrategy.filterPartitionPaths(writeConfig, List<String> partitionsToExclude =
partitionPaths); boundedPartitionAwareCompactionStrategy.filterPartitionPaths(writeConfig, partitionPaths);
allPartitionPaths.removeAll(partitionsToExclude); allPartitionPaths.removeAll(partitionsToExclude);
return allPartitionPaths; return allPartitionPaths;
} }
View File
@@ -32,9 +32,8 @@ public class HoodieParquetConfig {
private Configuration hadoopConf; private Configuration hadoopConf;
private double compressionRatio; private double compressionRatio;
public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, CompressionCodecName compressionCodecName,
CompressionCodecName compressionCodecName, int blockSize, int pageSize, long maxFileSize, int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf, double compressionRatio) {
Configuration hadoopConf, double compressionRatio) {
this.writeSupport = writeSupport; this.writeSupport = writeSupport;
this.compressionCodecName = compressionCodecName; this.compressionCodecName = compressionCodecName;
this.blockSize = blockSize; this.blockSize = blockSize;
View File
@@ -36,11 +36,11 @@ import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.TaskContext; import org.apache.spark.TaskContext;
/** /**
* HoodieParquetWriter extends the ParquetWriter to help limit the size of underlying file. Provides * HoodieParquetWriter extends the ParquetWriter to help limit the size of underlying file. Provides a way to check if
* a way to check if the current file can take more records with the <code>canWrite()</code> * the current file can take more records with the <code>canWrite()</code>
*/ */
public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends IndexedRecord> extends public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends IndexedRecord>
ParquetWriter<IndexedRecord> implements HoodieStorageWriter<R> { extends ParquetWriter<IndexedRecord> implements HoodieStorageWriter<R> {
private static AtomicLong recordIndex = new AtomicLong(1); private static AtomicLong recordIndex = new AtomicLong(1);
@@ -52,24 +52,22 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
private final Schema schema; private final Schema schema;
public HoodieParquetWriter(String commitTime, Path file, HoodieParquetConfig parquetConfig, public HoodieParquetWriter(String commitTime, Path file, HoodieParquetConfig parquetConfig, Schema schema)
Schema schema) throws IOException { throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()), super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), parquetConfig.getCompressionCodecName(),
parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(), parquetConfig.getBlockSize(), parquetConfig.getPageSize(), parquetConfig.getPageSize(),
parquetConfig.getPageSize(), parquetConfig.getPageSize(),
ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED,
ParquetWriter.DEFAULT_WRITER_VERSION, ParquetWriter.DEFAULT_WRITER_VERSION, registerFileSystem(file, parquetConfig.getHadoopConf()));
registerFileSystem(file, parquetConfig.getHadoopConf()));
this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()); this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
this.fs = (HoodieWrapperFileSystem) this.file this.fs =
.getFileSystem(registerFileSystem(file, parquetConfig.getHadoopConf())); (HoodieWrapperFileSystem) this.file.getFileSystem(registerFileSystem(file, parquetConfig.getHadoopConf()));
// We cannot accurately measure the snappy compressed output file size. We are choosing a // We cannot accurately measure the snappy compressed output file size. We are choosing a
// conservative 10% // conservative 10%
// TODO - compute this compression ratio dynamically by looking at the bytes written to the // TODO - compute this compression ratio dynamically by looking at the bytes written to the
// stream and the actual file size reported by HDFS // stream and the actual file size reported by HDFS
this.maxFileSize = parquetConfig.getMaxFileSize() + Math this.maxFileSize = parquetConfig.getMaxFileSize()
.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio()); + Math.round(parquetConfig.getMaxFileSize() * parquetConfig.getCompressionRatio());
this.writeSupport = parquetConfig.getWriteSupport(); this.writeSupport = parquetConfig.getWriteSupport();
this.commitTime = commitTime; this.commitTime = commitTime;
this.schema = schema; this.schema = schema;
@@ -85,10 +83,10 @@ public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends Indexe
@Override @Override
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException { public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), String seqId =
recordIndex.getAndIncrement()); HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), recordIndex.getAndIncrement());
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, record.getRecordKey(), record.getPartitionPath(),
record.getPartitionPath(), file.getName()); file.getName());
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId); HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId);
super.write(avroRecord); super.write(avroRecord);
writeSupport.add(record.getRecordKey()); writeSupport.add(record.getRecordKey());
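The constructor hunk above keeps the same size-budget arithmetic while re-wrapping it: the configured max Parquet file size is padded by the compression-ratio headroom because the snappy-compressed output cannot be measured accurately mid-write. A tiny self-contained sketch of that arithmetic follows; all numbers and the counter variable are hypothetical, not taken from the commit.

public class ParquetSizeBudgetSketch {

  public static void main(String[] args) {
    long configuredMaxFileSize = 120L * 1024 * 1024; // hypothetical 120 MB target base-file size
    double compressionRatio = 0.1;                   // the conservative 10% headroom noted in the hunk

    // Effective budget = configured max + headroom for the unmeasurable compressed output.
    long effectiveMaxFileSize =
        configuredMaxFileSize + Math.round(configuredMaxFileSize * compressionRatio);

    long bytesWrittenSoFar = 100L * 1024 * 1024;     // hypothetical counter from the wrapper file system
    boolean canWrite = bytesWrittenSoFar < effectiveMaxFileSize;

    System.out.printf("budget=%d bytes, written=%d bytes, canWrite=%b%n",
        effectiveMaxFileSize, bytesWrittenSoFar, canWrite);
  }
}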
View File
@@ -36,8 +36,8 @@ import org.apache.parquet.avro.AvroSchemaConverter;
public class HoodieStorageWriterFactory { public class HoodieStorageWriterFactory {
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter( public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
String commitTime, Path path, HoodieTable<T> hoodieTable, String commitTime, Path path, HoodieTable<T> hoodieTable, HoodieWriteConfig config, Schema schema)
HoodieWriteConfig config, Schema schema) throws IOException { throws IOException {
final String name = path.getName(); final String name = path.getName();
final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name); final String extension = FSUtils.isLogFile(path) ? HOODIE_LOG.getFileExtension() : FSUtils.getFileExtension(name);
if (PARQUET.getFileExtension().equals(extension)) { if (PARQUET.getFileExtension().equals(extension)) {
@@ -46,19 +46,16 @@ public class HoodieStorageWriterFactory {
throw new UnsupportedOperationException(extension + " format not supported yet."); throw new UnsupportedOperationException(extension + " format not supported yet.");
} }
private static <T extends HoodieRecordPayload, private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(
R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(String commitTime, Path path, String commitTime, Path path, HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable)
HoodieWriteConfig config, Schema schema, HoodieTable hoodieTable) throws IOException { throws IOException {
BloomFilter filter = new BloomFilter(config.getBloomFilterNumEntries(), BloomFilter filter = new BloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP());
config.getBloomFilterFPP()); HoodieAvroWriteSupport writeSupport =
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport( new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
new AvroSchemaConverter().convert(schema), schema, filter);
HoodieParquetConfig parquetConfig = HoodieParquetConfig parquetConfig = new HoodieParquetConfig(writeSupport, config.getParquetCompressionCodec(),
new HoodieParquetConfig(writeSupport, config.getParquetCompressionCodec(), config.getParquetBlockSize(), config.getParquetPageSize(), config.getParquetMaxFileSize(),
config.getParquetBlockSize(), config.getParquetPageSize(), hoodieTable.getHadoopConf(), config.getParquetCompressionRatio());
config.getParquetMaxFileSize(), hoodieTable.getHadoopConf(),
config.getParquetCompressionRatio());
return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema); return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema);
} }
View File
@@ -118,8 +118,8 @@ public class HoodieMetrics {
return indexTimer == null ? null : indexTimer.time(); return indexTimer == null ? null : indexTimer.time();
} }
public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, HoodieCommitMetadata metadata,
HoodieCommitMetadata metadata, String actionType) { String actionType) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten(); long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten();
long totalFilesInsert = metadata.fetchTotalFilesInsert(); long totalFilesInsert = metadata.fetchTotalFilesInsert();
@@ -154,9 +154,8 @@ public class HoodieMetrics {
public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) { public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
logger.info(String logger.info(
.format("Sending rollback metrics (duration=%d, numFilesDeleted=%d)", durationInMs, String.format("Sending rollback metrics (duration=%d, numFilesDeleted=%d)", durationInMs, numFilesDeleted));
numFilesDeleted));
Metrics.registerGauge(getMetricsName("rollback", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("rollback", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted); Metrics.registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted);
} }
@@ -164,9 +163,8 @@ public class HoodieMetrics {
public void updateCleanMetrics(long durationInMs, int numFilesDeleted) { public void updateCleanMetrics(long durationInMs, int numFilesDeleted) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
logger.info(String logger.info(
.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)", durationInMs, String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)", durationInMs, numFilesDeleted));
numFilesDeleted));
Metrics.registerGauge(getMetricsName("clean", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("clean", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted); Metrics.registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted);
} }
@@ -174,9 +172,8 @@ public class HoodieMetrics {
public void updateFinalizeWriteMetrics(long durationInMs, long numFilesFinalized) { public void updateFinalizeWriteMetrics(long durationInMs, long numFilesFinalized) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
logger.info(String logger.info(String.format("Sending finalize write metrics (duration=%d, numFilesFinalized=%d)", durationInMs,
.format("Sending finalize write metrics (duration=%d, numFilesFinalized=%d)", numFilesFinalized));
durationInMs, numFilesFinalized));
Metrics.registerGauge(getMetricsName("finalize", "duration"), durationInMs); Metrics.registerGauge(getMetricsName("finalize", "duration"), durationInMs);
Metrics.registerGauge(getMetricsName("finalize", "numFilesFinalized"), numFilesFinalized); Metrics.registerGauge(getMetricsName("finalize", "numFilesFinalized"), numFilesFinalized);
} }
@@ -184,10 +181,8 @@ public class HoodieMetrics {
public void updateIndexMetrics(final String action, final long durationInMs) { public void updateIndexMetrics(final String action, final long durationInMs) {
if (config.isMetricsOn()) { if (config.isMetricsOn()) {
logger.info(String logger.info(String.format("Sending index metrics (%s.duration, %d)", action, durationInMs));
.format("Sending index metrics (%s.duration, %d)",action, durationInMs)); Metrics.registerGauge(getMetricsName("index", String.format("%s.duration", action)), durationInMs);
Metrics.registerGauge(getMetricsName("index", String.format("%s.duration", action)),
durationInMs);
} }
} }
View File
@@ -26,12 +26,10 @@ import java.io.Closeable;
public class InMemoryMetricsReporter extends MetricsReporter { public class InMemoryMetricsReporter extends MetricsReporter {
@Override @Override
public void start() { public void start() {}
}
@Override @Override
public void report() { public void report() {}
}
@Override @Override
public Closeable getReporter() { public Closeable getReporter() {
View File
@@ -30,8 +30,7 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
/** /**
* Implementation of Graphite reporter, which connects to the Graphite server, and send metrics to * Implementation of Graphite reporter, which connects to the Graphite server, and send metrics to that server.
* that server.
*/ */
public class MetricsGraphiteReporter extends MetricsReporter { public class MetricsGraphiteReporter extends MetricsReporter {
@@ -50,8 +49,7 @@ public class MetricsGraphiteReporter extends MetricsReporter {
this.serverHost = config.getGraphiteServerHost(); this.serverHost = config.getGraphiteServerHost();
this.serverPort = config.getGraphiteServerPort(); this.serverPort = config.getGraphiteServerPort();
if (serverHost == null || serverPort == 0) { if (serverHost == null || serverPort == 0) {
throw new RuntimeException(String throw new RuntimeException(String.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].",
.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].",
serverHost, serverPort)); serverHost, serverPort));
} }
@@ -84,8 +82,7 @@ public class MetricsGraphiteReporter extends MetricsReporter {
private GraphiteReporter createGraphiteReport() { private GraphiteReporter createGraphiteReport() {
Graphite graphite = new Graphite(new InetSocketAddress(serverHost, serverPort)); Graphite graphite = new Graphite(new InetSocketAddress(serverHost, serverPort));
String reporterPrefix = config.getGraphiteMetricPrefix(); String reporterPrefix = config.getGraphiteMetricPrefix();
return GraphiteReporter.forRegistry(registry).prefixedWith(reporterPrefix) return GraphiteReporter.forRegistry(registry).prefixedWith(reporterPrefix).convertRatesTo(TimeUnit.SECONDS)
.convertRatesTo(TimeUnit.SECONDS).convertDurationsTo(TimeUnit.MILLISECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS).filter(MetricFilter.ALL).build(graphite);
.filter(MetricFilter.ALL).build(graphite);
} }
} }
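The reporter construction above is only re-wrapped by this commit; it is the standard Dropwizard Metrics Graphite builder chain. A self-contained sketch of that chain follows; the registry contents, host/port, prefix, and reporting interval are illustrative assumptions, not values from Hudi's configuration.

import com.codahale.metrics.MetricFilter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.graphite.Graphite;
import com.codahale.metrics.graphite.GraphiteReporter;

import java.net.InetSocketAddress;
import java.util.concurrent.TimeUnit;

public class GraphiteReporterSketch {

  public static void main(String[] args) {
    MetricRegistry registry = new MetricRegistry();
    registry.counter("commits").inc(); // example metric so the reporter has something to ship

    // Same builder chain the hunk re-wraps: rates in seconds, durations in milliseconds, no filtering.
    Graphite graphite = new Graphite(new InetSocketAddress("localhost", 2003));
    GraphiteReporter reporter = GraphiteReporter.forRegistry(registry)
        .prefixedWith("hoodie.sketch")
        .convertRatesTo(TimeUnit.SECONDS)
        .convertDurationsTo(TimeUnit.MILLISECONDS)
        .filter(MetricFilter.ALL)
        .build(graphite);

    reporter.start(30, TimeUnit.SECONDS); // report every 30 seconds
  }
}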
View File
@@ -19,8 +19,7 @@
package org.apache.hudi.metrics; package org.apache.hudi.metrics;
/** /**
* Types of the reporter. Right now we only support Graphite. We can include JMX and CSV in the * Types of the reporter. Right now we only support Graphite. We can include JMX and CSV in the future.
* future.
*/ */
public enum MetricsReporterType { public enum MetricsReporterType {
GRAPHITE, INMEMORY GRAPHITE, INMEMORY
View File
@@ -82,8 +82,7 @@ import scala.Tuple2;
 /**
  * Implementation of a very heavily read-optimized Hoodie Table where
  * <p>
- * INSERTS - Produce new files, block aligned to desired size (or) Merge with the smallest existing
- * file, to expand it
+ * INSERTS - Produce new files, block aligned to desired size (or) Merge with the smallest existing file, to expand it
  * <p>
  * UPDATES - Produce a new version of the file, just replacing the updated records with new values
  */
@@ -95,11 +94,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     super(config, jsc);
   }
-  private static PairFlatMapFunction<Iterator<Tuple2<String, String>>, String,
-      PartitionCleanStat> deleteFilesFunc(
+  private static PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat> deleteFilesFunc(
       HoodieTable table) {
-    return (PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat>)
-        iter -> {
+    return (PairFlatMapFunction<Iterator<Tuple2<String, String>>, String, PartitionCleanStat>) iter -> {
       Map<String, PartitionCleanStat> partitionCleanStatMap = new HashMap<>();
       FileSystem fs = table.getMetaClient().getFs();
@@ -116,8 +113,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
         partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult);
       }
-      return partitionCleanStatMap.entrySet().stream()
-          .map(e -> new Tuple2<>(e.getKey(), e.getValue()))
+      return partitionCleanStatMap.entrySet().stream().map(e -> new Tuple2<>(e.getKey(), e.getValue()))
           .collect(Collectors.toList()).iterator();
     };
   }
@@ -131,8 +127,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     };
   }
-  private static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr)
-      throws IOException {
+  private static Boolean deleteFileAndGetResult(FileSystem fs, String deletePathStr) throws IOException {
     Path deletePath = new Path(deletePathStr);
     logger.debug("Working on delete path :" + deletePath);
     boolean deleteResult = fs.delete(deletePath, false);
@@ -171,8 +166,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     throw new HoodieNotSupportedException("Compaction is not supported from a CopyOnWrite table");
   }
-  public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId,
-      Iterator<HoodieRecord<T>> recordItr) throws IOException {
+  public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr)
+      throws IOException {
     // This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
     if (!recordItr.hasNext()) {
       logger.info("Empty partition with fileId => " + fileId);
@@ -190,17 +185,16 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     return handleUpdateInternal(upsertHandle, commitTime, fileId);
   }
-  protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle,
-      String commitTime, String fileId)
-      throws IOException {
+  protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String commitTime,
+      String fileId) throws IOException {
     if (upsertHandle.getOldFilePath() == null) {
       throw new HoodieUpsertException(
           "Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
     } else {
       AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getWriterSchema());
       BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
-      try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
-          .withConf(getHadoopConf()).build()) {
+      try (ParquetReader<IndexedRecord> reader =
+          AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath()).withConf(getHadoopConf()).build()) {
         wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
             new UpdateHandler(upsertHandle), x -> x);
         wrapper.execute();
@@ -216,15 +210,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       // TODO(vc): This needs to be revisited
       if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
-        logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath()
-            + ", " + upsertHandle.getWriteStatus());
+        logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", "
+            + upsertHandle.getWriteStatus());
       }
-      return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus()))
-          .iterator();
+      return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator();
     }
-  protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
-      Iterator<HoodieRecord<T>> recordItr) {
+  protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr) {
     return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileId);
   }
@@ -233,8 +225,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
   }
-  public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx,
-      Iterator<HoodieRecord<T>> recordItr) throws Exception {
+  public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
+      throws Exception {
     // This is needed since sometimes some buckets are never picked in getPartition() and end up with 0 records
     if (!recordItr.hasNext()) {
       logger.info("Empty partition");
@@ -245,16 +237,16 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
       Iterator<HoodieRecord<T>> recordItr) {
-    HoodieCreateHandle createHandle = new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId,
-        recordItr);
+    HoodieCreateHandle createHandle =
+        new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId, recordItr);
     createHandle.write();
     return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
   }
   @SuppressWarnings("unchecked")
   @Override
-  public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition,
-      Iterator recordItr, Partitioner partitioner) {
+  public Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition, Iterator recordItr,
+      Partitioner partitioner) {
     UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner;
     BucketInfo binfo = upsertPartitioner.getBucketInfo(partition);
     BucketType btype = binfo.bucketType;
@@ -264,8 +256,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       } else if (btype.equals(BucketType.UPDATE)) {
         return handleUpdate(commitTime, binfo.fileIdPrefix, recordItr);
       } else {
-        throw new HoodieUpsertException(
-            "Unknown bucketType " + btype + " for partition :" + partition);
+        throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition);
       }
     } catch (Throwable t) {
       String msg = "Error upserting bucketType " + btype + " for partition :" + partition;
@@ -275,15 +266,14 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   }
   @Override
-  public Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition,
-      Iterator recordItr, Partitioner partitioner) {
+  public Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition, Iterator recordItr,
+      Partitioner partitioner) {
     return handleUpsertPartition(commitTime, partition, recordItr, partitioner);
   }
   /**
-   * Performs cleaning of partition paths according to cleaning policy and returns the number of
-   * files cleaned. Handles skews in partitions to clean by making files to clean as the unit of
-   * task distribution.
+   * Performs cleaning of partition paths according to cleaning policy and returns the number of files cleaned. Handles
+   * skews in partitions to clean by making files to clean as the unit of task distribution.
    *
    * @throws IllegalArgumentException if unknown cleaning policy is provided
    */
@@ -291,11 +281,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   public List<HoodieCleanStat> clean(JavaSparkContext jsc) {
     try {
       FileSystem fs = getMetaClient().getFs();
-      List<String> partitionsToClean = FSUtils
-          .getAllPartitionPaths(fs, getMetaClient().getBasePath(),
-              config.shouldAssumeDatePartitioning());
-      logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config
-          .getCleanerPolicy());
+      List<String> partitionsToClean =
+          FSUtils.getAllPartitionPaths(fs, getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning());
+      logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config.getCleanerPolicy());
       if (partitionsToClean.isEmpty()) {
         logger.info("Nothing to clean here mom. It is already clean");
         return Collections.emptyList();
@@ -307,12 +295,10 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   }
   /**
-   * Common method used for cleaning out parquet files under a partition path during rollback of a
-   * set of commits
+   * Common method used for cleaning out parquet files under a partition path during rollback of a set of commits
    */
   protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results, String partitionPath,
-      PathFilter filter)
-      throws IOException {
+      PathFilter filter) throws IOException {
     logger.info("Cleaning path " + partitionPath);
     FileSystem fs = getMetaClient().getFs();
     FileStatus[] toBeDeleted = fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(), partitionPath), filter);
@@ -325,12 +311,10 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   }
   /**
-   * Common method used for cleaning out parquet files under a partition path during rollback of a
-   * set of commits
+   * Common method used for cleaning out parquet files under a partition path during rollback of a set of commits
    */
-  protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results, String commit, String
-      partitionPath)
-      throws IOException {
+  protected Map<FileStatus, Boolean> deleteCleanedFiles(Map<FileStatus, Boolean> results, String commit,
+      String partitionPath) throws IOException {
     logger.info("Cleaning path " + partitionPath);
     FileSystem fs = getMetaClient().getFs();
     PathFilter filter = (path) -> {
@@ -354,8 +338,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       throws IOException {
     String actionType = metaClient.getCommitActionType();
     HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
-    List<String> inflights = this.getInflightCommitTimeline().getInstants()
-        .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
+    List<String> inflights =
+        this.getInflightCommitTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
     // Atomically unpublish the commits
     if (!inflights.contains(commit)) {
       activeTimeline.revertToInflight(new HoodieInstant(false, actionType, commit));
@@ -364,10 +348,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     // delete all the data files for this commit
     logger.info("Clean out all parquet files generated for commit: " + commit);
-    List<HoodieRollbackStat> stats = jsc.parallelize(FSUtils
-        .getAllPartitionPaths(metaClient.getFs(), getMetaClient().getBasePath(),
-            config.shouldAssumeDatePartitioning()))
-        .map((Function<String, HoodieRollbackStat>) partitionPath -> {
+    List<HoodieRollbackStat> stats =
+        jsc.parallelize(FSUtils.getAllPartitionPaths(metaClient.getFs(), getMetaClient().getBasePath(),
+            config.shouldAssumeDatePartitioning())).map((Function<String, HoodieRollbackStat>) partitionPath -> {
           // Scan all partitions files with this commit time
           final Map<FileStatus, Boolean> filesToDeletedStatus = new HashMap<>();
           deleteCleanedFiles(filesToDeletedStatus, commit, partitionPath);
@@ -376,13 +359,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
         }).collect();
     // Delete Inflight instant if enabled
-    deleteInflightInstant(deleteInstants, activeTimeline,
-        new HoodieInstant(true, actionType, commit));
+    deleteInflightInstant(deleteInstants, activeTimeline, new HoodieInstant(true, actionType, commit));
     return stats;
   }
   /**
    * Delete Inflight instant if enabled
+   *
    * @param deleteInstant Enable Deletion of Inflight instant
    * @param activeTimeline Hoodie active timeline
    * @param instantToBeDeleted Instant to be deleted
@@ -401,30 +384,27 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     }
   }
-  private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean,
-      JavaSparkContext jsc) {
+  private List<HoodieCleanStat> cleanPartitionPaths(List<String> partitionsToClean, JavaSparkContext jsc) {
     int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism());
     logger.info("Using cleanerParallelism: " + cleanerParallelism);
     List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
-        .parallelize(partitionsToClean, cleanerParallelism)
-        .flatMapToPair(getFilesToDeleteFunc(this, config))
+        .parallelize(partitionsToClean, cleanerParallelism).flatMapToPair(getFilesToDeleteFunc(this, config))
         .repartition(cleanerParallelism) // repartition to remove skews
         .mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey(
             // merge partition level clean stats below
-            (Function2<PartitionCleanStat, PartitionCleanStat, PartitionCleanStat>) (e1, e2) -> e1
-                .merge(e2)).collect();
-    Map<String, PartitionCleanStat> partitionCleanStatsMap = partitionCleanStats.stream()
-        .collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
+            (Function2<PartitionCleanStat, PartitionCleanStat, PartitionCleanStat>) (e1, e2) -> e1.merge(e2))
+        .collect();
+    Map<String, PartitionCleanStat> partitionCleanStatsMap =
+        partitionCleanStats.stream().collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
     HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config);
     // Return PartitionCleanStat for each partition passed.
     return partitionsToClean.stream().map(partitionPath -> {
       PartitionCleanStat partitionCleanStat =
-          (partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap
-              .get(partitionPath) : new PartitionCleanStat(partitionPath);
-      return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy())
-          .withPartitionPath(partitionPath)
+          (partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap.get(partitionPath)
+              : new PartitionCleanStat(partitionPath);
+      return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy()).withPartitionPath(partitionPath)
          .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain())
          .withDeletePathPattern(partitionCleanStat.deletePathPatterns)
          .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
@@ -453,8 +433,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     }
     @Override
-    protected void finish() {
-    }
+    protected void finish() {}
     @Override
     protected Void getResult() {
@@ -487,8 +466,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     private PartitionCleanStat merge(PartitionCleanStat other) {
       if (!this.partitionPath.equals(other.partitionPath)) {
-        throw new RuntimeException(String
-            .format("partitionPath is not a match: (%s, %s)", partitionPath, other.partitionPath));
+        throw new RuntimeException(
+            String.format("partitionPath is not a match: (%s, %s)", partitionPath, other.partitionPath));
       }
       successDeleteFiles.addAll(other.successDeleteFiles);
       deletePathPatterns.addAll(other.deletePathPatterns);
@@ -516,8 +495,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   }
   /**
-   * Helper class for an insert bucket along with the weight [0.0, 0.1] that defines the amount of
-   * incoming inserts that should be allocated to the bucket
+   * Helper class for an insert bucket along with the weight [0.0, 0.1] that defines the amount of incoming inserts that
+   * should be allocated to the bucket
    */
   class InsertBucket implements Serializable {
@@ -563,8 +542,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
      */
     List<SmallFile> smallFiles = new ArrayList<SmallFile>();
     /**
-     * Total number of RDD partitions, is determined by total buckets we want to pack the incoming
-     * workload into
+     * Total number of RDD partitions, is determined by total buckets we want to pack the incoming workload into
      */
     private int totalBuckets = 0;
     /**
@@ -599,8 +577,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       assignUpdates(profile);
       assignInserts(profile);
-      logger.info(
-          "Total Buckets :" + totalBuckets + ", " + "buckets info => " + bucketInfoMap + ", \n"
+      logger.info("Total Buckets :" + totalBuckets + ", " + "buckets info => " + bucketInfoMap + ", \n"
           + "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n"
           + "UpdateLocations mapped to buckets =>" + updateLocationToBucket);
     }
@@ -608,8 +585,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     private void assignUpdates(WorkloadProfile profile) {
       // each update location gets a partition
       WorkloadStat gStat = profile.getGlobalStat();
-      for (Map.Entry<String, Pair<String, Long>> updateLocEntry : gStat.getUpdateLocationToCount()
-          .entrySet()) {
+      for (Map.Entry<String, Pair<String, Long>> updateLocEntry : gStat.getUpdateLocationToCount().entrySet()) {
        addUpdateBucket(updateLocEntry.getKey());
      }
    }
@@ -628,8 +604,9 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     private void assignInserts(WorkloadProfile profile) {
       // for new inserts, compute buckets depending on how many records we have for each partition
       Set<String> partitionPaths = profile.getPartitionPaths();
-      long averageRecordSize = averageBytesPerRecord(metaClient.getActiveTimeline().getCommitTimeline()
-          .filterCompletedInstants(), config.getCopyOnWriteRecordSizeEstimate());
+      long averageRecordSize =
+          averageBytesPerRecord(metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(),
+              config.getCopyOnWriteRecordSizeEstimate());
       logger.info("AvgRecordSize => " + averageRecordSize);
       for (String partitionPath : partitionPaths) {
         WorkloadStat pStat = profile.getWorkloadStat(partitionPath);
@@ -644,20 +621,17 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
         // first try packing this into one of the smallFiles
         for (SmallFile smallFile : smallFiles) {
-          long recordsToAppend = Math
-              .min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
+          long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize,
               totalUnassignedInserts);
           if (recordsToAppend > 0 && totalUnassignedInserts > 0) {
             // create a new bucket or re-use an existing bucket
             int bucket;
             if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) {
               bucket = updateLocationToBucket.get(smallFile.location.getFileId());
-              logger.info("Assigning " + recordsToAppend + " inserts to existing update bucket "
-                  + bucket);
+              logger.info("Assigning " + recordsToAppend + " inserts to existing update bucket " + bucket);
             } else {
               bucket = addUpdateBucket(smallFile.location.getFileId());
-              logger.info(
-                  "Assigning " + recordsToAppend + " inserts to new update bucket " + bucket);
+              logger.info("Assigning " + recordsToAppend + " inserts to new update bucket " + bucket);
             }
             bucketNumbers.add(bucket);
             recordsPerBucket.add(recordsToAppend);
@@ -673,10 +647,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
         }
         int insertBuckets = (int) Math.ceil((1.0 * totalUnassignedInserts) / insertRecordsPerBucket);
-        logger.info(
-            "After small file assignment: unassignedInserts => " + totalUnassignedInserts
-                + ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => "
-                + insertRecordsPerBucket);
+        logger.info("After small file assignment: unassignedInserts => " + totalUnassignedInserts
+            + ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => " + insertRecordsPerBucket);
         for (int b = 0; b < insertBuckets; b++) {
           bucketNumbers.add(totalBuckets);
           recordsPerBucket.add(totalUnassignedInserts / insertBuckets);
@@ -696,8 +668,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
           bkt.weight = (1.0 * recordsPerBucket.get(i)) / pStat.getNumInserts();
           insertBuckets.add(bkt);
         }
-        logger.info(
-            "Total insert buckets for partition path " + partitionPath + " => " + insertBuckets);
+        logger.info("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets);
         partitionPathToInsertBuckets.put(partitionPath, insertBuckets);
       }
     }
@@ -716,15 +687,13 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
       if (!commitTimeline.empty()) { // if we have some commits
         HoodieInstant latestCommitTime = commitTimeline.lastInstant().get();
         List<HoodieDataFile> allFiles = getROFileSystemView()
-            .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp())
-            .collect(Collectors.toList());
+            .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()).collect(Collectors.toList());
         for (HoodieDataFile file : allFiles) {
           if (file.getFileSize() < config.getParquetSmallFileLimit()) {
             String filename = file.getFileName();
             SmallFile sf = new SmallFile();
-            sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename),
-                FSUtils.getFileId(filename));
+            sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), FSUtils.getFileId(filename));
            sf.sizeBytes = file.getFileSize();
            smallFileLocations.add(sf);
            // Update the global small files list
@@ -751,19 +720,18 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
     @Override
     public int getPartition(Object key) {
-      Tuple2<HoodieKey, Option<HoodieRecordLocation>> keyLocation = (Tuple2<HoodieKey,
-          Option<HoodieRecordLocation>>) key;
+      Tuple2<HoodieKey, Option<HoodieRecordLocation>> keyLocation =
+          (Tuple2<HoodieKey, Option<HoodieRecordLocation>>) key;
       if (keyLocation._2().isPresent()) {
         HoodieRecordLocation location = keyLocation._2().get();
         return updateLocationToBucket.get(location.getFileId());
       } else {
-        List<InsertBucket> targetBuckets = partitionPathToInsertBuckets
-            .get(keyLocation._1().getPartitionPath());
+        List<InsertBucket> targetBuckets = partitionPathToInsertBuckets.get(keyLocation._1().getPartitionPath());
         // pick the target bucket to use based on the weights.
         double totalWeight = 0.0;
         final long totalInserts = Math.max(1, globalStat.getNumInserts());
-        final long hashOfKey = Hashing.md5()
-            .hashString(keyLocation._1().getRecordKey(), StandardCharsets.UTF_8).asLong();
+        final long hashOfKey =
+            Hashing.md5().hashString(keyLocation._1().getRecordKey(), StandardCharsets.UTF_8).asLong();
         final double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts;
         for (InsertBucket insertBucket : targetBuckets) {
           totalWeight += insertBucket.weight;
@@ -782,8 +750,8 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
   }
   /**
-   * Obtains the average record size based on records written during previous commits. Used for
-   * estimating how many records pack into one file.
+   * Obtains the average record size based on records written during previous commits. Used for estimating how many
+   * records pack into one file.
    */
   protected static long averageBytesPerRecord(HoodieTimeline commitTimeline, int defaultRecordSizeEstimate) {
     long avgSize = defaultRecordSizeEstimate;
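The UpsertPartitioner logic reformatted above routes each new record to an insert bucket by hashing its record key into [0, 1) and walking the buckets' cumulative weights. A minimal standalone sketch of that selection step is shown below; it uses String.hashCode() instead of Guava's md5 hashing, and the bucket numbers, weights and total insert count are made-up example values, not taken from the commit.

import java.util.Arrays;
import java.util.List;

public class WeightedBucketPick {

  // Each bucket receives a share of incoming inserts proportional to its weight (weights sum to ~1.0).
  static class Bucket {
    final int bucketNumber;
    final double weight;
    Bucket(int bucketNumber, double weight) {
      this.bucketNumber = bucketNumber;
      this.weight = weight;
    }
  }

  static int pickBucket(List<Bucket> buckets, String recordKey, long totalInserts) {
    // Map the key to a deterministic value r in [0, 1), mirroring the hash-then-floorMod step in getPartition().
    long hashOfKey = recordKey.hashCode(); // the source uses Guava md5; hashCode is a stand-in here
    double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts;
    double totalWeight = 0.0;
    for (Bucket b : buckets) {
      totalWeight += b.weight;
      if (totalWeight >= r) {
        return b.bucketNumber;
      }
    }
    return buckets.get(buckets.size() - 1).bucketNumber; // fall back to the last bucket
  }

  public static void main(String[] args) {
    List<Bucket> buckets = Arrays.asList(new Bucket(0, 0.25), new Bucket(1, 0.75));
    System.out.println(pickBucket(buckets, "record-key-42", 1000));
  }
}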
@@ -73,15 +73,21 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.Function;
/** /**
* Implementation of a more real-time read-optimized Hoodie Table where <p> INSERTS - Same as * Implementation of a more real-time read-optimized Hoodie Table where
* HoodieCopyOnWriteTable - Produce new files, block aligned to desired size (or) Merge with the * <p>
* smallest existing file, to expand it </p> <p> UPDATES - Appends the changes to a rolling log file * INSERTS - Same as HoodieCopyOnWriteTable - Produce new files, block aligned to desired size (or) Merge with the
* maintained per file Id. Compaction merges the log file into the base file. </p> <p> WARNING - MOR * smallest existing file, to expand it
* table type does not support nested rollbacks, every rollback must be followed by an attempted * </p>
* commit action </p> * <p>
* UPDATES - Appends the changes to a rolling log file maintained per file Id. Compaction merges the log file into the
* base file.
* </p>
* <p>
* WARNING - MOR table type does not support nested rollbacks, every rollback must be followed by an attempted commit
* action
* </p>
*/ */
public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieCopyOnWriteTable<T> {
HoodieCopyOnWriteTable<T> {
private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class); private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class);
@@ -102,27 +108,24 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
} }
@Override @Override
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId, public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr)
Iterator<HoodieRecord<T>> recordItr) throws IOException { throws IOException {
logger.info("Merging updates for commit " + commitTime + " for file " + fileId); logger.info("Merging updates for commit " + commitTime + " for file " + fileId);
if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) { if (!index.canIndexLogFiles() && mergeOnReadUpsertPartitioner.getSmallFileIds().contains(fileId)) {
logger.info( logger.info("Small file corrections for updates for commit " + commitTime + " for file " + fileId);
"Small file corrections for updates for commit " + commitTime + " for file " + fileId);
return super.handleUpdate(commitTime, fileId, recordItr); return super.handleUpdate(commitTime, fileId, recordItr);
} else { } else {
HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, commitTime, this, HoodieAppendHandle<T> appendHandle = new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr);
fileId, recordItr);
appendHandle.doAppend(); appendHandle.doAppend();
appendHandle.close(); appendHandle.close();
return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus())) return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus())).iterator();
.iterator();
} }
} }
@Override @Override
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx, public Iterator<List<WriteStatus>> handleInsert(String commitTime, String idPfx, Iterator<HoodieRecord<T>> recordItr)
Iterator<HoodieRecord<T>> recordItr) throws Exception { throws Exception {
// If canIndexLogFiles, write inserts to log files else write inserts to parquet files // If canIndexLogFiles, write inserts to log files else write inserts to parquet files
if (index.canIndexLogFiles()) { if (index.canIndexLogFiles()) {
return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx); return new MergeOnReadLazyInsertIterable<>(recordItr, config, commitTime, this, idPfx);
@@ -134,8 +137,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
@Override @Override
public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) { public HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime) {
logger.info("Checking if compaction needs to be run on " + config.getBasePath()); logger.info("Checking if compaction needs to be run on " + config.getBasePath());
Option<HoodieInstant> lastCompaction = getActiveTimeline().getCommitTimeline() Option<HoodieInstant> lastCompaction =
.filterCompletedInstants().lastInstant(); getActiveTimeline().getCommitTimeline().filterCompletedInstants().lastInstant();
String deltaCommitsSinceTs = "0"; String deltaCommitsSinceTs = "0";
if (lastCompaction.isPresent()) { if (lastCompaction.isPresent()) {
deltaCommitsSinceTs = lastCompaction.get().getTimestamp(); deltaCommitsSinceTs = lastCompaction.get().getTimestamp();
@@ -145,8 +148,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
.findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants(); .findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants();
if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) { if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) {
logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction
+ " delta commits was found since last compaction " + deltaCommitsSinceTs + " delta commits was found since last compaction " + deltaCommitsSinceTs + ". Waiting for "
+ ". Waiting for " + config.getInlineCompactDeltaCommitMax()); + config.getInlineCompactDeltaCommitMax());
return new HoodieCompactionPlan(); return new HoodieCompactionPlan();
} }
@@ -186,9 +189,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// Atomically un-publish all non-inflight commits // Atomically un-publish all non-inflight commits
Option<HoodieInstant> commitOrCompactionOption = Option.fromJavaOptional(this.getActiveTimeline() Option<HoodieInstant> commitOrCompactionOption = Option.fromJavaOptional(this.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, .getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
HoodieActiveTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION)).getInstants() HoodieActiveTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION))
.filter(i -> commit.equals(i.getTimestamp())) .getInstants().filter(i -> commit.equals(i.getTimestamp())).findFirst());
.findFirst());
HoodieInstant instantToRollback = commitOrCompactionOption.get(); HoodieInstant instantToRollback = commitOrCompactionOption.get();
// Atomically un-publish all non-inflight commits // Atomically un-publish all non-inflight commits
if (!instantToRollback.isInflight()) { if (!instantToRollback.isInflight()) {
@@ -196,10 +198,9 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
} }
logger.info("Unpublished " + commit); logger.info("Unpublished " + commit);
Long startTime = System.currentTimeMillis(); Long startTime = System.currentTimeMillis();
List<HoodieRollbackStat> allRollbackStats = jsc.parallelize(FSUtils List<HoodieRollbackStat> allRollbackStats =
.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(), jsc.parallelize(FSUtils.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(),
config.shouldAssumeDatePartitioning())) config.shouldAssumeDatePartitioning())).map((Function<String, HoodieRollbackStat>) partitionPath -> {
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
HoodieActiveTimeline activeTimeline = this.getActiveTimeline().reload(); HoodieActiveTimeline activeTimeline = this.getActiveTimeline().reload();
HoodieRollbackStat hoodieRollbackStats = null; HoodieRollbackStat hoodieRollbackStats = null;
// Need to put the path filter here since Filter is not serializable // Need to put the path filter here since Filter is not serializable
@@ -222,10 +223,11 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
case HoodieTimeline.COMMIT_ACTION: case HoodieTimeline.COMMIT_ACTION:
try { try {
// Rollback of a commit should delete the newly created parquet files along with any log // Rollback of a commit should delete the newly created parquet files along with any log
// files created with this as baseCommit. This is required to support multi-rollbacks in a MOR table. // files created with this as baseCommit. This is required to support multi-rollbacks in a MOR
// table.
super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter); super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter);
hoodieRollbackStats = HoodieRollbackStat.newBuilder() hoodieRollbackStats = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withPartitionPath(partitionPath).withDeletedFileResults(filesToDeletedStatus).build(); .withDeletedFileResults(filesToDeletedStatus).build();
break; break;
} catch (IOException io) { } catch (IOException io) {
throw new UncheckedIOException("Failed to rollback for commit " + commit, io); throw new UncheckedIOException("Failed to rollback for commit " + commit, io);
@@ -233,25 +235,28 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
case HoodieTimeline.COMPACTION_ACTION: case HoodieTimeline.COMPACTION_ACTION:
try { try {
// If there is no delta commit present after the current commit (if compaction), no action, else we // If there is no delta commit present after the current commit (if compaction), no action, else we
// need to make sure that a compaction commit rollback also deletes any log files written as part of the // need to make sure that a compaction commit rollback also deletes any log files written as part of
// the
// succeeding deltacommit. // succeeding deltacommit.
boolean higherDeltaCommits = !activeTimeline.getDeltaCommitTimeline() boolean higherDeltaCommits = !activeTimeline.getDeltaCommitTimeline().filterCompletedInstants()
.filterCompletedInstants().findInstantsAfter(commit, 1).empty(); .findInstantsAfter(commit, 1).empty();
if (higherDeltaCommits) { if (higherDeltaCommits) {
// Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled // Rollback of a compaction action with no higher deltacommit means that the compaction is
// and has not yet finished. In this scenario we should delete only the newly created parquet files // scheduled
// and has not yet finished. In this scenario we should delete only the newly created parquet
// files
// and not corresponding base commit log files created with this as baseCommit since updates would // and not corresponding base commit log files created with this as baseCommit since updates would
// have been written to the log files. // have been written to the log files.
super.deleteCleanedFiles(filesToDeletedStatus, commit, partitionPath); super.deleteCleanedFiles(filesToDeletedStatus, commit, partitionPath);
hoodieRollbackStats = HoodieRollbackStat.newBuilder() hoodieRollbackStats = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withPartitionPath(partitionPath).withDeletedFileResults(filesToDeletedStatus).build(); .withDeletedFileResults(filesToDeletedStatus).build();
} else { } else {
// No deltacommits present after this compaction commit (inflight or requested). In this case, we // No deltacommits present after this compaction commit (inflight or requested). In this case, we
// can also delete any log files that were created with this compaction commit as base // can also delete any log files that were created with this compaction commit as base
// commit. // commit.
super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter); super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter);
hoodieRollbackStats = HoodieRollbackStat.newBuilder() hoodieRollbackStats = HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withPartitionPath(partitionPath).withDeletedFileResults(filesToDeletedStatus).build(); .withDeletedFileResults(filesToDeletedStatus).build();
} }
break; break;
} catch (IOException io) { } catch (IOException io) {
@@ -261,12 +266,14 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// -------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------
// (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal // (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal
// -------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------
// (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In // (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries.
// In
// this scenario we would want to delete these log files. // this scenario we would want to delete these log files.
// (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario, // (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario,
// HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks. // HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks.
// (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is // (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is
// being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and // being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime
// and
// and hence will end up deleting these log files. This is done so there are no orphan log files // and hence will end up deleting these log files. This is done so there are no orphan log files
// lying around. // lying around.
// (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions // (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions
@@ -274,7 +281,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// --------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------
// (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal // (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal
// --------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------
// (B.1) Failed first commit - Inserts were written to parquet files and HoodieWriteStat has no entries. // (B.1) Failed first commit - Inserts were written to parquet files and HoodieWriteStat has no
// entries.
// In this scenario, we delete all the parquet files written for the failed commit. // In this scenario, we delete all the parquet files written for the failed commit.
// (B.2) Failed recurring commits - Inserts were written to parquet files and updates to log files. In // (B.2) Failed recurring commits - Inserts were written to parquet files and updates to log files. In
// this scenario, perform (A.1) and for updates written to log files, write rollback blocks. // this scenario, perform (A.1) and for updates written to log files, write rollback blocks.
@@ -282,10 +290,11 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files // (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files
// as well if the base parquet file gets deleted. // as well if the base parquet file gets deleted.
try { try {
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( HoodieCommitMetadata commitMetadata =
metaClient.getCommitTimeline().getInstantDetails( HoodieCommitMetadata.fromBytes(
new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp())) metaClient.getCommitTimeline().getInstantDetails(new HoodieInstant(true,
.get(), HoodieCommitMetadata.class); instantToRollback.getAction(), instantToRollback.getTimestamp())).get(),
HoodieCommitMetadata.class);
// read commit file and (either append delete blocks or delete file) // read commit file and (either append delete blocks or delete file)
Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>(); Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>();
@@ -294,8 +303,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// We do not know fileIds for inserts (first inserts are either log files or parquet files), // We do not know fileIds for inserts (first inserts are either log files or parquet files),
// delete all files for the corresponding failed commit, if present (same as COW) // delete all files for the corresponding failed commit, if present (same as COW)
super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter); super.deleteCleanedFiles(filesToDeletedStatus, partitionPath, filter);
final Set<String> deletedFiles = filesToDeletedStatus.entrySet().stream() final Set<String> deletedFiles = filesToDeletedStatus.entrySet().stream().map(entry -> {
.map(entry -> {
Path filePath = entry.getKey().getPath(); Path filePath = entry.getKey().getPath();
return FSUtils.getFileIdFromFilePath(filePath); return FSUtils.getFileIdFromFilePath(filePath);
}).collect(Collectors.toSet()); }).collect(Collectors.toSet());
@@ -316,8 +324,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
}).filter(Objects::nonNull).collect(); }).filter(Objects::nonNull).collect();
// Delete Inflight instants if enabled // Delete Inflight instants if enabled
deleteInflightInstant(deleteInstants, this.getActiveTimeline(), new HoodieInstant(true, instantToRollback deleteInflightInstant(deleteInstants, this.getActiveTimeline(),
.getAction(), instantToRollback.getTimestamp())); new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp()));
logger.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime)); logger.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
@@ -332,8 +340,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
} }
/** /**
* UpsertPartitioner for MergeOnRead table type, this allows auto correction of small parquet * UpsertPartitioner for MergeOnRead table type, this allows auto correction of small parquet files to larger ones
* files to larger ones without the need for an index in the logFile. * without the need for an index in the logFile.
*/ */
class MergeOnReadUpsertPartitioner extends HoodieCopyOnWriteTable.UpsertPartitioner { class MergeOnReadUpsertPartitioner extends HoodieCopyOnWriteTable.UpsertPartitioner {
@@ -361,20 +369,22 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// TODO : choose last N small files since there can be multiple small files written to a single partition // TODO : choose last N small files since there can be multiple small files written to a single partition
// by different spark partitions in a single batch // by different spark partitions in a single batch
Option<FileSlice> smallFileSlice = Option.fromJavaOptional(getRTFileSystemView() Option<FileSlice> smallFileSlice = Option.fromJavaOptional(getRTFileSystemView()
.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), false).filter( .getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), false)
fileSlice -> fileSlice.getLogFiles().count() < 1 .filter(fileSlice -> fileSlice.getLogFiles().count() < 1
&& fileSlice.getDataFile().get().getFileSize() < config && fileSlice.getDataFile().get().getFileSize() < config.getParquetSmallFileLimit())
.getParquetSmallFileLimit()).sorted((FileSlice left, FileSlice right) -> .sorted((FileSlice left,
left.getDataFile().get().getFileSize() < right.getDataFile().get().getFileSize() FileSlice right) -> left.getDataFile().get().getFileSize() < right.getDataFile().get().getFileSize()
? -1 : 1).findFirst()); ? -1
: 1)
.findFirst());
if (smallFileSlice.isPresent()) { if (smallFileSlice.isPresent()) {
allSmallFileSlices.add(smallFileSlice.get()); allSmallFileSlices.add(smallFileSlice.get());
} }
} else { } else {
// If we can index log files, we can add more inserts to log files for fileIds including those under // If we can index log files, we can add more inserts to log files for fileIds including those under
// pending compaction. // pending compaction.
List<FileSlice> allFileSlices = getRTFileSystemView() List<FileSlice> allFileSlices =
.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true) getRTFileSystemView().getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true)
.collect(Collectors.toList()); .collect(Collectors.toList());
for (FileSlice fileSlice : allFileSlices) { for (FileSlice fileSlice : allFileSlices) {
if (isSmallFile(partitionPath, fileSlice)) { if (isSmallFile(partitionPath, fileSlice)) {
@@ -408,8 +418,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
} }
public List<String> getSmallFileIds() { public List<String> getSmallFileIds() {
return (List<String>) smallFiles.stream() return (List<String>) smallFiles.stream().map(smallFile -> ((SmallFile) smallFile).location.getFileId())
.map(smallFile -> ((SmallFile) smallFile).location.getFileId())
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@@ -417,8 +426,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
if (!fileSlice.getDataFile().isPresent()) { if (!fileSlice.getDataFile().isPresent()) {
return convertLogFilesSizeToExpectedParquetSize(fileSlice.getLogFiles().collect(Collectors.toList())); return convertLogFilesSizeToExpectedParquetSize(fileSlice.getLogFiles().collect(Collectors.toList()));
} else { } else {
return fileSlice.getDataFile().get().getFileSize() + convertLogFilesSizeToExpectedParquetSize(fileSlice return fileSlice.getDataFile().get().getFileSize()
.getLogFiles().collect(Collectors.toList())); + convertLogFilesSizeToExpectedParquetSize(fileSlice.getLogFiles().collect(Collectors.toList()));
} }
} }
@@ -431,13 +440,12 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
@VisibleForTesting @VisibleForTesting
public long convertLogFilesSizeToExpectedParquetSize(List<HoodieLogFile> hoodieLogFiles) { public long convertLogFilesSizeToExpectedParquetSize(List<HoodieLogFile> hoodieLogFiles) {
long totalSizeOfLogFiles = hoodieLogFiles.stream().map(hoodieLogFile -> hoodieLogFile.getFileSize()) long totalSizeOfLogFiles = hoodieLogFiles.stream().map(hoodieLogFile -> hoodieLogFile.getFileSize())
.filter(size -> size > 0) .filter(size -> size > 0).reduce((a, b) -> (a + b)).orElse(0L);
.reduce((a, b) -> (a + b)).orElse(0L);
// Here we assume that if there is no base parquet file, all log files contain only inserts. // Here we assume that if there is no base parquet file, all log files contain only inserts.
// We can then just get the parquet equivalent size of these log files, compare that with // We can then just get the parquet equivalent size of these log files, compare that with
// {@link config.getParquetMaxFileSize()} and decide if there is scope to insert more rows // {@link config.getParquetMaxFileSize()} and decide if there is scope to insert more rows
long logFilesEquivalentParquetFileSize = (long) (totalSizeOfLogFiles * config long logFilesEquivalentParquetFileSize =
.getLogFileToParquetCompressionRatio()); (long) (totalSizeOfLogFiles * config.getLogFileToParquetCompressionRatio());
return logFilesEquivalentParquetFileSize; return logFilesEquivalentParquetFileSize;
} }
} }
@@ -447,8 +455,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
Map<HeaderMetadataType, String> header = Maps.newHashMap(); Map<HeaderMetadataType, String> header = Maps.newHashMap();
header.put(HeaderMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp()); header.put(HeaderMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp());
header.put(HeaderMetadataType.TARGET_INSTANT_TIME, commit); header.put(HeaderMetadataType.TARGET_INSTANT_TIME, commit);
header.put(HeaderMetadataType.COMMAND_BLOCK_TYPE, String.valueOf(HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK header.put(HeaderMetadataType.COMMAND_BLOCK_TYPE,
.ordinal())); String.valueOf(HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
return header; return header;
} }
@@ -462,8 +470,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// baseCommit always by listing the file slice // baseCommit always by listing the file slice
Map<String, String> fileIdToBaseCommitTimeForLogMap = this.getRTFileSystemView().getLatestFileSlices(partitionPath) Map<String, String> fileIdToBaseCommitTimeForLogMap = this.getRTFileSystemView().getLatestFileSlices(partitionPath)
.collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime)); .collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime));
commitMetadata.getPartitionToWriteStats().get(partitionPath).stream() commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> {
.filter(wStat -> {
// Filter out stats without prevCommit since they are all inserts // Filter out stats without prevCommit since they are all inserts
return wStat != null && wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT && wStat.getPrevCommit() != null return wStat != null && wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT && wStat.getPrevCommit() != null
&& !deletedFiles.contains(wStat.getFileId()); && !deletedFiles.contains(wStat.getFileId());
@@ -473,10 +480,9 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
if (null != baseCommitTime) {
boolean success = false;
try {
- writer = HoodieLogFormat.newWriterBuilder().onParentPath(
- FSUtils.getPartitionPath(this.getMetaClient().getBasePath(), partitionPath))
- .withFileId(wStat.getFileId()).overBaseCommit(baseCommitTime)
- .withFs(this.metaClient.getFs())
+ writer = HoodieLogFormat.newWriterBuilder()
+ .onParentPath(FSUtils.getPartitionPath(this.getMetaClient().getBasePath(), partitionPath))
+ .withFileId(wStat.getFileId()).overBaseCommit(baseCommitTime).withFs(this.metaClient.getFs())
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
// generate metadata
Map<HeaderMetadataType, String> header = generateHeader(commit);
@@ -484,8 +490,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
writer = writer.appendBlock(new HoodieCommandBlock(header));
success = true;
} catch (IOException | InterruptedException io) {
- throw new HoodieRollbackException(
- "Failed to rollback for commit " + commit, io);
+ throw new HoodieRollbackException("Failed to rollback for commit " + commit, io);
} finally {
try {
if (writer != null) {
@@ -495,8 +500,8 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
// This step is intentionally done after writer is closed. Guarantees that
// getFileStatus would reflect correct stats and FileNotFoundException is not thrown in
// cloud-storage : HUDI-168
- filesToNumBlocksRollback.put(this.getMetaClient().getFs()
- .getFileStatus(writer.getLogFile().getPath()), 1L);
+ filesToNumBlocksRollback.put(this.getMetaClient().getFs().getFileStatus(writer.getLogFile().getPath()),
+ 1L);
}
} catch (IOException io) {
throw new UncheckedIOException(io);
@@ -504,9 +509,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
}
}
});
- return HoodieRollbackStat.newBuilder()
- .withPartitionPath(partitionPath)
- .withDeletedFileResults(filesToDeletedStatus)
+ return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath).withDeletedFileResults(filesToDeletedStatus)
.withRollbackBlockAppendResults(filesToNumBlocksRollback).build();
}

View File

@@ -82,22 +82,21 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
protected HoodieTable(HoodieWriteConfig config, JavaSparkContext jsc) {
this.config = config;
this.hadoopConfiguration = new SerializableConfiguration(jsc.hadoopConfiguration());
- this.viewManager = FileSystemViewManager.createViewManager(
- new SerializableConfiguration(jsc.hadoopConfiguration()), config.getViewStorageConfig());
+ this.viewManager = FileSystemViewManager.createViewManager(new SerializableConfiguration(jsc.hadoopConfiguration()),
+ config.getViewStorageConfig());
this.metaClient = ClientUtils.createMetaClient(jsc, config, true);
this.index = HoodieIndex.createIndex(config, jsc);
}
private synchronized FileSystemViewManager getViewManager() {
if (null == viewManager) {
- viewManager = FileSystemViewManager.createViewManager(hadoopConfiguration,
- config.getViewStorageConfig());
+ viewManager = FileSystemViewManager.createViewManager(hadoopConfiguration, config.getViewStorageConfig());
}
return viewManager;
}
- public static <T extends HoodieRecordPayload> HoodieTable<T> getHoodieTable(
- HoodieTableMetaClient metaClient, HoodieWriteConfig config, JavaSparkContext jsc) {
+ public static <T extends HoodieRecordPayload> HoodieTable<T> getHoodieTable(HoodieTableMetaClient metaClient,
+ HoodieWriteConfig config, JavaSparkContext jsc) {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return new HoodieCopyOnWriteTable<>(config, jsc);
@@ -202,8 +201,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
* Get the list of savepoints in this table
*/
public List<String> getSavepoints() {
- return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
- .collect(Collectors.toList());
+ return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
}
/**
@@ -214,18 +212,14 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
throw new HoodieSavepointException(
"Could not get data files for savepoint " + savepointTime + ". No such savepoint.");
}
- HoodieInstant instant = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION,
- savepointTime);
+ HoodieInstant instant = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
HoodieSavepointMetadata metadata = null;
try {
- metadata = AvroUtils
- .deserializeHoodieSavepointMetadata(getActiveTimeline().getInstantDetails(instant).get());
+ metadata = AvroUtils.deserializeHoodieSavepointMetadata(getActiveTimeline().getInstantDetails(instant).get());
} catch (IOException e) {
- throw new HoodieSavepointException(
- "Could not get savepointed data files for savepoint " + savepointTime, e);
+ throw new HoodieSavepointException("Could not get savepointed data files for savepoint " + savepointTime, e);
}
- return metadata.getPartitionMetadata().values().stream()
- .flatMap(s -> s.getSavepointDataFile().stream());
+ return metadata.getPartitionMetadata().values().stream().flatMap(s -> s.getSavepointDataFile().stream());
}
public HoodieActiveTimeline getActiveTimeline() {
@@ -242,17 +236,18 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
/**
* Perform the ultimate IO for a given upserted (RDD) partition
*/
- public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime,
- Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
+ public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime, Integer partition,
+ Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
/**
* Perform the ultimate IO for a given inserted (RDD) partition
*/
- public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime,
- Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
+ public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime, Integer partition,
+ Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
/**
* Schedule compaction for the instant time
+ *
* @param jsc Spark Context
* @param instantTime Instant Time for scheduling compaction
* @return
@@ -260,8 +255,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
public abstract HoodieCompactionPlan scheduleCompaction(JavaSparkContext jsc, String instantTime);
/**
- * Run Compaction on the table. Compaction arranges the data so that it is optimized for data
- * access
+ * Run Compaction on the table. Compaction arranges the data so that it is optimized for data access
*
* @param jsc Spark Context
* @param compactionInstantTime Instant Time
@@ -276,9 +270,9 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc);
/**
- * Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1)
- * Atomically unpublish this commit (2) clean indexing data (3) clean new generated parquet files
- * / log blocks (4) Finally, delete .<action>.commit or .<action>.inflight file if deleteInstants = true
+ * Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1) Atomically unpublish
+ * this commit (2) clean indexing data (3) clean new generated parquet files / log blocks (4) Finally, delete
+ * .<action>.commit or .<action>.inflight file if deleteInstants = true
*/
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
throws IOException;
@@ -297,6 +291,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
/**
* Delete Marker directory corresponding to an instant
+ *
* @param instantTs Instant Time
*/
protected void deleteMarkerDir(String instantTs) {
@@ -343,13 +338,12 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
// Contains list of partially created files. These needs to be cleaned up.
invalidDataPaths.removeAll(validDataPaths);
if (!invalidDataPaths.isEmpty()) {
- logger.info("Removing duplicate data files created due to spark retries before committing. Paths="
- + invalidDataPaths);
+ logger.info(
+ "Removing duplicate data files created due to spark retries before committing. Paths=" + invalidDataPaths);
}
Map<String, List<Pair<String, String>>> groupByPartition = invalidDataPaths.stream()
- .map(dp -> Pair.of(new Path(dp).getParent().toString(), dp))
- .collect(Collectors.groupingBy(Pair::getKey));
+ .map(dp -> Pair.of(new Path(dp).getParent().toString(), dp)).collect(Collectors.groupingBy(Pair::getKey));
if (!groupByPartition.isEmpty()) {
// Ensure all files in delete list is actually present. This is mandatory for an eventually consistent FS.
@@ -394,6 +388,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
/**
* Ensures all files passed either appear or disappear
+ *
* @param jsc JavaSparkContext
* @param groupByPartition Files grouped by partition
* @param visibility Appear/Disappear

View File

@@ -23,13 +23,11 @@ import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.spark.api.java.JavaRDD;
/**
- * Repartition input records into at least expected number of output spark partitions. It should
- * give below guarantees - Output spark partition will have records from only one hoodie partition.
- * - Average records per output spark partitions should be almost equal to (#inputRecords /
- * #outputSparkPartitions) to avoid possible skews.
+ * Repartition input records into at least expected number of output spark partitions. It should give below guarantees -
+ * Output spark partition will have records from only one hoodie partition. - Average records per output spark
+ * partitions should be almost equal to (#inputRecords / #outputSparkPartitions) to avoid possible skews.
*/
public interface UserDefinedBulkInsertPartitioner<T extends HoodieRecordPayload> {
- JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records,
- int outputSparkPartitions);
+ JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions);
}
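The interface above only states the contract; a minimal sketch of one possible implementor follows. The class name is hypothetical and not part of this commit, and the imports assume the Hudi and Spark classes shown in this diff are on the classpath. Keying by partition path keeps each hoodie partition on a single Spark partition, though several hoodie partitions may still hash to the same Spark partition, so this only approximates the first guarantee in the javadoc.

import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;

// Hypothetical example implementation, not part of this commit.
public class PartitionPathBulkInsertPartitioner<T extends HoodieRecordPayload>
    implements UserDefinedBulkInsertPartitioner<T> {

  @Override
  public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions) {
    return records
        // key every record by its hoodie partition path
        .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record))
        // hash-partition on the path and sort within each Spark partition so records stay grouped
        .repartitionAndSortWithinPartitions(new HashPartitioner(outputSparkPartitions))
        // drop the key again and hand the repartitioned records back to the bulk-insert path
        .map(Tuple2::_2);
  }
}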

View File

@@ -30,8 +30,7 @@ import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;
/**
- * Information about incoming records for upsert/insert obtained either via sampling or
- * introspecting the data fully
+ * Information about incoming records for upsert/insert obtained either via sampling or introspecting the data fully
* <p>
* TODO(vc): Think about obtaining this directly from index.tagLocation
*/
@@ -62,11 +61,10 @@ public class WorkloadProfile<T extends HoodieRecordPayload> implements Serializa
Map<Tuple2<String, Option<HoodieRecordLocation>>, Long> partitionLocationCounts = taggedRecords
.mapToPair(record -> new Tuple2<>(
- new Tuple2<>(record.getPartitionPath(), Option.ofNullable(record.getCurrentLocation())),
- record)).countByKey();
- for (Map.Entry<Tuple2<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts
- .entrySet()) {
+ new Tuple2<>(record.getPartitionPath(), Option.ofNullable(record.getCurrentLocation())), record))
+ .countByKey();
+ for (Map.Entry<Tuple2<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts.entrySet()) {
String partitionPath = e.getKey()._1();
Long count = e.getValue();
Option<HoodieRecordLocation> locOption = e.getKey()._2();
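The hunk above is where the workload profile tallies incoming records per (partition path, current location) key; records with no current location are the inserts. A toy, Spark-free sketch of the same tally follows, using plain java.util.stream; the class name and sample values are hypothetical.

import java.util.AbstractMap.SimpleEntry;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class WorkloadProfileSketch {
  public static void main(String[] args) {
    // (partitionPath, currentFileId) pairs; a null file id plays the role of Option.empty(), i.e. an insert.
    List<SimpleEntry<String, String>> taggedRecords = Arrays.asList(
        new SimpleEntry<>("2019/10/01", "file-1"),
        new SimpleEntry<>("2019/10/01", "file-1"),
        new SimpleEntry<>("2019/10/01", null),
        new SimpleEntry<>("2019/10/02", null));

    // Equivalent of the mapToPair(...).countByKey() call above, expressed with streams.
    Map<List<String>, Long> partitionLocationCounts = taggedRecords.stream()
        .collect(Collectors.groupingBy(
            e -> Arrays.asList(e.getKey(), String.valueOf(e.getValue())),
            Collectors.counting()));

    partitionLocationCounts.forEach((key, count) -> System.out.println(key + " -> " + count));
  }
}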

View File

@@ -41,7 +41,8 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
/**
- * Driver program that uses the Hoodie client with synthetic workload, and performs basic operations. <p>
+ * Driver program that uses the Hoodie client with synthetic workload, and performs basic operations.
+ * <p>
*/
public class HoodieClientExample {
@@ -82,18 +83,15 @@ public class HoodieClientExample {
Path path = new Path(tablePath);
FileSystem fs = FSUtils.getFs(tablePath, jsc.hadoopConfiguration());
if (!fs.exists(path)) {
- HoodieTableMetaClient
- .initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType), tableName,
- HoodieAvroPayload.class.getName());
+ HoodieTableMetaClient.initTableType(jsc.hadoopConfiguration(), tablePath, HoodieTableType.valueOf(tableType),
+ tableName, HoodieAvroPayload.class.getName());
}
// Create the write client to write some records in
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath)
- .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
- .forTable(tableName)
+ .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName)
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
- .withCompactionConfig(
- HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 3).build()).build();
HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
/**

View File

@@ -74,6 +74,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
/**
* Cleanups resource group for the subclasses of {@link TestHoodieClientBase}.
+ *
* @throws IOException
*/
public void cleanupResources() throws IOException {
@@ -84,8 +85,7 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
/**
- * Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext})
- * with the given application name.
+ * Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with the given application name.
*
* @param appName The specified application name.
*/
@@ -99,8 +99,8 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
/**
- * Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext})
- * with a default name <b>TestHoodieClient</b>.
+ * Initializes the Spark contexts ({@link JavaSparkContext} and {@link SQLContext}) with a default name
+ * <b>TestHoodieClient</b>.
*/
protected void initSparkContexts() {
initSparkContexts("TestHoodieClient");
@@ -155,8 +155,8 @@ public abstract class HoodieClientTestHarness extends HoodieCommonTestHarness im
}
/**
- * Initializes an instance of {@link HoodieTableMetaClient} with a special table type
- * specified by {@code getTableType()}.
+ * Initializes an instance of {@link HoodieTableMetaClient} with a special table type specified by
+ * {@code getTableType()}.
*
* @throws IOException
*/

View File

@@ -73,15 +73,14 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) {
return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
- .withAutoCommit(autoCommit).withAssumeDatePartitioning(true).withCompactionConfig(
- HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024).withInlineCompaction(false)
- .withMaxNumDeltaCommitsBeforeCompaction(1).build())
+ .withAutoCommit(autoCommit).withAssumeDatePartitioning(true)
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024)
+ .withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1).build())
.withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024 * 1024).build())
.forTable("test-trip-table")
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
- .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(
- FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE)
- .build());
+ .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
+ .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build());
}
@Test
@@ -97,8 +96,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -158,8 +157,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
try (HoodieWriteClient client = getHoodieWriteClient(cfg, true);) {
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
// Schedule compaction but do not run them
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -182,15 +181,13 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
// Validate
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
- inflightInstant =
- metaClient.getActiveTimeline().filterInflightsExcludingCompaction().firstInstant().get();
+ inflightInstant = metaClient.getActiveTimeline().filterInflightsExcludingCompaction().firstInstant().get();
assertTrue("inflight instant has expected instant time",
inflightInstant.getTimestamp().equals(nextInflightInstantTime));
assertTrue("Expect only one inflight instant",
metaClient.getActiveTimeline().filterInflightsExcludingCompaction().getInstants().count() == 1);
// Expect pending Compaction to be present
- pendingCompactionInstant =
- metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
+ pendingCompactionInstant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().firstInstant().get();
assertTrue("Pending Compaction instant has expected instant time",
pendingCompactionInstant.getTimestamp().equals(compactionInstantTime));
}
@@ -211,8 +208,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
// Schedule and mark compaction instant as inflight
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
@@ -221,8 +218,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
moveCompactionFromRequestedToInflight(compactionInstantTime, client, cfg);
// Complete ingestions
- runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime),
- records, cfg, false, Arrays.asList(compactionInstantTime));
+ runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime), records, cfg, false,
+ Arrays.asList(compactionInstantTime));
// execute inflight compaction
executeCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, true);
@@ -242,8 +239,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
// Schedule compaction but do not run them
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
@@ -256,8 +253,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
boolean gotException = false;
try {
- runNextDeltaCommits(client, Arrays.asList(failedInstantTime),
- records, cfg, false, Arrays.asList(compactionInstantTime));
+ runNextDeltaCommits(client, Arrays.asList(failedInstantTime), records, cfg, false,
+ Arrays.asList(compactionInstantTime));
} catch (IllegalArgumentException iex) {
// Latest pending compaction instant time must be earlier than this instant time. Should fail here
gotException = true;
@@ -279,8 +276,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
createNextDeltaCommit(inflightInstantTime, records, client, metaClient, cfg, true);
@@ -315,8 +312,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
boolean gotException = false;
@@ -337,8 +334,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
} catch (IllegalArgumentException iex) {
gotException = true;
}
- assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant",
- gotException);
+ assertTrue("Compaction Instant to be scheduled cannot have same timestamp as committed instant", gotException);
compactionInstantTime = "006";
scheduleCompaction(compactionInstantTime, client, cfg);
@@ -349,8 +345,7 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
} catch (IllegalArgumentException iex) {
gotException = true;
}
- assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction",
- gotException);
+ assertTrue("Compaction Instant to be scheduled cannot have same timestamp as a pending compaction", gotException);
}
@Test
@@ -365,8 +360,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
@@ -389,15 +384,15 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
int numRecs = 2000;
List<HoodieRecord> records = dataGen.generateInserts(firstInstantTime, numRecs);
- records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime),
- records, cfg, true, new ArrayList<>());
+ records = runNextDeltaCommits(client, Arrays.asList(firstInstantTime, secondInstantTime), records, cfg, true,
+ new ArrayList<>());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieTable hoodieTable = getHoodieTable(metaClient, cfg);
scheduleCompaction(compactionInstantTime, client, cfg);
- runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime),
- records, cfg, false, Arrays.asList(compactionInstantTime));
+ runNextDeltaCommits(client, Arrays.asList(thirdInstantTime, fourthInstantTime), records, cfg, false,
+ Arrays.asList(compactionInstantTime));
executeCompaction(compactionInstantTime, client, hoodieTable, cfg, numRecs, true);
}
}
@@ -428,8 +423,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
}
private List<HoodieRecord> runNextDeltaCommits(HoodieWriteClient client, List<String> deltaInstants,
- List<HoodieRecord> records, HoodieWriteConfig cfg, boolean insertFirst,
- List<String> expPendingCompactionInstants) throws Exception {
+ List<HoodieRecord> records, HoodieWriteConfig cfg, boolean insertFirst, List<String> expPendingCompactionInstants)
+ throws Exception {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactions =
@@ -476,8 +471,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
HoodieWriteConfig cfg) throws IOException {
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
- HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
- metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
+ HoodieCompactionPlan workload = AvroUtils
+ .deserializeCompactionPlan(metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
.filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
@@ -489,27 +484,23 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty());
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
- assertEquals("Last compaction instant must be the one set",
- instant.getTimestamp(), compactionInstantTime);
+ assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
}
- private void scheduleAndExecuteCompaction(String compactionInstantTime,
- HoodieWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs,
- boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
+ private void scheduleAndExecuteCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
+ HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
scheduleCompaction(compactionInstantTime, client, cfg);
executeCompaction(compactionInstantTime, client, table, cfg, expectedNumRecs, hasDeltaCommitAfterPendingCompaction);
}
- private void executeCompaction(String compactionInstantTime,
- HoodieWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs,
- boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
+ private void executeCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table,
+ HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException {
client.compact(compactionInstantTime);
List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table, cfg);
assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent());
- assertFalse("Verify all file-slices have base-instant same as compaction instant",
- fileSliceList.stream().filter(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime))
- .findAny().isPresent());
+ assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream()
+ .filter(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)).findAny().isPresent());
assertFalse("Verify all file-slices have data-files",
fileSliceList.stream().filter(fs -> !fs.getDataFile().isPresent()).findAny().isPresent());
@@ -522,12 +513,11 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
}
// verify that there is a commit
- table = getHoodieTable(
- new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
+ table = getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg);
HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants();
String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp();
- assertEquals("Expect compaction instant time to be the latest commit time",
- latestCompactionCommitTime, compactionInstantTime);
+ assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime,
+ compactionInstantTime);
Assert.assertEquals("Must contain expected records", expectedNumRecs,
HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count());
@@ -546,8 +536,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
client.commit(instantTime, statuses);
}
- Option<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().reload().getDeltaCommitTimeline()
- .filterCompletedInstants().lastInstant();
+ Option<HoodieInstant> deltaCommit =
+ metaClient.getActiveTimeline().reload().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
if (skipCommit && !cfg.shouldAutoCommit()) {
assertTrue("Delta commit should not be latest instant",
deltaCommit.get().getTimestamp().compareTo(instantTime) < 0);
@@ -560,8 +550,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(), cfg.getBasePath());
- HoodieTableFileSystemView
- view = new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
+ HoodieTableFileSystemView view =
+ new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
List<HoodieDataFile> dataFilesToRead = view.getLatestDataFiles().collect(Collectors.toList());
return dataFilesToRead;
}
@@ -569,9 +559,8 @@ public class TestAsyncCompaction extends TestHoodieClientBase {
private List<FileSlice> getCurrentLatestFileSlices(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
HoodieTableFileSystemView view = new HoodieTableFileSystemView(table.getMetaClient(),
table.getMetaClient().getActiveTimeline().reload().getCommitsAndCompactionTimeline());
- List<FileSlice> fileSliceList =
- Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS).stream().flatMap(partition ->
- view.getLatestFileSlices(partition)).collect(Collectors.toList());
+ List<FileSlice> fileSliceList = Arrays.asList(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS).stream()
+ .flatMap(partition -> view.getLatestFileSlices(partition)).collect(Collectors.toList());
return fileSliceList;
}

View File

@@ -93,16 +93,13 @@ public class TestCleaner extends TestHoodieClientBase {
* @param insertFn Insertion API for testing
* @throws Exception in case of error
*/
- private String insertFirstBigBatchForClientCleanerTest(
- HoodieWriteConfig cfg,
- HoodieWriteClient client,
+ private String insertFirstBigBatchForClientCleanerTest(HoodieWriteConfig cfg, HoodieWriteClient client,
Function2<List<HoodieRecord>, String, Integer> recordGenFunction,
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn) throws Exception {
/**
- * do a big insert
- * (this is basically same as insert part of upsert, just adding it here so we can
- * catch breakages in insert(), if the implementation diverges.)
+ * do a big insert (this is basically same as insert part of upsert, just adding it here so we can catch breakages
+ * in insert(), if the implementation diverges.)
*/
String newCommitTime = client.startCommit();
@@ -145,8 +142,8 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testInsertPreppedAndCleanByVersions() throws Exception {
- testInsertAndCleanByVersions(HoodieWriteClient::insertPreppedRecords,
- HoodieWriteClient::upsertPreppedRecords, true);
+ testInsertAndCleanByVersions(HoodieWriteClient::insertPreppedRecords, HoodieWriteClient::upsertPreppedRecords,
+ true);
}
/**
@@ -178,15 +175,13 @@ public class TestCleaner extends TestHoodieClientBase {
*/
private void testInsertAndCleanByVersions(
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn,
- Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn,
- boolean isPreppedAPI
- ) throws Exception {
+ Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn, boolean isPreppedAPI)
+ throws Exception {
int maxVersions = 2; // keep upto 2 versions for each file
- HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
- HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
- .retainFileVersions(maxVersions).build())
- .withParallelism(1, 1).withBulkInsertParallelism(1)
- .withFinalizeWriteParallelism(1)
+ HoodieWriteConfig cfg = getConfigBuilder()
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(maxVersions).build())
+ .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
.build();
try (HoodieWriteClient client = getHoodieWriteClient(cfg);) {
@@ -204,8 +199,7 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
for (String partitionPath : dataGen.getPartitionPaths()) {
TableFileSystemView fsView = table.getFileSystemView();
- Option<Boolean> added = Option.fromJavaOptional(fsView.getAllFileGroups(partitionPath).findFirst()
- .map(fg -> {
+ Option<Boolean> added = Option.fromJavaOptional(fsView.getAllFileGroups(partitionPath).findFirst().map(fg -> {
fg.getLatestFileSlice().map(fs -> compactionFileIdToLatestFileSlice.put(fg.getFileGroupId(), fs));
return true;
}));
@@ -234,8 +228,7 @@ public class TestCleaner extends TestHoodieClientBase {
client.startCommitWithTime(newInstantTime);
List<HoodieRecord> records = recordUpsertGenWrappedFunction.apply(newInstantTime, 100);
- List<WriteStatus> statuses =
- upsertFn.apply(client, jsc.parallelize(records, 1), newInstantTime).collect();
+ List<WriteStatus> statuses = upsertFn.apply(client, jsc.parallelize(records, 1), newInstantTime).collect();
// Verify there are no errors
assertNoWriteErrors(statuses);
@@ -249,8 +242,8 @@ public class TestCleaner extends TestHoodieClientBase {
// compute all the versions of all files, from time 0
HashMap<String, TreeSet<String>> fileIdToVersions = new HashMap<>();
for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) {
- HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
- .fromBytes(timeline.getInstantDetails(entry).get(), HoodieCommitMetadata.class);
+ HoodieCommitMetadata commitMetadata =
+ HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(entry).get(), HoodieCommitMetadata.class);
for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) {
if (!fileIdToVersions.containsKey(wstat.getFileId())) {
@@ -267,8 +260,8 @@ public class TestCleaner extends TestHoodieClientBase {
// Ensure latest file-slice selected for compaction is retained
Option<HoodieDataFile> dataFileForCompactionPresent =
Option.fromJavaOptional(fileGroup.getAllDataFiles().filter(df -> {
- return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId())
- .getBaseInstantTime().equals(df.getCommitTime());
+ return compactionFileIdToLatestFileSlice.get(fileGroup.getFileGroupId()).getBaseInstantTime()
+ .equals(df.getCommitTime());
}).findAny());
Assert.assertTrue("Data File selected for compaction is retained",
dataFileForCompactionPresent.isPresent());
@@ -310,8 +303,7 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testInsertPreppedAndCleanByCommits() throws Exception {
- testInsertAndCleanByCommits(HoodieWriteClient::insertPreppedRecords,
- HoodieWriteClient::upsertPreppedRecords, true);
+ testInsertAndCleanByCommits(HoodieWriteClient::insertPreppedRecords, HoodieWriteClient::upsertPreppedRecords, true);
}
/**
@@ -343,15 +335,13 @@ public class TestCleaner extends TestHoodieClientBase {
*/
private void testInsertAndCleanByCommits(
Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> insertFn,
- Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn,
- boolean isPreppedAPI
- ) throws Exception {
+ Function3<JavaRDD<WriteStatus>, HoodieWriteClient, JavaRDD<HoodieRecord>, String> upsertFn, boolean isPreppedAPI)
+ throws Exception {
int maxCommits = 3; // keep upto 3 commits from the past
- HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(
- HoodieCompactionConfig.newBuilder()
+ HoodieWriteConfig cfg = getConfigBuilder()
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainCommits(maxCommits).build())
- .withParallelism(1, 1).withBulkInsertParallelism(1)
- .withFinalizeWriteParallelism(1)
+ .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1)
.withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build())
.build();
HoodieWriteClient client = getHoodieWriteClient(cfg);
@@ -370,8 +360,7 @@ public class TestCleaner extends TestHoodieClientBase {
client.startCommitWithTime(newCommitTime);
List<HoodieRecord> records = recordUpsertGenWrappedFunction.apply(newCommitTime, 100);
- List<WriteStatus> statuses =
- upsertFn.apply(client, jsc.parallelize(records, 1), newCommitTime).collect();
+ List<WriteStatus> statuses = upsertFn.apply(client, jsc.parallelize(records, 1), newCommitTime).collect();
// Verify there are no errors
assertNoWriteErrors(statuses);
@@ -381,9 +370,9 @@ public class TestCleaner extends TestHoodieClientBase {
Option<HoodieInstant> earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1);
Set<HoodieInstant> acceptableCommits = activeTimeline.getInstants().collect(Collectors.toSet());
if (earliestRetainedCommit.isPresent()) {
- acceptableCommits.removeAll(
- activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp()).getInstants()
- .collect(Collectors.toSet()));
+ acceptableCommits
+ .removeAll(activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp())
+ .getInstants().collect(Collectors.toSet()));
acceptableCommits.add(earliestRetainedCommit.get());
}
@@ -412,18 +401,19 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testKeepLatestFileVersions() throws IOException {
- HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
+ HoodieWriteConfig config =
+ HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
// make 1 commit, with 1 file per partition
HoodieTestUtils.createCommitFiles(basePath, "000");
- String file1P0C0 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
- String file1P1C0 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
+ String file1P0C0 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
+ String file1P1C0 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -434,24 +424,22 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0, assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles() getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size()); .size());
assertTrue(HoodieTestUtils assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0)); file1P0C0));
assertTrue(HoodieTestUtils assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0)); file1P1C0));
// make next commit, with 1 insert & 1 update per partition // make next commit, with 1 insert & 1 update per partition
HoodieTestUtils.createCommitFiles(basePath, "001"); HoodieTestUtils.createCommitFiles(basePath, "001");
metaClient = HoodieTableMetaClient.reload(metaClient); metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc); table = HoodieTable.getHoodieTable(metaClient, config, jsc);
String file2P0C1 = HoodieTestUtils String file2P0C1 =
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
String file2P1C1 = HoodieTestUtils String file2P1C1 =
.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
HoodieTestUtils HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
HoodieTestUtils
.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = table.clean(jsc); List<HoodieCleanStat> hoodieCleanStatsTwo = table.clean(jsc);
assertEquals("Must clean 1 file", 1, assertEquals("Must clean 1 file", 1,
@@ -460,47 +448,44 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must clean 1 file", 1,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file2P1C1));
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
+ file2P1C1));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0C0));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH,
+ "000", file1P1C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "002");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
- String file3P0C2 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
+ String file3P0C2 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = table.clean(jsc);
assertEquals("Must clean two files", 2,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file1P0C0));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
+ file3P0C2));
// No cleaning on partially written file, with no commit.
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFour = table.clean(jsc);
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
+ file3P0C2));
}
/**
@@ -509,37 +494,33 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestFileVersionsMOR() throws IOException {
- HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
+ HoodieWriteConfig config =
+ HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build())
.build();
- HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath,
- HoodieTableType.MERGE_ON_READ);
+ HoodieTableMetaClient metaClient =
+ HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
// Make 3 files, one base file and 2 log files associated with base file
- String file1P0 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
- String file2P0L0 = HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0,
- Option.empty());
- String file2P0L1 = HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0,
- Option.of(2));
+ String file1P0 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
+ String file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath,
+ HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Option.empty());
+ String file2P0L1 = HoodieTestUtils.createNewLogFile(fs, basePath,
+ HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Option.of(2));
// make 1 compaction commit
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "000");
// Make 4 files, one base file and 3 log files associated with base file
HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0);
- file2P0L0 = HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
- Option.empty());
- file2P0L0 = HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
- Option.of(2));
- file2P0L0 = HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0,
- Option.of(3));
+ file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
+ "001", file1P0, Option.empty());
+ file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
+ "001", file1P0, Option.of(2));
+ file2P0L0 = HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
+ "001", file1P0, Option.of(3));
// make 1 compaction commit
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");
@@ -548,16 +529,12 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must clean three files, one parquet and 2 log files", 3,
getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0));
- assertFalse(
- HoodieTestUtils
- .doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0,
- Option.empty()));
- assertFalse(
- HoodieTestUtils
- .doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0,
- Option.of(2)));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0));
+ assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file2P0L0, Option.empty()));
+ assertFalse(HoodieTestUtils.doesLogFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file2P0L0, Option.of(2)));
}
/**
@@ -566,16 +543,17 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestCommits() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
+ .build();
// make 1 commit, with 1 file per partition
HoodieTestUtils.createCommitFiles(basePath, "000");
- String file1P0C0 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
- String file1P1C0 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
+ String file1P0C0 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000");
+ String file1P1C0 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000");
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
@@ -587,24 +565,22 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsOne, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
+ file1P1C0));
// make next commit, with 1 insert & 1 update per partition
HoodieTestUtils.createCommitFiles(basePath, "001");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
- String file2P0C1 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
- String file2P1C1 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
+ String file2P0C1 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001"); // insert
+ String file2P1C1 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001"); // insert
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update
List<HoodieCleanStat> hoodieCleanStatsTwo = table.clean(jsc);
assertEquals("Must not clean any files", 0,
@@ -613,78 +589,73 @@ public class TestCleaner extends TestHoodieClientBase {
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsTwo, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001", file2P1C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "001",
+ file2P1C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH, "000",
+ file1P1C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "002");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
- String file3P0C2 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1); // update
+ String file3P0C2 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002");
List<HoodieCleanStat> hoodieCleanStatsThree = table.clean(jsc);
assertEquals("Must not clean any file. We have to keep 1 version before the latest commit time to keep", 0,
getCleanStat(hoodieCleanStatsThree, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.getSuccessDeleteFiles().size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0C0));
// make next commit, with 2 updates to existing files, and 1 insert
HoodieTestUtils.createCommitFiles(basePath, "003");
metaClient = HoodieTableMetaClient.reload(metaClient);
table = HoodieTable.getHoodieTable(metaClient, config, jsc);
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file1P0C0); // update
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file2P0C1); // update
- String file4P0C3 = HoodieTestUtils
- .createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003");
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file1P0C0); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file2P0C1); // update
+ String file4P0C3 =
+ HoodieTestUtils.createNewDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003");
List<HoodieCleanStat> hoodieCleanStatsFour = table.clean(jsc);
assertEquals("Must not clean one old file", 1,
getCleanStat(hoodieCleanStatsFour, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertFalse(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file2P0C1));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002", file3P0C2));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003", file4P0C3));
+ assertFalse(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "000",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
+ file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "002",
+ file3P0C2));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "003",
+ file4P0C3));
// No cleaning on partially written file, with no commit.
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "004", file3P0C2); // update
List<HoodieCleanStat> hoodieCleanStatsFive = table.clean(jsc);
assertEquals("Must not clean any files", 0,
getCleanStat(hoodieCleanStatsFive, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
.size());
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0));
- assertTrue(HoodieTestUtils
- .doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file1P0C0));
+ assertTrue(HoodieTestUtils.doesDataFileExist(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, "001",
+ file2P0C1));
}
/**
@@ -711,8 +682,9 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testCleaningWithZeroPartitonPaths() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
+ .build();
// Make a commit, although there are no partitionPaths.
// Example use-case of this is when a client wants to create a table
@@ -732,8 +704,9 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testCleaningSkewedPartitons() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
+ .build();
Map<Long, Long> stageOneShuffleReadTaskRecordsCountMap = new HashMap<>();
// Since clean involves repartition in order to uniformly distribute data,
@@ -783,22 +756,20 @@ public class TestCleaner extends TestHoodieClientBase {
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
List<HoodieCleanStat> hoodieCleanStats = table.clean(jsc);
- assertEquals(100,
- getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles()
- .size());
- assertEquals(10,
- getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles()
- .size());
- assertEquals(10,
- getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH).getSuccessDeleteFiles()
- .size());
+ assertEquals(100, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
+ .getSuccessDeleteFiles().size());
+ assertEquals(10, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH)
+ .getSuccessDeleteFiles().size());
+ assertEquals(10, getCleanStat(hoodieCleanStats, HoodieTestDataGenerator.DEFAULT_THIRD_PARTITION_PATH)
+ .getSuccessDeleteFiles().size());
// 3 tasks are expected since the number of partitions is 3
assertEquals(3, stageOneShuffleReadTaskRecordsCountMap.keySet().size());
// Sum of all records processed = total number of files to clean
assertEquals(120,
stageOneShuffleReadTaskRecordsCountMap.values().stream().reduce((a, b) -> a + b).get().intValue());
- assertTrue("The skew in handling files to clean is not removed. "
+ assertTrue(
+ "The skew in handling files to clean is not removed. "
+ "Each task should handle more records than the partitionPath with least files "
+ "and less records than the partitionPath with most files.",
stageOneShuffleReadTaskRecordsCountMap.values().stream().filter(a -> a > 10 && a < 100).count() == 3);
@@ -811,8 +782,9 @@ public class TestCleaner extends TestHoodieClientBase {
@Test
public void testKeepLatestCommitsWithPendingCompactions() throws IOException {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build())
+ .build();
// Deletions:
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
@@ -830,9 +802,11 @@ public class TestCleaner extends TestHoodieClientBase {
*/
@Test
public void testKeepLatestVersionsWithPendingCompactions() throws IOException {
- HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
- .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy(
- HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build()).build();
+ HoodieWriteConfig config =
+ HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true)
+ .withCompactionConfig(HoodieCompactionConfig.newBuilder()
+ .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build())
+ .build();
// Deletions:
// . FileId Parquet Logs Total Retained Commits
// FileId7 5 10 15 009, 011
@@ -853,8 +827,8 @@ public class TestCleaner extends TestHoodieClientBase {
*/
public void testPendingCompactions(HoodieWriteConfig config, int expNumFilesDeleted,
int expNumFilesUnderCompactionDeleted) throws IOException {
- HoodieTableMetaClient metaClient = HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath,
- HoodieTableType.MERGE_ON_READ);
+ HoodieTableMetaClient metaClient =
+ HoodieTestUtils.init(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);
String[] instants = new String[] {"000", "001", "003", "005", "007", "009", "011", "013"};
String[] compactionInstants = new String[] {"002", "004", "006", "008", "010"};
Map<String, String> expFileIdToPendingCompaction = new HashMap<>();
@@ -870,13 +844,11 @@ public class TestCleaner extends TestHoodieClientBase {
// compactions
// FileIds 2-5 will be under compaction
int maxNumFileIds = 7;
- String[] fileIds = new String[]
- {"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"};
+ String[] fileIds = new String[] {"fileId1", "fileId2", "fileId3", "fileId4", "fileId5", "fileId6", "fileId7"};
int maxNumFileIdsForCompaction = 4;
for (int i = 0; i < maxNumFileIds; i++) {
- final String fileId = HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
- fileIds[i]);
+ final String fileId = HoodieTestUtils.createDataFile(basePath,
+ HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0], fileIds[i]);
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
fileId, Option.empty());
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[0],
@@ -887,8 +859,8 @@ public class TestCleaner extends TestHoodieClientBase {
expFileIdToPendingCompaction.put(fileId, compactionInstants[j]);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
- FileSlice slice = table.getRTFileSystemView().getLatestFileSlices(
- HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
+ FileSlice slice =
+ table.getRTFileSystemView().getLatestFileSlices(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH)
.filter(fs -> fs.getFileId().equals(fileId)).findFirst().get();
List<FileSlice> slices = new ArrayList<>();
if (compactionInstantsToFileSlices.containsKey(compactionInstants[j])) {
@@ -898,20 +870,16 @@ public class TestCleaner extends TestHoodieClientBase {
compactionInstantsToFileSlices.put(compactionInstants[j], slices);
// Add log-files to simulate delta-commits after pending compaction
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
- compactionInstants[j],
- fileId, Option.empty());
+ compactionInstants[j], fileId, Option.empty());
HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
- compactionInstants[j],
- fileId, Option.of(2));
+ compactionInstants[j], fileId, Option.of(2));
} else {
- HoodieTestUtils
- .createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId);
- HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
- Option.empty());
- HoodieTestUtils
- .createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
- Option.of(2));
+ HoodieTestUtils.createDataFile(basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, instants[j],
+ fileId);
+ HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
+ instants[j], fileId, Option.empty());
+ HoodieTestUtils.createNewLogFile(fs, basePath, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
+ instants[j], fileId, Option.of(2));
fileIdToLatestInstantBeforeCompaction.put(fileId, instants[j]);
}
}
@@ -921,9 +889,8 @@ public class TestCleaner extends TestHoodieClientBase {
for (String instant : compactionInstants) {
List<FileSlice> fileSliceList = compactionInstantsToFileSlices.get(instant);
if (null != fileSliceList) {
- HoodieTestUtils.createCompactionRequest(metaClient, instant,
- fileSliceList.stream().map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs))
- .collect(Collectors.toList()));
+ HoodieTestUtils.createCompactionRequest(metaClient, instant, fileSliceList.stream()
+ .map(fs -> Pair.of(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, fs)).collect(Collectors.toList()));
}
}
@@ -939,22 +906,19 @@ public class TestCleaner extends TestHoodieClientBase {
expFileIdToPendingCompaction.entrySet().stream().forEach(entry -> {
String fileId = entry.getKey();
String baseInstantForCompaction = fileIdToLatestInstantBeforeCompaction.get(fileId);
- Option<FileSlice> fileSliceForCompaction =
- Option.fromJavaOptional(
- hoodieTable.getRTFileSystemView().getLatestFileSlicesBeforeOrOn(
- HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH,
- baseInstantForCompaction, true).filter(fs -> fs.getFileId().equals(fileId)).findFirst());
+ Option<FileSlice> fileSliceForCompaction = Option.fromJavaOptional(hoodieTable.getRTFileSystemView()
+ .getLatestFileSlicesBeforeOrOn(HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, baseInstantForCompaction,
+ true)
+ .filter(fs -> fs.getFileId().equals(fileId)).findFirst());
Assert.assertTrue("Base Instant for Compaction must be preserved", fileSliceForCompaction.isPresent());
Assert.assertTrue("FileSlice has data-file", fileSliceForCompaction.get().getDataFile().isPresent());
- Assert.assertEquals("FileSlice has log-files", 2,
- fileSliceForCompaction.get().getLogFiles().count());
+ Assert.assertEquals("FileSlice has log-files", 2, fileSliceForCompaction.get().getLogFiles().count());
});
// Test for progress (Did we clean some files ?)
- long numFilesUnderCompactionDeleted =
- hoodieCleanStats.stream().flatMap(cleanStat -> {
- return convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns()).map(
- fileIdWithCommitTime -> {
+ long numFilesUnderCompactionDeleted = hoodieCleanStats.stream().flatMap(cleanStat -> {
+ return convertPathToFileIdWithCommitTime(newMetaClient, cleanStat.getDeletePathPatterns())
+ .map(fileIdWithCommitTime -> {
if (expFileIdToPendingCompaction.containsKey(fileIdWithCommitTime.getKey())) {
Assert.assertTrue("Deleted instant time must be less than pending compaction",
HoodieTimeline.compareTimestamps(
@@ -965,12 +929,12 @@ public class TestCleaner extends TestHoodieClientBase {
return false;
});
}).filter(x -> x).count();
- long numDeleted = hoodieCleanStats.stream()
- .flatMap(cleanStat -> cleanStat.getDeletePathPatterns().stream()).count();
+ long numDeleted =
+ hoodieCleanStats.stream().flatMap(cleanStat -> cleanStat.getDeletePathPatterns().stream()).count();
// Tighter check for regression
Assert.assertEquals("Correct number of files deleted", expNumFilesDeleted, numDeleted);
- Assert.assertEquals("Correct number of files under compaction deleted",
- expNumFilesUnderCompactionDeleted, numFilesUnderCompactionDeleted);
+ Assert.assertEquals("Correct number of files under compaction deleted", expNumFilesUnderCompactionDeleted,
+ numFilesUnderCompactionDeleted);
}
/**
@@ -991,6 +955,7 @@ public class TestCleaner extends TestHoodieClientBase {
/***
* Helper method to return temporary files count
+ *
* @return Number of temporary files found
* @throws IOException in case of error
*/
@@ -1004,19 +969,17 @@ public class TestCleaner extends TestHoodieClientBase {
return count;
}
- private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(
- final HoodieTableMetaClient metaClient, List<String> paths) {
- Predicate<String> roFilePredicate = path ->
- path.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
- Predicate<String> rtFilePredicate = path ->
- path.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
- Stream<Pair<String, String>> stream1 = paths.stream().filter(roFilePredicate)
- .map(fullPath -> {
+ private Stream<Pair<String, String>> convertPathToFileIdWithCommitTime(final HoodieTableMetaClient metaClient,
+ List<String> paths) {
+ Predicate<String> roFilePredicate =
+ path -> path.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
+ Predicate<String> rtFilePredicate =
+ path -> path.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
+ Stream<Pair<String, String>> stream1 = paths.stream().filter(roFilePredicate).map(fullPath -> {
String fileName = Paths.get(fullPath).getFileName().toString();
return Pair.of(FSUtils.getFileId(fileName), FSUtils.getCommitTime(fileName));
});
- Stream<Pair<String, String>> stream2 = paths.stream().filter(rtFilePredicate)
- .map(path -> {
+ Stream<Pair<String, String>> stream2 = paths.stream().filter(rtFilePredicate).map(path -> {
return Pair.of(FSUtils.getFileIdFromLogPath(new Path(path)),
FSUtils.getBaseCommitTimeFromLogPath(new Path(path)));
});

Some files were not shown because too many files have changed in this diff