diff --git a/hoodie-cli/pom.xml b/hoodie-cli/pom.xml index 40a6cc928..d590153a8 100644 --- a/hoodie-cli/pom.xml +++ b/hoodie-cli/pom.xml @@ -15,7 +15,9 @@ ~ limitations under the License. --> - + hoodie com.uber.hoodie @@ -117,7 +119,7 @@ - + org.apache.rat apache-rat-plugin diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java index 0b8e8fced..27f0ab516 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieCLI.java @@ -17,38 +17,38 @@ package com.uber.hoodie.cli; import com.uber.hoodie.common.table.HoodieTableMetaClient; +import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import java.io.IOException; - public class HoodieCLI { - public static Configuration conf; - public static FileSystem fs; - public static CLIState state = CLIState.INIT; - public static HoodieTableMetaClient tableMetadata; - public static HoodieTableMetaClient syncTableMetadata; + + public static Configuration conf; + public static FileSystem fs; + public static CLIState state = CLIState.INIT; + public static HoodieTableMetaClient tableMetadata; + public static HoodieTableMetaClient syncTableMetadata; - public enum CLIState { - INIT, DATASET, SYNC + public enum CLIState { + INIT, DATASET, SYNC + } + + public static boolean initConf() { + if (HoodieCLI.conf == null) { + HoodieCLI.conf = new Configuration(); + return true; } + return false; + } - public static boolean initConf() { - if (HoodieCLI.conf == null) { - HoodieCLI.conf = new Configuration(); - return true; - } - return false; + public static void initFS(boolean force) throws IOException { + if (fs == null || force) { + fs = FileSystem.get(conf); } + } - public static void initFS(boolean force) throws IOException { - if(fs == null || force) { - fs = FileSystem.get(conf); - } - } - - public static void 
setTableMetadata(HoodieTableMetaClient tableMetadata) { - HoodieCLI.tableMetadata = tableMetadata; - } + public static void setTableMetadata(HoodieTableMetaClient tableMetadata) { + HoodieCLI.tableMetadata = tableMetadata; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieHistoryFileNameProvider.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieHistoryFileNameProvider.java index aecf1de49..01440a274 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieHistoryFileNameProvider.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieHistoryFileNameProvider.java @@ -25,13 +25,13 @@ import org.springframework.stereotype.Component; @Order(Ordered.HIGHEST_PRECEDENCE) public class HoodieHistoryFileNameProvider extends DefaultHistoryFileNameProvider { - public String getHistoryFileName() { - return "hoodie-cmd.log"; - } + public String getHistoryFileName() { + return "hoodie-cmd.log"; + } - @Override - public String getProviderName() { - return "Hoodie file name provider"; - } + @Override + public String getProviderName() { + return "Hoodie file name provider"; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrintHelper.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrintHelper.java index 34b2c2414..b6625718b 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrintHelper.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrintHelper.java @@ -17,18 +17,17 @@ package com.uber.hoodie.cli; import dnl.utils.text.table.TextTable; - import java.io.ByteArrayOutputStream; import java.io.PrintStream; import java.nio.charset.Charset; public class HoodiePrintHelper { - public static String print(String[] header, String[][] rows) { - TextTable textTable = new TextTable(header, rows); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintStream ps = new PrintStream(baos); - textTable.printTable(ps, 4); - return new String(baos.toByteArray(), Charset.forName("utf-8")); - } + 
public static String print(String[] header, String[][] rows) { + TextTable textTable = new TextTable(header, rows); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(baos); + textTable.printTable(ps, 4); + return new String(baos.toByteArray(), Charset.forName("utf-8")); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrompt.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrompt.java index 268ec1721..2839cac99 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrompt.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodiePrompt.java @@ -16,7 +16,6 @@ package com.uber.hoodie.cli; -import com.uber.hoodie.common.table.HoodieTableConfig; import org.springframework.core.Ordered; import org.springframework.core.annotation.Order; import org.springframework.shell.plugin.support.DefaultPromptProvider; @@ -26,27 +25,27 @@ import org.springframework.stereotype.Component; @Order(Ordered.HIGHEST_PRECEDENCE) public class HoodiePrompt extends DefaultPromptProvider { - @Override - public String getPrompt() { - if (HoodieCLI.tableMetadata != null) { - String tableName = HoodieCLI.tableMetadata.getTableConfig().getTableName(); - switch (HoodieCLI.state) { - case INIT: - return "hoodie->"; - case DATASET: - return "hoodie:" + tableName + "->"; - case SYNC: - return "hoodie:" + tableName + " <==> " - + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->"; - } - return "hoodie:" + tableName + "->"; - } - return "hoodie->"; + @Override + public String getPrompt() { + if (HoodieCLI.tableMetadata != null) { + String tableName = HoodieCLI.tableMetadata.getTableConfig().getTableName(); + switch (HoodieCLI.state) { + case INIT: + return "hoodie->"; + case DATASET: + return "hoodie:" + tableName + "->"; + case SYNC: + return "hoodie:" + tableName + " <==> " + + HoodieCLI.syncTableMetadata.getTableConfig().getTableName() + "->"; + } + return "hoodie:" + tableName + "->"; } + return 
"hoodie->"; + } - @Override - public String getProviderName() { - return "Hoodie provider"; - } + @Override + public String getProviderName() { + return "Hoodie provider"; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieSplashScreen.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieSplashScreen.java index fa27d5749..d6a16891a 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieSplashScreen.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/HoodieSplashScreen.java @@ -22,34 +22,39 @@ import org.springframework.shell.plugin.support.DefaultBannerProvider; import org.springframework.shell.support.util.OsUtils; import org.springframework.stereotype.Component; -@Component @Order(Ordered.HIGHEST_PRECEDENCE) public class HoodieSplashScreen +@Component +@Order(Ordered.HIGHEST_PRECEDENCE) +public class HoodieSplashScreen extends DefaultBannerProvider { - private static String screen = "============================================" + OsUtils.LINE_SEPARATOR + - "* *" + OsUtils.LINE_SEPARATOR + - "* _ _ _ _ *" + OsUtils.LINE_SEPARATOR + - "* | | | | | (_) *" + OsUtils.LINE_SEPARATOR + - "* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR + - "* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" + - OsUtils.LINE_SEPARATOR + - "* | | | | (_) | (_) | (_| | | __/ *" + OsUtils.LINE_SEPARATOR + - "* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" + - OsUtils.LINE_SEPARATOR + - "* *" + OsUtils.LINE_SEPARATOR + - "============================================" + OsUtils.LINE_SEPARATOR; - public String getBanner() { - return screen; - } + private static String screen = + "============================================" + OsUtils.LINE_SEPARATOR + + "* *" + OsUtils.LINE_SEPARATOR + + "* _ _ _ _ *" + OsUtils.LINE_SEPARATOR + + "* | | | | | (_) *" + OsUtils.LINE_SEPARATOR + + "* | |__| | ___ ___ __| |_ ___ *" + OsUtils.LINE_SEPARATOR + + "* | __ |/ _ \\ / _ \\ / _` | |/ _ \\ *" + + OsUtils.LINE_SEPARATOR + + "* | | | | (_) | (_) | (_| | | __/ *" + 
OsUtils.LINE_SEPARATOR + + "* |_| |_|\\___/ \\___/ \\__,_|_|\\___| *" + + OsUtils.LINE_SEPARATOR + + "* *" + OsUtils.LINE_SEPARATOR + + "============================================" + OsUtils.LINE_SEPARATOR; - public String getVersion() { - return "1.0"; - } + public String getBanner() { + return screen; + } - public String getWelcomeMessage() { - return "Welcome to Hoodie CLI. Please type help if you are looking for help. "; - } + public String getVersion() { + return "1.0"; + } - @Override public String getProviderName() { - return "Hoodie Banner"; - } + public String getWelcomeMessage() { + return "Welcome to Hoodie CLI. Please type help if you are looking for help. "; + } + + @Override + public String getProviderName() { + return "Hoodie Banner"; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java index 779df13f0..c0d7924ff 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/Main.java @@ -16,18 +16,16 @@ package com.uber.hoodie.cli; +import java.io.IOException; import org.springframework.shell.Bootstrap; -import java.io.IOException; - public class Main { - /** - * Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging inside an IDE - * - * @param args - * @throws IOException - */ - public static void main(String[] args) throws IOException { - Bootstrap.main(args); - } + + /** + * Main class that delegates to Spring Shell's Bootstrap class in order to simplify debugging + * inside an IDE + */ + public static void main(String[] args) throws IOException { + Bootstrap.main(args); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/ArchivedCommitsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/ArchivedCommitsCommand.java index 147734990..069c6564a 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/ArchivedCommitsCommand.java +++ 
b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/ArchivedCommitsCommand.java @@ -24,6 +24,10 @@ import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.log.HoodieLogFormat; import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock; import com.uber.hoodie.common.util.FSUtils; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileStatus; @@ -34,90 +38,90 @@ import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - @Component public class ArchivedCommitsCommand implements CommandMarker { - @CliAvailabilityIndicator({"show archived commits"}) - public boolean isShowArchivedCommitAvailable() { - return HoodieCLI.tableMetadata != null; + @CliAvailabilityIndicator({"show archived commits"}) + public boolean isShowArchivedCommitAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "show archived commits", help = "Read commits from archived files and show details") + public String showCommits( + @CliOption(key = { + "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") + final Integer limit) throws IOException { + + System.out + .println("===============> Showing only " + limit + " archived commits <==============="); + FileStatus[] fsStatuses = FSUtils.getFs().globStatus( + new Path(HoodieCLI.tableMetadata.getBasePath() + "/.hoodie/.commits_.archive*")); + List allCommits = new ArrayList<>(); + for (FileStatus fs : fsStatuses) { + //read the archived file + HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(), + new 
HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema(), false); + + List readRecords = new ArrayList<>(); + //read the avro blocks + while (reader.hasNext()) { + HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); + List records = blk.getRecords(); + readRecords.addAll(records); + } + List readCommits = readRecords.stream().map(r -> (GenericRecord) r) + .map(r -> readCommit(r)).limit(limit).collect(Collectors.toList()); + allCommits.addAll(readCommits); } + return HoodiePrintHelper.print( + new String[]{"CommitTime", "CommitType", "CommitDetails"}, + allCommits.toArray(new String[allCommits.size()][])); + } - @CliCommand(value = "show archived commits", help = "Read commits from archived files and show details") - public String showCommits( - @CliOption(key = {"limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") - final Integer limit) throws IOException { - - System.out.println("===============> Showing only " + limit + " archived commits <==============="); - FileStatus [] fsStatuses = FSUtils.getFs().globStatus(new Path(HoodieCLI.tableMetadata.getBasePath() + "/.hoodie/.commits_.archive*")); - List allCommits = new ArrayList<>(); - for(FileStatus fs : fsStatuses) { - //read the archived file - HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(), - new HoodieLogFile(fs.getPath()), HoodieArchivedMetaEntry.getClassSchema(), false); - - List readRecords = new ArrayList<>(); - //read the avro blocks - while (reader.hasNext()) { - HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - List records = blk.getRecords(); - readRecords.addAll(records); - } - List readCommits = readRecords.stream().map(r -> (GenericRecord)r).map(r -> readCommit(r)).limit(limit).collect(Collectors.toList()); - allCommits.addAll(readCommits); + private String[] readCommit(GenericRecord record) { + List commitDetails = new ArrayList<>(); + try { + switch (record.get("actionType").toString()) { + case 
HoodieTimeline.CLEAN_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieCleanMetadata").toString()); + break; } - return HoodiePrintHelper.print( - new String[] {"CommitTime", "CommitType", "CommitDetails"}, allCommits.toArray(new String[allCommits.size()][])); - } - - private String[] readCommit(GenericRecord record) { - List commitDetails = new ArrayList<>(); - try { - switch (record.get("actionType").toString()) { - case HoodieTimeline.CLEAN_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieCleanMetadata").toString()); - break; - } - case HoodieTimeline.COMMIT_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieCommitMetadata").toString()); - break; - } - case HoodieTimeline.COMPACTION_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieCompactionMetadata").toString()); - break; - } - case HoodieTimeline.DELTA_COMMIT_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieCommitMetadata").toString()); - break; - } - case HoodieTimeline.ROLLBACK_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieRollbackMetadata").toString()); - break; - } - case HoodieTimeline.SAVEPOINT_ACTION: { - commitDetails.add(record.get("commitTime").toString()); - commitDetails.add(record.get("actionType").toString()); - commitDetails.add(record.get("hoodieSavePointMetadata").toString()); - break; - } - } - } catch (Exception e) 
{ - e.printStackTrace(); + case HoodieTimeline.COMMIT_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieCommitMetadata").toString()); + break; } - return commitDetails.toArray(new String[commitDetails.size()]); + case HoodieTimeline.COMPACTION_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieCompactionMetadata").toString()); + break; + } + case HoodieTimeline.DELTA_COMMIT_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieCommitMetadata").toString()); + break; + } + case HoodieTimeline.ROLLBACK_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieRollbackMetadata").toString()); + break; + } + case HoodieTimeline.SAVEPOINT_ACTION: { + commitDetails.add(record.get("commitTime").toString()); + commitDetails.add(record.get("actionType").toString()); + commitDetails.add(record.get("hoodieSavePointMetadata").toString()); + break; + } + } + } catch (Exception e) { + e.printStackTrace(); } -} \ No newline at end of file + return commitDetails.toArray(new String[commitDetails.size()]); + } +} diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CleansCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CleansCommand.java index 160b9f3c6..1b5a9602d 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CleansCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CleansCommand.java @@ -24,89 +24,90 @@ import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import 
com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.util.AvroUtils; -import org.springframework.shell.core.CommandMarker; -import org.springframework.shell.core.annotation.CliAvailabilityIndicator; -import org.springframework.shell.core.annotation.CliCommand; -import org.springframework.shell.core.annotation.CliOption; -import org.springframework.stereotype.Component; - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import org.springframework.shell.core.CommandMarker; +import org.springframework.shell.core.annotation.CliAvailabilityIndicator; +import org.springframework.shell.core.annotation.CliCommand; +import org.springframework.shell.core.annotation.CliOption; +import org.springframework.stereotype.Component; @Component public class CleansCommand implements CommandMarker { - @CliAvailabilityIndicator({"cleans show"}) - public boolean isShowAvailable() { - return HoodieCLI.tableMetadata != null; - } - @CliAvailabilityIndicator({"cleans refresh"}) - public boolean isRefreshAvailable() { - return HoodieCLI.tableMetadata != null; - } + @CliAvailabilityIndicator({"cleans show"}) + public boolean isShowAvailable() { + return HoodieCLI.tableMetadata != null; + } - @CliAvailabilityIndicator({"clean showpartitions"}) - public boolean isCommitShowAvailable() { - return HoodieCLI.tableMetadata != null; - } + @CliAvailabilityIndicator({"cleans refresh"}) + public boolean isRefreshAvailable() { + return HoodieCLI.tableMetadata != null; + } - @CliCommand(value = "cleans show", help = "Show the cleans") - public String showCleans() throws IOException { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants(); - List cleans = timeline.getInstants().collect(Collectors.toList()); - String[][] rows = new 
String[cleans.size()][]; - Collections.reverse(cleans); - for (int i = 0; i < cleans.size(); i++) { - HoodieInstant clean = cleans.get(i); - HoodieCleanMetadata cleanMetadata = - AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get()); - rows[i] = new String[] {clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(), - String.valueOf(cleanMetadata.getTotalFilesDeleted()), - String.valueOf(cleanMetadata.getTimeTakenInMillis())}; - } - return HoodiePrintHelper.print( - new String[] {"CleanTime", "EarliestCommandRetained", "Total Files Deleted", - "Total Time Taken"}, rows); - } + @CliAvailabilityIndicator({"clean showpartitions"}) + public boolean isCommitShowAvailable() { + return HoodieCLI.tableMetadata != null; + } - @CliCommand(value = "cleans refresh", help = "Refresh the commits") - public String refreshCleans() throws IOException { - HoodieTableMetaClient metadata = - new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); - HoodieCLI.setTableMetadata(metadata); - return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; + @CliCommand(value = "cleans show", help = "Show the cleans") + public String showCleans() throws IOException { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants(); + List cleans = timeline.getInstants().collect(Collectors.toList()); + String[][] rows = new String[cleans.size()][]; + Collections.reverse(cleans); + for (int i = 0; i < cleans.size(); i++) { + HoodieInstant clean = cleans.get(i); + HoodieCleanMetadata cleanMetadata = + AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get()); + rows[i] = new String[]{clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(), + String.valueOf(cleanMetadata.getTotalFilesDeleted()), + String.valueOf(cleanMetadata.getTimeTakenInMillis())}; } + return 
HoodiePrintHelper.print( + new String[]{"CleanTime", "EarliestCommandRetained", "Total Files Deleted", + "Total Time Taken"}, rows); + } - @CliCommand(value = "clean showpartitions", help = "Show partition level details of a clean") - public String showCleanPartitions( - @CliOption(key = {"clean"}, help = "clean to show") - final String commitTime) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants(); - HoodieInstant cleanInstant = - new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime); + @CliCommand(value = "cleans refresh", help = "Refresh the commits") + public String refreshCleans() throws IOException { + HoodieTableMetaClient metadata = + new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); + HoodieCLI.setTableMetadata(metadata); + return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; + } - if (!timeline.containsInstant(cleanInstant)) { - return "Clean " + commitTime + " not found in metadata " + timeline; - } - HoodieCleanMetadata cleanMetadata = - AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get()); - List rows = new ArrayList<>(); - for (Map.Entry entry : cleanMetadata.getPartitionMetadata().entrySet()) { - String path = entry.getKey(); - HoodieCleanPartitionMetadata stats = entry.getValue(); - String policy = stats.getPolicy(); - String totalSuccessDeletedFiles = String.valueOf(stats.getSuccessDeleteFiles().size()); - String totalFailedDeletedFiles = String.valueOf(stats.getFailedDeleteFiles().size()); - rows.add(new String[] {path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles}); - } - return HoodiePrintHelper.print( - new String[] {"Partition Path", "Cleaning policy", "Total Files Successfully Deleted", - "Total Failed Deletions"}, rows.toArray(new String[rows.size()][])); + @CliCommand(value = 
"clean showpartitions", help = "Show partition level details of a clean") + public String showCleanPartitions( + @CliOption(key = {"clean"}, help = "clean to show") + final String commitTime) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants(); + HoodieInstant cleanInstant = + new HoodieInstant(false, HoodieTimeline.CLEAN_ACTION, commitTime); + + if (!timeline.containsInstant(cleanInstant)) { + return "Clean " + commitTime + " not found in metadata " + timeline; } + HoodieCleanMetadata cleanMetadata = + AvroUtils.deserializeHoodieCleanMetadata(timeline.getInstantDetails(cleanInstant).get()); + List rows = new ArrayList<>(); + for (Map.Entry entry : cleanMetadata + .getPartitionMetadata().entrySet()) { + String path = entry.getKey(); + HoodieCleanPartitionMetadata stats = entry.getValue(); + String policy = stats.getPolicy(); + String totalSuccessDeletedFiles = String.valueOf(stats.getSuccessDeleteFiles().size()); + String totalFailedDeletedFiles = String.valueOf(stats.getFailedDeleteFiles().size()); + rows.add(new String[]{path, policy, totalSuccessDeletedFiles, totalFailedDeletedFiles}); + } + return HoodiePrintHelper.print( + new String[]{"Partition Path", "Cleaning policy", "Total Files Successfully Deleted", + "Total Failed Deletions"}, rows.toArray(new String[rows.size()][])); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CommitsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CommitsCommand.java index 3caacfa81..c1a9e6dd9 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CommitsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/CommitsCommand.java @@ -27,7 +27,12 @@ import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import 
com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.util.NumericUtils; - +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.spark.launcher.SparkLauncher; import org.springframework.shell.core.CommandMarker; import org.springframework.shell.core.annotation.CliAvailabilityIndicator; @@ -35,228 +40,236 @@ import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - @Component public class CommitsCommand implements CommandMarker { - @CliAvailabilityIndicator({"commits show"}) - public boolean isShowAvailable() { - return HoodieCLI.tableMetadata != null; + + @CliAvailabilityIndicator({"commits show"}) + public boolean isShowAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"commits refresh"}) + public boolean isRefreshAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"commit rollback"}) + public boolean isRollbackAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"commit show"}) + public boolean isCommitShowAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "commits show", help = "Show the commits") + public String showCommits( + @CliOption(key = { + "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") + final Integer limit) throws IOException { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + 
List commits = timeline.getInstants().collect(Collectors.toList()); + String[][] rows = new String[commits.size()][]; + Collections.reverse(commits); + for (int i = 0; i < commits.size(); i++) { + HoodieInstant commit = commits.get(i); + HoodieCommitMetadata commitMetadata = + HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get()); + rows[i] = new String[]{commit.getTimestamp(), + NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()), + String.valueOf(commitMetadata.fetchTotalFilesInsert()), + String.valueOf(commitMetadata.fetchTotalFilesUpdated()), + String.valueOf(commitMetadata.fetchTotalPartitionsWritten()), + String.valueOf(commitMetadata.fetchTotalRecordsWritten()), + String.valueOf(commitMetadata.fetchTotalUpdateRecordsWritten()), + String.valueOf(commitMetadata.fetchTotalWriteErrors())}; + } + return HoodiePrintHelper.print( + new String[]{"CommitTime", "Total Written (B)", "Total Files Added", + "Total Files Updated", "Total Partitions Written", "Total Records Written", + "Total Update Records Written", "Total Errors"}, rows); + } + + @CliCommand(value = "commits refresh", help = "Refresh the commits") + public String refreshCommits() throws IOException { + HoodieTableMetaClient metadata = + new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); + HoodieCLI.setTableMetadata(metadata); + return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; + } + + @CliCommand(value = "commit rollback", help = "Rollback a commit") + public String rollbackCommit( + @CliOption(key = {"commit"}, help = "Commit to rollback") + final String commitTime, + @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") + final String sparkPropertiesPath) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + 
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, + commitTime); + + if (!timeline.containsInstant(commitInstant)) { + return "Commit " + commitTime + " not found in Commits " + timeline; } - @CliAvailabilityIndicator({"commits refresh"}) - public boolean isRefreshAvailable() { - return HoodieCLI.tableMetadata != null; + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), + commitTime, + HoodieCLI.tableMetadata.getBasePath()); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + // Refresh the current + refreshCommits(); + if (exitCode != 0) { + return "Commit " + commitTime + " failed to roll back"; } + return "Commit " + commitTime + " rolled back"; + } - @CliAvailabilityIndicator({"commit rollback"}) - public boolean isRollbackAvailable() { - return HoodieCLI.tableMetadata != null; + @CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit") + public String showCommitPartitions( + @CliOption(key = {"commit"}, help = "Commit to show") + final String commitTime) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, + commitTime); + + if (!timeline.containsInstant(commitInstant)) { + return "Commit " + commitTime + " not found in Commits " + timeline; } - - @CliAvailabilityIndicator({"commit show"}) - public boolean isCommitShowAvailable() { - return HoodieCLI.tableMetadata != null; - } - - @CliCommand(value = "commits show", help = "Show the commits") - public String showCommits( - @CliOption(key = { - "limit"}, mandatory = false, help = "Limit commits", unspecifiedDefaultValue = "10") - 
final Integer limit) throws IOException { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline().filterCompletedInstants(); - List commits = timeline.getInstants().collect(Collectors.toList()); - String[][] rows = new String[commits.size()][]; - Collections.reverse(commits); - for (int i = 0; i < commits.size(); i++) { - HoodieInstant commit = commits.get(i); - HoodieCommitMetadata commitMetadata = - HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get()); - rows[i] = new String[] {commit.getTimestamp(), - NumericUtils.humanReadableByteCount(commitMetadata.fetchTotalBytesWritten()), - String.valueOf(commitMetadata.fetchTotalFilesInsert()), - String.valueOf(commitMetadata.fetchTotalFilesUpdated()), - String.valueOf(commitMetadata.fetchTotalPartitionsWritten()), - String.valueOf(commitMetadata.fetchTotalRecordsWritten()), - String.valueOf(commitMetadata.fetchTotalUpdateRecordsWritten()), - String.valueOf(commitMetadata.fetchTotalWriteErrors())}; - } - return HoodiePrintHelper.print( - new String[] {"CommitTime", "Total Written (B)", "Total Files Added", - "Total Files Updated", "Total Partitions Written", "Total Records Written", - "Total Update Records Written", "Total Errors"}, rows); - } - - @CliCommand(value = "commits refresh", help = "Refresh the commits") - public String refreshCommits() throws IOException { - HoodieTableMetaClient metadata = - new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); - HoodieCLI.setTableMetadata(metadata); - return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; - } - - @CliCommand(value = "commit rollback", help = "Rollback a commit") - public String rollbackCommit( - @CliOption(key = {"commit"}, help = "Commit to rollback") - final String commitTime, - @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") - final String 
sparkPropertiesPath) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline().filterCompletedInstants(); - HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - - if (!timeline.containsInstant(commitInstant)) { - return "Commit " + commitTime + " not found in Commits " + timeline; - } - - SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); - sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), - commitTime, - HoodieCLI.tableMetadata.getBasePath()); - Process process = sparkLauncher.launch(); - InputStreamConsumer.captureOutput(process); - int exitCode = process.waitFor(); - // Refresh the current - refreshCommits(); - if (exitCode != 0) { - return "Commit " + commitTime + " failed to roll back"; - } - return "Commit " + commitTime + " rolled back"; - } - - @CliCommand(value = "commit showpartitions", help = "Show partition level details of a commit") - public String showCommitPartitions( - @CliOption(key = {"commit"}, help = "Commit to show") - final String commitTime) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline().filterCompletedInstants(); - HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - - if (!timeline.containsInstant(commitInstant)) { - return "Commit " + commitTime + " not found in Commits " + timeline; - } - HoodieCommitMetadata meta = - HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get()); - List rows = new ArrayList(); - for (Map.Entry> entry : meta.getPartitionToWriteStats() - .entrySet()) { - String path = entry.getKey(); - List stats = entry.getValue(); - long totalFilesAdded = 0; - long totalFilesUpdated = 0; - long 
totalRecordsUpdated = 0; - long totalRecordsInserted = 0; - long totalBytesWritten = 0; - long totalWriteErrors = 0; - for (HoodieWriteStat stat : stats) { - if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) { - totalFilesAdded += 1; - totalRecordsInserted += stat.getNumWrites(); - } else { - totalFilesUpdated += 1; - totalRecordsUpdated += stat.getNumUpdateWrites(); - } - totalBytesWritten += stat.getTotalWriteBytes(); - totalWriteErrors += stat.getTotalWriteErrors(); - } - rows.add(new String[] {path, String.valueOf(totalFilesAdded), - String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted), - String.valueOf(totalRecordsUpdated), - NumericUtils.humanReadableByteCount(totalBytesWritten), - String.valueOf(totalWriteErrors)}); - - } - return HoodiePrintHelper.print( - new String[] {"Partition Path", "Total Files Added", "Total Files Updated", - "Total Records Inserted", "Total Records Updated", "Total Bytes Written", - "Total Errors"}, rows.toArray(new String[rows.size()][])); - } - - @CliCommand(value = "commit showfiles", help = "Show file level details of a commit") - public String showCommitFiles( - @CliOption(key = {"commit"}, help = "Commit to show") - final String commitTime) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline().filterCompletedInstants(); - HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - - if (!timeline.containsInstant(commitInstant)) { - return "Commit " + commitTime + " not found in Commits " + timeline; - } - HoodieCommitMetadata meta = - HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get()); - List rows = new ArrayList(); - for (Map.Entry> entry : meta.getPartitionToWriteStats() - .entrySet()) { - String path = entry.getKey(); - List stats = entry.getValue(); - for (HoodieWriteStat stat : stats) { - 
rows.add(new String[] {path, stat.getFileId(), stat.getPrevCommit(), - String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()), - String.valueOf(stat.getTotalWriteBytes()), - String.valueOf(stat.getTotalWriteErrors())}); - } - } - return HoodiePrintHelper.print( - new String[] {"Partition Path", "File ID", "Previous Commit", "Total Records Updated", - "Total Records Written", "Total Bytes Written", "Total Errors"}, - rows.toArray(new String[rows.size()][])); - } - - @CliAvailabilityIndicator({"commits compare"}) - public boolean isCompareCommitsAvailable() { - return HoodieCLI.tableMetadata != null; - } - - @CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset") - public String compareCommits( - @CliOption(key = {"path"}, help = "Path of the dataset to compare to") - final String path) throws Exception { - HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.fs, path); - HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants();; - HoodieTableMetaClient source = HoodieCLI.tableMetadata; - HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants();; - String targetLatestCommit = - targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp(); - String sourceLatestCommit = - sourceTimeline.getInstants().iterator().hasNext() ? 
"0" : sourceTimeline.lastInstant().get().getTimestamp(); - - if (sourceLatestCommit != null && - HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { - // source is behind the target - List commitsToCatchup = - targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE) - .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); - return "Source " + source.getTableConfig().getTableName() + " is behind by " - + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; + HoodieCommitMetadata meta = + HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get()); + List rows = new ArrayList(); + for (Map.Entry> entry : meta.getPartitionToWriteStats() + .entrySet()) { + String path = entry.getKey(); + List stats = entry.getValue(); + long totalFilesAdded = 0; + long totalFilesUpdated = 0; + long totalRecordsUpdated = 0; + long totalRecordsInserted = 0; + long totalBytesWritten = 0; + long totalWriteErrors = 0; + for (HoodieWriteStat stat : stats) { + if (stat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) { + totalFilesAdded += 1; + totalRecordsInserted += stat.getNumWrites(); } else { - List commitsToCatchup = - sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE) - .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); - return "Source " + source.getTableConfig().getTableName() + " is ahead by " - + commitsToCatchup.size() + " commits. 
Commits to catch up - " + commitsToCatchup; + totalFilesUpdated += 1; + totalRecordsUpdated += stat.getNumUpdateWrites(); } - } + totalBytesWritten += stat.getTotalWriteBytes(); + totalWriteErrors += stat.getTotalWriteErrors(); + } + rows.add(new String[]{path, String.valueOf(totalFilesAdded), + String.valueOf(totalFilesUpdated), String.valueOf(totalRecordsInserted), + String.valueOf(totalRecordsUpdated), + NumericUtils.humanReadableByteCount(totalBytesWritten), + String.valueOf(totalWriteErrors)}); - @CliAvailabilityIndicator({"commits sync"}) - public boolean isSyncCommitsAvailable() { - return HoodieCLI.tableMetadata != null; } + return HoodiePrintHelper.print( + new String[]{"Partition Path", "Total Files Added", "Total Files Updated", + "Total Records Inserted", "Total Records Updated", "Total Bytes Written", + "Total Errors"}, rows.toArray(new String[rows.size()][])); + } - @CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset") - public String syncCommits( - @CliOption(key = {"path"}, help = "Path of the dataset to compare to") - final String path) throws Exception { - HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.fs, path); - HoodieCLI.state = HoodieCLI.CLIState.SYNC; - return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName() - + " and " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName(); + @CliCommand(value = "commit showfiles", help = "Show file level details of a commit") + public String showCommitFiles( + @CliOption(key = {"commit"}, help = "Commit to show") + final String commitTime) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, + commitTime); + + if (!timeline.containsInstant(commitInstant)) { + return 
"Commit " + commitTime + " not found in Commits " + timeline; } + HoodieCommitMetadata meta = + HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get()); + List rows = new ArrayList(); + for (Map.Entry> entry : meta.getPartitionToWriteStats() + .entrySet()) { + String path = entry.getKey(); + List stats = entry.getValue(); + for (HoodieWriteStat stat : stats) { + rows.add(new String[]{path, stat.getFileId(), stat.getPrevCommit(), + String.valueOf(stat.getNumUpdateWrites()), String.valueOf(stat.getNumWrites()), + String.valueOf(stat.getTotalWriteBytes()), + String.valueOf(stat.getTotalWriteErrors())}); + } + } + return HoodiePrintHelper.print( + new String[]{"Partition Path", "File ID", "Previous Commit", "Total Records Updated", + "Total Records Written", "Total Bytes Written", "Total Errors"}, + rows.toArray(new String[rows.size()][])); + } + + @CliAvailabilityIndicator({"commits compare"}) + public boolean isCompareCommitsAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset") + public String compareCommits( + @CliOption(key = {"path"}, help = "Path of the dataset to compare to") + final String path) throws Exception { + HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.fs, path); + HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + ; + HoodieTableMetaClient source = HoodieCLI.tableMetadata; + HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + ; + String targetLatestCommit = + targetTimeline.getInstants().iterator().hasNext() ? "0" + : targetTimeline.lastInstant().get().getTimestamp(); + String sourceLatestCommit = + sourceTimeline.getInstants().iterator().hasNext() ? 
"0" + : sourceTimeline.lastInstant().get().getTimestamp(); + + if (sourceLatestCommit != null && + HoodieTimeline + .compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { + // source is behind the target + List commitsToCatchup = + targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE) + .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); + return "Source " + source.getTableConfig().getTableName() + " is behind by " + + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; + } else { + List commitsToCatchup = + sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE) + .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); + return "Source " + source.getTableConfig().getTableName() + " is ahead by " + + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; + } + } + + @CliAvailabilityIndicator({"commits sync"}) + public boolean isSyncCommitsAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset") + public String syncCommits( + @CliOption(key = {"path"}, help = "Path of the dataset to compare to") + final String path) throws Exception { + HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.fs, path); + HoodieCLI.state = HoodieCLI.CLIState.SYNC; + return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + + " and " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName(); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/DatasetsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/DatasetsCommand.java index 9e17da1a4..fc1f22a3a 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/DatasetsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/DatasetsCommand.java @@ -18,24 +18,24 @@ 
package com.uber.hoodie.cli.commands; import com.uber.hoodie.cli.HoodieCLI; import com.uber.hoodie.common.table.HoodieTableMetaClient; +import java.io.IOException; import org.springframework.shell.core.CommandMarker; import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; - @Component public class DatasetsCommand implements CommandMarker { - @CliCommand(value = "connect", help = "Connect to a hoodie dataset") - public String connect( - @CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") - final String path) throws IOException { - boolean initialized = HoodieCLI.initConf(); - HoodieCLI.initFS(initialized); - HoodieCLI.setTableMetadata(new HoodieTableMetaClient(HoodieCLI.fs, path)); - HoodieCLI.state = HoodieCLI.CLIState.DATASET; - return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName() - + " loaded"; - } + + @CliCommand(value = "connect", help = "Connect to a hoodie dataset") + public String connect( + @CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") + final String path) throws IOException { + boolean initialized = HoodieCLI.initConf(); + HoodieCLI.initFS(initialized); + HoodieCLI.setTableMetadata(new HoodieTableMetaClient(HoodieCLI.fs, path)); + HoodieCLI.state = HoodieCLI.CLIState.DATASET; + return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + + " loaded"; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HDFSParquetImportCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HDFSParquetImportCommand.java index ca6e1ab05..c9d2f98b5 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HDFSParquetImportCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HDFSParquetImportCommand.java @@ -33,58 +33,59 @@ import 
org.springframework.stereotype.Component; @Component public class HDFSParquetImportCommand implements CommandMarker { - private static Logger log = LogManager.getLogger(HDFSParquetImportCommand.class); + private static Logger log = LogManager.getLogger(HDFSParquetImportCommand.class); - @CliCommand(value = "hdfsparquetimport", help = "Imports hdfs dataset to a hoodie dataset") - public String convert( - @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") - final String srcPath, - @CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset") - final String srcType, - @CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") - final String targetPath, - @CliOption(key = "tableName", mandatory = true, help = "Table name") - final String tableName, - @CliOption(key = "tableType", mandatory = true, help = "Table type") - final String tableType, - @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") - final String rowKeyField, - @CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") - final String partitionPathField, - @CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") - final String parallelism, - @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") - final String schemaFilePath, - @CliOption(key = "format", mandatory = true, help = "Format for the input data") - final String format, - @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") - final String sparkMemory, - @CliOption(key = "retry", mandatory = true, help = "Number of retries") - final String retry) - throws Exception { + @CliCommand(value = "hdfsparquetimport", help = "Imports hdfs dataset to a hoodie dataset") + public String convert( + @CliOption(key = "srcPath", mandatory = true, help = "Base path for the input dataset") + final String 
srcPath, + @CliOption(key = "srcType", mandatory = true, help = "Source type for the input dataset") + final String srcType, + @CliOption(key = "targetPath", mandatory = true, help = "Base path for the target hoodie dataset") + final String targetPath, + @CliOption(key = "tableName", mandatory = true, help = "Table name") + final String tableName, + @CliOption(key = "tableType", mandatory = true, help = "Table type") + final String tableType, + @CliOption(key = "rowKeyField", mandatory = true, help = "Row key field name") + final String rowKeyField, + @CliOption(key = "partitionPathField", mandatory = true, help = "Partition path field name") + final String partitionPathField, + @CliOption(key = {"parallelism"}, mandatory = true, help = "Parallelism for hoodie insert") + final String parallelism, + @CliOption(key = "schemaFilePath", mandatory = true, help = "Path for Avro schema file") + final String schemaFilePath, + @CliOption(key = "format", mandatory = true, help = "Format for the input data") + final String format, + @CliOption(key = "sparkMemory", mandatory = true, help = "Spark executor memory") + final String sparkMemory, + @CliOption(key = "retry", mandatory = true, help = "Number of retries") + final String retry) + throws Exception { - validate(format, srcType); + validate(format, srcType); - boolean initialized = HoodieCLI.initConf(); - HoodieCLI.initFS(initialized); - String sparkPropertiesPath = Utils - .getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); - SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + boolean initialized = HoodieCLI.initConf(); + HoodieCLI.initFS(initialized); + String sparkPropertiesPath = Utils + .getDefaultPropertiesFile( + scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); - sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, 
targetPath, tableName, - tableType, rowKeyField, partitionPathField, parallelism, schemaFilePath, sparkMemory, - retry); - Process process = sparkLauncher.launch(); - InputStreamConsumer.captureOutput(process); - int exitCode = process.waitFor(); - if (exitCode != 0) { - return "Failed to import dataset to hoodie format"; - } - return "Dataset imported to hoodie format"; + sparkLauncher.addAppArgs(SparkCommand.IMPORT.toString(), srcPath, targetPath, tableName, + tableType, rowKeyField, partitionPathField, parallelism, schemaFilePath, sparkMemory, + retry); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + if (exitCode != 0) { + return "Failed to import dataset to hoodie format"; } + return "Dataset imported to hoodie format"; + } - private void validate(String format, String srcType) { - (new HDFSParquetImporter.FormatValidator()).validate("format", format); - (new HDFSParquetImporter.SourceTypeValidator()).validate("srcType", srcType); - } + private void validate(String format, String srcType) { + (new HDFSParquetImporter.FormatValidator()).validate("format", format); + (new HDFSParquetImporter.SourceTypeValidator()).validate("srcType", srcType); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieSyncCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieSyncCommand.java index 15fc04a43..b19608bed 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieSyncCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieSyncCommand.java @@ -16,105 +16,109 @@ package com.uber.hoodie.cli.commands; +import com.uber.hoodie.cli.HoodieCLI; import com.uber.hoodie.cli.utils.CommitUtil; import com.uber.hoodie.cli.utils.HiveUtil; -import com.uber.hoodie.cli.HoodieCLI; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; import 
com.uber.hoodie.common.table.timeline.HoodieInstant; +import java.util.List; +import java.util.stream.Collectors; import org.springframework.shell.core.CommandMarker; import org.springframework.shell.core.annotation.CliAvailabilityIndicator; import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.util.List; -import java.util.stream.Collectors; - @Component public class HoodieSyncCommand implements CommandMarker { - @CliAvailabilityIndicator({"sync validate"}) - public boolean isSyncVerificationAvailable() { - return HoodieCLI.tableMetadata != null && HoodieCLI.syncTableMetadata != null; + + @CliAvailabilityIndicator({"sync validate"}) + public boolean isSyncVerificationAvailable() { + return HoodieCLI.tableMetadata != null && HoodieCLI.syncTableMetadata != null; + } + + @CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records") + public String validateSync( + @CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") + final String mode, + @CliOption(key = { + "sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") + final String srcDb, + @CliOption(key = { + "targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") + final String tgtDb, + @CliOption(key = { + "partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate") + final int partitionCount, + @CliOption(key = { + "hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to") + final String hiveServerUrl, + @CliOption(key = { + "hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to") + final String hiveUser, + @CliOption(key = { + "hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") + final String hivePass) throws Exception { + 
HoodieTableMetaClient target = HoodieCLI.syncTableMetadata; + HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsAndCompactionsTimeline(); + HoodieTableMetaClient source = HoodieCLI.tableMetadata; + HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsAndCompactionsTimeline(); + long sourceCount = 0; + long targetCount = 0; + if ("complete".equals(mode)) { + sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass); + targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass); + } else if ("latestPartitions".equals(mode)) { + sourceCount = HiveUtil + .countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass); + targetCount = HiveUtil + .countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass); } - @CliCommand(value = "sync validate", help = "Validate the sync by counting the number of records") - public String validateSync( - @CliOption(key = {"mode"}, unspecifiedDefaultValue = "complete", help = "Check mode") - final String mode, - @CliOption(key = { - "sourceDb"}, unspecifiedDefaultValue = "rawdata", help = "source database") - final String srcDb, - @CliOption(key = { - "targetDb"}, unspecifiedDefaultValue = "dwh_hoodie", help = "target database") - final String tgtDb, - @CliOption(key = { - "partitionCount"}, unspecifiedDefaultValue = "5", help = "total number of recent partitions to validate") - final int partitionCount, - @CliOption(key = { - "hiveServerUrl"}, mandatory = true, help = "hiveServerURL to connect to") - final String hiveServerUrl, - @CliOption(key = { - "hiveUser"}, mandatory = false, unspecifiedDefaultValue = "", help = "hive username to connect to") - final String hiveUser, - @CliOption(key = { - "hivePass"}, mandatory = true, unspecifiedDefaultValue = "", help = "hive password to connect to") - final String hivePass) throws Exception { - HoodieTableMetaClient target = HoodieCLI.syncTableMetadata; - HoodieTimeline 
targetTimeline = target.getActiveTimeline().getCommitsAndCompactionsTimeline(); - HoodieTableMetaClient source = HoodieCLI.tableMetadata; - HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsAndCompactionsTimeline(); - long sourceCount = 0; - long targetCount = 0; - if ("complete".equals(mode)) { - sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, hiveUser, hivePass); - targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, hiveUser, hivePass); - } else if ("latestPartitions".equals(mode)) { - sourceCount = HiveUtil.countRecords(hiveServerUrl, source, srcDb, partitionCount, hiveUser, hivePass); - targetCount = HiveUtil.countRecords(hiveServerUrl, target, tgtDb, partitionCount, hiveUser, hivePass); - } + String targetLatestCommit = + targetTimeline.getInstants().iterator().hasNext() ? "0" + : targetTimeline.lastInstant().get().getTimestamp(); + String sourceLatestCommit = + sourceTimeline.getInstants().iterator().hasNext() ? "0" + : sourceTimeline.lastInstant().get().getTimestamp(); - String targetLatestCommit = - targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp(); - String sourceLatestCommit = - sourceTimeline.getInstants().iterator().hasNext() ? 
"0" : sourceTimeline.lastInstant().get().getTimestamp(); + if (sourceLatestCommit != null && HoodieTimeline + .compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { + // source is behind the target + List commitsToCatchup = + targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants() + .collect(Collectors.toList()); + if (commitsToCatchup.isEmpty()) { + return "Count difference now is (count(" + target.getTableConfig().getTableName() + + ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount + - sourceCount); + } else { + long newInserts = CommitUtil.countNewRecords(target, + commitsToCatchup.stream().map(HoodieInstant::getTimestamp) + .collect(Collectors.toList())); + return "Count difference now is (count(" + target.getTableConfig().getTableName() + + ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount + - sourceCount) + ". Catch up count is " + newInserts; + } + } else { + List commitsToCatchup = + sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants() + .collect(Collectors.toList()); + if (commitsToCatchup.isEmpty()) { + return "Count difference now is (count(" + source.getTableConfig().getTableName() + + ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount + - targetCount); + } else { + long newInserts = CommitUtil.countNewRecords(source, + commitsToCatchup.stream().map(HoodieInstant::getTimestamp) + .collect(Collectors.toList())); + return "Count difference now is (count(" + source.getTableConfig().getTableName() + + ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount + - targetCount) + ". 
Catch up count is " + newInserts; + } - if (sourceLatestCommit != null && HoodieTimeline - .compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { - // source is behind the target - List commitsToCatchup = - targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE).getInstants() - .collect(Collectors.toList()); - if (commitsToCatchup.isEmpty()) { - return "Count difference now is (count(" + target.getTableConfig().getTableName() - + ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount - - sourceCount); - } else { - long newInserts = CommitUtil.countNewRecords(target, - commitsToCatchup.stream().map(HoodieInstant::getTimestamp) - .collect(Collectors.toList())); - return "Count difference now is (count(" + target.getTableConfig().getTableName() - + ") - count(" + source.getTableConfig().getTableName() + ") == " + (targetCount - - sourceCount) + ". Catch up count is " + newInserts; - } - } else { - List commitsToCatchup = - sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE).getInstants() - .collect(Collectors.toList()); - if (commitsToCatchup.isEmpty()) { - return "Count difference now is (count(" + source.getTableConfig().getTableName() - + ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount - - targetCount); - } else { - long newInserts = CommitUtil.countNewRecords(source, - commitsToCatchup.stream().map(HoodieInstant::getTimestamp) - .collect(Collectors.toList())); - return "Count difference now is (count(" + source.getTableConfig().getTableName() - + ") - count(" + target.getTableConfig().getTableName() + ") == " + (sourceCount - - targetCount) + ". 
Catch up count is " + newInserts; - } - - } } + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/RepairsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/RepairsCommand.java index e7998d9d8..1db6075e1 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/RepairsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/RepairsCommand.java @@ -22,7 +22,8 @@ import com.uber.hoodie.cli.utils.InputStreamConsumer; import com.uber.hoodie.cli.utils.SparkUtil; import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.util.FSUtils; - +import java.io.IOException; +import java.util.List; import org.apache.hadoop.fs.Path; import org.apache.spark.launcher.SparkLauncher; import org.springframework.shell.core.CommandMarker; @@ -31,80 +32,80 @@ import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.util.List; - @Component public class RepairsCommand implements CommandMarker { - @CliAvailabilityIndicator({"repair deduplicate"}) - public boolean isRepairDeduplicateAvailable() { - return HoodieCLI.tableMetadata != null; + @CliAvailabilityIndicator({"repair deduplicate"}) + public boolean isRepairDeduplicateAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"repair addpartitionmeta"}) + public boolean isRepairAddPartitionMetaAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with") + public String deduplicate( + @CliOption(key = { + "duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) + final String duplicatedPartitionPath, + @CliOption(key = { + "repairedOutputPath"}, help = "Location to place the 
repaired files", mandatory = true) + final String repairedOutputPath, + @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path", mandatory = true) + final String sparkPropertiesPath) throws Exception { + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher + .addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, + repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + + if (exitCode != 0) { + return "Deduplicated files placed in: " + repairedOutputPath; } + return "Deduplication failed "; + } - @CliAvailabilityIndicator({"repair addpartitionmeta"}) - public boolean isRepairAddPartitionMetaAvailable() { - return HoodieCLI.tableMetadata != null; - } - @CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce repaired files to replace with") - public String deduplicate( - @CliOption(key = { - "duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) - final String duplicatedPartitionPath, - @CliOption(key = {"repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) - final String repairedOutputPath, - @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path", mandatory = true) - final String sparkPropertiesPath) throws Exception { - SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); - sparkLauncher - .addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, - repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); - Process process = sparkLauncher.launch(); - InputStreamConsumer.captureOutput(process); - int exitCode = process.waitFor(); + @CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present") + public String 
addPartitionMeta( + @CliOption(key = {"dryrun"}, + help = "Should we actually add or just print what would be done", + unspecifiedDefaultValue = "true") + final boolean dryRun) throws IOException { - if (exitCode != 0) { - return "Deduplicated files placed in: " + repairedOutputPath; + String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline() + .lastInstant().get().getTimestamp(); + List partitionPaths = FSUtils.getAllFoldersThreeLevelsDown(HoodieCLI.fs, + HoodieCLI.tableMetadata.getBasePath()); + Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath()); + String[][] rows = new String[partitionPaths.size() + 1][]; + + int ind = 0; + for (String partition : partitionPaths) { + Path partitionPath = new Path(basePath, partition); + String[] row = new String[3]; + row[0] = partition; + row[1] = "Yes"; + row[2] = "None"; + if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) { + row[1] = "No"; + if (!dryRun) { + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata( + HoodieCLI.fs, + latestCommit, + basePath, + partitionPath); + partitionMetadata.trySave(0); } - return "Deduplication failed "; + } + rows[ind++] = row; } - - - @CliCommand(value = "repair addpartitionmeta", help = "Add partition metadata to a dataset, if not present") - public String addPartitionMeta( - @CliOption(key = {"dryrun"}, - help = "Should we actually add or just print what would be done", - unspecifiedDefaultValue = "true") - final boolean dryRun) throws IOException { - - String latestCommit = HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get().getTimestamp(); - List partitionPaths = FSUtils.getAllFoldersThreeLevelsDown(HoodieCLI.fs, - HoodieCLI.tableMetadata.getBasePath()); - Path basePath = new Path(HoodieCLI.tableMetadata.getBasePath()); - String[][] rows = new String[partitionPaths.size() + 1][]; - - int ind = 0; - for (String partition: partitionPaths) { - Path partitionPath = new 
Path(basePath, partition); - String[] row = new String[3]; - row[0] = partition; row[1] = "Yes"; row[2] = "None"; - if (!HoodiePartitionMetadata.hasPartitionMetadata(HoodieCLI.fs, partitionPath)) { - row[1] = "No"; - if (!dryRun) { - HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata( - HoodieCLI.fs, - latestCommit, - basePath, - partitionPath); - partitionMetadata.trySave(0); - } - } - rows[ind++] = row; - } - - return HoodiePrintHelper.print( - new String[] {"Partition Path", "Metadata Present?", "Action"}, rows); - } + return HoodiePrintHelper.print( + new String[]{"Partition Path", "Metadata Present?", "Action"}, rows); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SavepointsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SavepointsCommand.java index 350c9d81e..4f5b2c9a3 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SavepointsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SavepointsCommand.java @@ -27,6 +27,10 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.config.HoodieIndexConfig; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.index.HoodieIndex; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; import org.springframework.shell.core.CommandMarker; @@ -35,122 +39,118 @@ import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; - @Component public class SavepointsCommand implements CommandMarker { - @CliAvailabilityIndicator({"savepoints show"}) - public boolean isShowAvailable() { - return 
HoodieCLI.tableMetadata != null; + + @CliAvailabilityIndicator({"savepoints show"}) + public boolean isShowAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"savepoints refresh"}) + public boolean isRefreshAvailable() { + return HoodieCLI.tableMetadata != null; + } + + + @CliAvailabilityIndicator({"savepoint create"}) + public boolean isCreateSavepointAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliAvailabilityIndicator({"savepoint rollback"}) + public boolean isRollbackToSavepointAvailable() { + return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline() + .getSavePointTimeline().filterCompletedInstants().empty(); + } + + @CliCommand(value = "savepoints show", help = "Show the savepoints") + public String showSavepoints() throws IOException { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants(); + List commits = timeline.getInstants().collect(Collectors.toList()); + String[][] rows = new String[commits.size()][]; + Collections.reverse(commits); + for (int i = 0; i < commits.size(); i++) { + HoodieInstant commit = commits.get(i); + rows[i] = new String[]{commit.getTimestamp()}; + } + return HoodiePrintHelper.print(new String[]{"SavepointTime"}, rows); + } + + @CliCommand(value = "savepoint create", help = "Savepoint a commit") + public String savepoint( + @CliOption(key = {"commit"}, help = "Commit to savepoint") + final String commitTime, + @CliOption(key = {"user"}, help = "User who is creating the savepoint") + final String user, + @CliOption(key = {"comments"}, help = "Comments for creating the savepoint") + final String comments) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); + HoodieInstant + commitInstant 
= new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); + + if (!timeline.containsInstant(commitInstant)) { + return "Commit " + commitTime + " not found in Commits " + timeline; } - @CliAvailabilityIndicator({"savepoints refresh"}) - public boolean isRefreshAvailable() { - return HoodieCLI.tableMetadata != null; + HoodieWriteClient client = createHoodieClient(null, HoodieCLI.tableMetadata.getBasePath()); + if (client.savepoint(commitTime, user, comments)) { + // Refresh the current + refreshMetaClient(); + return String.format("The commit \"%s\" has been savepointed.", commitTime); + } + return String.format("Failed: Could not savepoint commit \"%s\".", commitTime); + } + + @CliCommand(value = "savepoint rollback", help = "Savepoint a commit") + public String rollbackToSavepoint( + @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") + final String commitTime, + @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") + final String sparkPropertiesPath) throws Exception { + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); + HoodieInstant + commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); + + if (!timeline.containsInstant(commitInstant)) { + return "Commit " + commitTime + " not found in Commits " + timeline; } - - @CliAvailabilityIndicator({"savepoint create"}) - public boolean isCreateSavepointAvailable() { - return HoodieCLI.tableMetadata != null; - } - - @CliAvailabilityIndicator({"savepoint rollback"}) - public boolean isRollbackToSavepointAvailable() { - return HoodieCLI.tableMetadata != null && !HoodieCLI.tableMetadata.getActiveTimeline().getSavePointTimeline().filterCompletedInstants().empty(); - } - - @CliCommand(value = "savepoints show", help = "Show the savepoints") - public String showSavepoints() throws IOException { - HoodieActiveTimeline 
activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants(); - List commits = timeline.getInstants().collect(Collectors.toList()); - String[][] rows = new String[commits.size()][]; - Collections.reverse(commits); - for (int i = 0; i < commits.size(); i++) { - HoodieInstant commit = commits.get(i); - rows[i] = new String[] {commit.getTimestamp()}; - } - return HoodiePrintHelper.print(new String[] {"SavepointTime"}, rows); - } - - @CliCommand(value = "savepoint create", help = "Savepoint a commit") - public String savepoint( - @CliOption(key = {"commit"}, help = "Commit to savepoint") - final String commitTime, - @CliOption(key = {"user"}, help = "User who is creating the savepoint") - final String user, - @CliOption(key = {"comments"}, help = "Comments for creating the savepoint") - final String comments) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); - HoodieInstant - commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - - if (!timeline.containsInstant(commitInstant)) { - return "Commit " + commitTime + " not found in Commits " + timeline; - } - - HoodieWriteClient client = createHoodieClient(null, HoodieCLI.tableMetadata.getBasePath()); - if (client.savepoint(commitTime, user, comments)) { - // Refresh the current - refreshMetaClient(); - return String.format("The commit \"%s\" has been savepointed.", commitTime); - } - return String.format("Failed: Could not savepoint commit \"%s\".", commitTime); - } - - @CliCommand(value = "savepoint rollback", help = "Savepoint a commit") - public String rollbackToSavepoint( - @CliOption(key = {"savepoint"}, help = "Savepoint to rollback") - final String commitTime, - @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") - final String 
sparkPropertiesPath) throws Exception { - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); - HoodieInstant - commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - - if (!timeline.containsInstant(commitInstant)) { - return "Commit " + commitTime + " not found in Commits " + timeline; - } - - SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); - sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), - commitTime, - HoodieCLI.tableMetadata.getBasePath()); - Process process = sparkLauncher.launch(); - InputStreamConsumer.captureOutput(process); - int exitCode = process.waitFor(); - // Refresh the current - refreshMetaClient(); - if (exitCode != 0) { - return "Savepoint " + commitTime + " failed to roll back"; - } - return "Savepoint " + commitTime + " rolled back"; + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK_TO_SAVEPOINT.toString(), + commitTime, + HoodieCLI.tableMetadata.getBasePath()); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + // Refresh the current + refreshMetaClient(); + if (exitCode != 0) { + return "Savepoint " + commitTime + " failed to roll back"; } + return "Savepoint " + commitTime + " rolled back"; + } - @CliCommand(value = "savepoints refresh", help = "Refresh the savepoints") - public String refreshMetaClient() throws IOException { - HoodieTableMetaClient metadata = - new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); - HoodieCLI.setTableMetadata(metadata); - return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; - } - - private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) - 
throws Exception { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .build(); - return new HoodieWriteClient(jsc, config, false); - } + @CliCommand(value = "savepoints refresh", help = "Refresh the savepoints") + public String refreshMetaClient() throws IOException { + HoodieTableMetaClient metadata = + new HoodieTableMetaClient(HoodieCLI.fs, HoodieCLI.tableMetadata.getBasePath()); + HoodieCLI.setTableMetadata(metadata); + return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; + } + private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) + throws Exception { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .build(); + return new HoodieWriteClient(jsc, config, false); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SparkMain.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SparkMain.java index 1564b87b2..aba2d9da8 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SparkMain.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/SparkMain.java @@ -30,109 +30,110 @@ import org.apache.spark.sql.SQLContext; public class SparkMain { - protected final static Logger LOG = Logger.getLogger(SparkMain.class); + protected final static Logger LOG = Logger.getLogger(SparkMain.class); - /** - * Commands - */ - enum SparkCommand { - ROLLBACK, - DEDUPLICATE, - ROLLBACK_TO_SAVEPOINT, - SAVEPOINT, - IMPORT + /** + * Commands + */ + enum SparkCommand { + ROLLBACK, + DEDUPLICATE, + ROLLBACK_TO_SAVEPOINT, + SAVEPOINT, + IMPORT + } + + public static void main(String[] args) throws Exception { + String command = args[0]; + LOG.info("Invoking SparkMain:" + command); + + SparkCommand cmd = 
SparkCommand.valueOf(command); + + JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command); + int returnCode = 0; + switch (cmd) { + case ROLLBACK: + assert (args.length == 3); + returnCode = rollback(jsc, args[1], args[2]); + break; + case DEDUPLICATE: + assert (args.length == 4); + returnCode = deduplicatePartitionPath(jsc, args[1], args[2], args[3]); + break; + case ROLLBACK_TO_SAVEPOINT: + assert (args.length == 3); + returnCode = rollbackToSavepoint(jsc, args[1], args[2]); + break; + case IMPORT: + assert (args.length == 11); + returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6], + Integer.parseInt(args[7]), args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9], + Integer.parseInt(args[10])); + break; } - public static void main(String[] args) throws Exception { - String command = args[0]; - LOG.info("Invoking SparkMain:" + command); + System.exit(returnCode); + } - SparkCommand cmd = SparkCommand.valueOf(command); + private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath, + String tableName, String tableType, String rowKey, String partitionKey, int parallelism, + String schemaFile, String sparkMaster, String sparkMemory, int retry) throws Exception { + HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); + cfg.srcPath = srcPath; + cfg.targetPath = targetPath; + cfg.tableName = tableName; + cfg.tableType = tableType; + cfg.rowKey = rowKey; + cfg.partitionKey = partitionKey; + cfg.parallelism = parallelism; + cfg.schemaFile = schemaFile; + jsc.getConf().set("spark.executor.memory", sparkMemory); + return new HDFSParquetImporter(cfg).dataImport(jsc, retry); + } - JavaSparkContext jsc = SparkUtil.initJavaSparkConf("hoodie-cli-" + command); - int returnCode = 0; - switch(cmd) { - case ROLLBACK: - assert (args.length == 3); - returnCode = rollback(jsc, args[1], args[2]); - break; - case DEDUPLICATE: - assert (args.length == 4); - returnCode = deduplicatePartitionPath(jsc, 
args[1], args[2], args[3]); - break; - case ROLLBACK_TO_SAVEPOINT: - assert (args.length == 3); - returnCode = rollbackToSavepoint(jsc, args[1], args[2]); - break; - case IMPORT: - assert (args.length == 11); - returnCode = dataImport(jsc, args[1], args[2], args[3], args[4], args[5], args[6], - Integer.parseInt(args[7]), args[8], SparkUtil.DEFUALT_SPARK_MASTER, args[9], - Integer.parseInt(args[10])); - break; - } + private static int deduplicatePartitionPath(JavaSparkContext jsc, + String duplicatedPartitionPath, + String repairedOutputPath, + String basePath) + throws Exception { + DedupeSparkJob job = new DedupeSparkJob(basePath, + duplicatedPartitionPath, repairedOutputPath, new SQLContext(jsc), FSUtils.getFs()); + job.fixDuplicates(true); + return 0; + } - System.exit(returnCode); + private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) + throws Exception { + HoodieWriteClient client = createHoodieClient(jsc, basePath); + if (client.rollback(commitTime)) { + LOG.info(String.format("The commit \"%s\" rolled back.", commitTime)); + return 0; + } else { + LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime)); + return -1; } + } - private static int dataImport(JavaSparkContext jsc, String srcPath, String targetPath, - String tableName, String tableType, String rowKey, String partitionKey, int parallelism, - String schemaFile, String sparkMaster, String sparkMemory, int retry) throws Exception { - HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); - cfg.srcPath = srcPath; - cfg.targetPath = targetPath; - cfg.tableName = tableName; - cfg.tableType = tableType; - cfg.rowKey = rowKey; - cfg.partitionKey = partitionKey; - cfg.parallelism = parallelism; - cfg.schemaFile = schemaFile; - jsc.getConf().set("spark.executor.memory", sparkMemory); - return new HDFSParquetImporter(cfg).dataImport(jsc, retry); + private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, + String 
basePath) + throws Exception { + HoodieWriteClient client = createHoodieClient(jsc, basePath); + if (client.rollbackToSavepoint(savepointTime)) { + LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime)); + return 0; + } else { + LOG.info(String.format("The commit \"%s\" failed to roll back.", savepointTime)); + return -1; } + } - private static int deduplicatePartitionPath(JavaSparkContext jsc, - String duplicatedPartitionPath, - String repairedOutputPath, - String basePath) - throws Exception { - DedupeSparkJob job = new DedupeSparkJob(basePath, - duplicatedPartitionPath,repairedOutputPath,new SQLContext(jsc), FSUtils.getFs()); - job.fixDuplicates(true); - return 0; - } - - private static int rollback(JavaSparkContext jsc, String commitTime, String basePath) - throws Exception { - HoodieWriteClient client = createHoodieClient(jsc, basePath); - if (client.rollback(commitTime)) { - LOG.info(String.format("The commit \"%s\" rolled back.", commitTime)); - return 0; - } else { - LOG.info(String.format("The commit \"%s\" failed to roll back.", commitTime)); - return -1; - } - } - - private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) - throws Exception { - HoodieWriteClient client = createHoodieClient(jsc, basePath); - if (client.rollbackToSavepoint(savepointTime)) { - LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime)); - return 0; - } else { - LOG.info(String.format("The commit \"%s\" failed to roll back.", savepointTime)); - return -1; - } - } - - private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) - throws Exception { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .build(); - return new HoodieWriteClient(jsc, config); - } + private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String 
basePath) + throws Exception { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .build(); + return new HoodieWriteClient(jsc, config); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/StatsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/StatsCommand.java index cef92ae67..cb61eef0b 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/StatsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/StatsCommand.java @@ -28,7 +28,10 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.NumericUtils; - +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -38,106 +41,105 @@ import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.text.DecimalFormat; -import java.util.HashMap; -import java.util.stream.Collectors; - @Component public class StatsCommand implements CommandMarker { - @CliAvailabilityIndicator({"stats wa"}) - public boolean isWriteAmpAvailable() { - return HoodieCLI.tableMetadata != null; + + @CliAvailabilityIndicator({"stats wa"}) + public boolean isWriteAmpAvailable() { + return HoodieCLI.tableMetadata != null; + } + + @CliCommand(value = "stats wa", help = "Write Amplification. 
Ratio of how many records were upserted to how many records were actually written") + public String writeAmplificationStats() throws IOException { + long totalRecordsUpserted = 0; + long totalRecordsWritten = 0; + + HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); + HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); + + String[][] rows = new String[new Long(timeline.countInstants()).intValue() + 1][]; + int i = 0; + DecimalFormat df = new DecimalFormat("#.00"); + for (HoodieInstant commitTime : timeline.getInstants().collect( + Collectors.toList())) { + String waf = "0"; + HoodieCommitMetadata commit = HoodieCommitMetadata + .fromBytes(activeTimeline.getInstantDetails(commitTime).get()); + if (commit.fetchTotalUpdateRecordsWritten() > 0) { + waf = df.format( + (float) commit.fetchTotalRecordsWritten() / commit + .fetchTotalUpdateRecordsWritten()); + } + rows[i++] = new String[]{commitTime.getTimestamp(), + String.valueOf(commit.fetchTotalUpdateRecordsWritten()), + String.valueOf(commit.fetchTotalRecordsWritten()), waf}; + totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten(); + totalRecordsWritten += commit.fetchTotalRecordsWritten(); + } + String waf = "0"; + if (totalRecordsUpserted > 0) { + waf = df.format((float) totalRecordsWritten / totalRecordsUpserted); + } + rows[i] = new String[]{"Total", String.valueOf(totalRecordsUpserted), + String.valueOf(totalRecordsWritten), waf}; + return HoodiePrintHelper.print( + new String[]{"CommitTime", "Total Upserted", "Total Written", + "Write Amplifiation Factor"}, rows); + + } + + + private String[] printFileSizeHistogram(String commitTime, Snapshot s) { + return new String[]{ + commitTime, + NumericUtils.humanReadableByteCount(s.getMin()), + NumericUtils.humanReadableByteCount(s.getValue(0.1)), + NumericUtils.humanReadableByteCount(s.getMedian()), + NumericUtils.humanReadableByteCount(s.getMean()), + 
NumericUtils.humanReadableByteCount(s.get95thPercentile()), + NumericUtils.humanReadableByteCount(s.getMax()), + String.valueOf(s.size()), + NumericUtils.humanReadableByteCount(s.getStdDev()) + }; + } + + @CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files") + public String fileSizeStats( + @CliOption(key = { + "partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") + final String globRegex) throws IOException { + + FileSystem fs = HoodieCLI.fs; + String globPath = String.format("%s/%s/*", + HoodieCLI.tableMetadata.getBasePath(), + globRegex); + FileStatus[] statuses = fs.globStatus(new Path(globPath)); + + // max, min, #small files < 10MB, 50th, avg, 95th + final int MAX_FILES = 1000000; + Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES)); + HashMap commitHistoMap = new HashMap(); + for (FileStatus fileStatus : statuses) { + String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName()); + long sz = fileStatus.getLen(); + if (!commitHistoMap.containsKey(commitTime)) { + commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES))); + } + commitHistoMap.get(commitTime).update(sz); + globalHistogram.update(sz); } - @CliCommand(value = "stats wa", help = "Write Amplification. 
Ratio of how many records were upserted to how many records were actually written") - public String writeAmplificationStats() throws IOException { - long totalRecordsUpserted = 0; - long totalRecordsWritten = 0; - - HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); - HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); - - String[][] rows = new String[new Long(timeline.countInstants()).intValue() + 1][]; - int i = 0; - DecimalFormat df = new DecimalFormat("#.00"); - for (HoodieInstant commitTime : timeline.getInstants().collect( - Collectors.toList())) { - String waf = "0"; - HoodieCommitMetadata commit = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitTime).get()); - if (commit.fetchTotalUpdateRecordsWritten() > 0) { - waf = df.format( - (float) commit.fetchTotalRecordsWritten() / commit - .fetchTotalUpdateRecordsWritten()); - } - rows[i++] = new String[] {commitTime.getTimestamp(), - String.valueOf(commit.fetchTotalUpdateRecordsWritten()), - String.valueOf(commit.fetchTotalRecordsWritten()), waf}; - totalRecordsUpserted += commit.fetchTotalUpdateRecordsWritten(); - totalRecordsWritten += commit.fetchTotalRecordsWritten(); - } - String waf = "0"; - if (totalRecordsUpserted > 0) { - waf = df.format((float) totalRecordsWritten / totalRecordsUpserted); - } - rows[i] = new String[] {"Total", String.valueOf(totalRecordsUpserted), - String.valueOf(totalRecordsWritten), waf}; - return HoodiePrintHelper.print( - new String[] {"CommitTime", "Total Upserted", "Total Written", - "Write Amplifiation Factor"}, rows); - + String[][] rows = new String[commitHistoMap.size() + 1][]; + int ind = 0; + for (String commitTime : commitHistoMap.keySet()) { + Snapshot s = commitHistoMap.get(commitTime).getSnapshot(); + rows[ind++] = printFileSizeHistogram(commitTime, s); } + Snapshot s = globalHistogram.getSnapshot(); + rows[ind++] = printFileSizeHistogram("ALL", s); - - private String[] 
printFileSizeHistogram(String commitTime, Snapshot s) { - return new String[]{ - commitTime, - NumericUtils.humanReadableByteCount(s.getMin()), - NumericUtils.humanReadableByteCount(s.getValue(0.1)), - NumericUtils.humanReadableByteCount(s.getMedian()), - NumericUtils.humanReadableByteCount(s.getMean()), - NumericUtils.humanReadableByteCount(s.get95thPercentile()), - NumericUtils.humanReadableByteCount(s.getMax()), - String.valueOf(s.size()), - NumericUtils.humanReadableByteCount(s.getStdDev()) - }; - } - - @CliCommand(value = "stats filesizes", help = "File Sizes. Display summary stats on sizes of files") - public String fileSizeStats( - @CliOption(key = {"partitionPath"}, help = "regex to select files, eg: 2016/08/02", unspecifiedDefaultValue = "*/*/*") - final String globRegex) throws IOException { - - FileSystem fs = HoodieCLI.fs; - String globPath = String.format("%s/%s/*", - HoodieCLI.tableMetadata.getBasePath(), - globRegex); - FileStatus[] statuses = fs.globStatus(new Path(globPath)); - - // max, min, #small files < 10MB, 50th, avg, 95th - final int MAX_FILES = 1000000; - Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES)); - HashMap commitHistoMap = new HashMap(); - for (FileStatus fileStatus: statuses) { - String commitTime = FSUtils.getCommitTime(fileStatus.getPath().getName()); - long sz = fileStatus.getLen(); - if (!commitHistoMap.containsKey(commitTime)) { - commitHistoMap.put(commitTime, new Histogram(new UniformReservoir(MAX_FILES))); - } - commitHistoMap.get(commitTime).update(sz); - globalHistogram.update(sz); - } - - String[][] rows = new String[commitHistoMap.size() + 1][]; - int ind = 0; - for (String commitTime: commitHistoMap.keySet()) { - Snapshot s = commitHistoMap.get(commitTime).getSnapshot(); - rows[ind++] = printFileSizeHistogram(commitTime, s); - } - Snapshot s = globalHistogram.getSnapshot(); - rows[ind++] = printFileSizeHistogram("ALL", s); - - return HoodiePrintHelper.print( - new String[] {"CommitTime", 
"Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", "StdDev"}, rows); - } + return HoodiePrintHelper.print( + new String[]{"CommitTime", "Min", "10th", "50th", "avg", "95th", "Max", "NumFiles", + "StdDev"}, rows); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/UtilsCommand.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/UtilsCommand.java index b5abb6a6e..3733a8c0c 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/UtilsCommand.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/UtilsCommand.java @@ -23,12 +23,13 @@ import org.springframework.stereotype.Component; @Component public class UtilsCommand implements CommandMarker { - @CliCommand(value = "utils loadClass", help = "Load a class" ) - public String loadClass( - @CliOption(key = {"class"}, help = "Check mode" ) final String clazz - ) throws Exception { - Class klass = Class.forName(clazz); - return klass.getProtectionDomain().getCodeSource().getLocation().toExternalForm(); - } + + @CliCommand(value = "utils loadClass", help = "Load a class") + public String loadClass( + @CliOption(key = {"class"}, help = "Check mode") final String clazz + ) throws Exception { + Class klass = Class.forName(clazz); + return klass.getProtectionDomain().getCodeSource().getLocation().toExternalForm(); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/CommitUtil.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/CommitUtil.java index 8f5aabf4f..71ed5aca6 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/CommitUtil.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/CommitUtil.java @@ -20,21 +20,22 @@ import com.uber.hoodie.common.model.HoodieCommitMetadata; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; - import java.io.IOException; import java.util.List; public class CommitUtil { - 
public static long countNewRecords(HoodieTableMetaClient target, List commitsToCatchup) - throws IOException { - long totalNew = 0; - HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants(); - for(String commit:commitsToCatchup) { - HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline - .getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)) - .get()); - totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten(); - } - return totalNew; + + public static long countNewRecords(HoodieTableMetaClient target, List commitsToCatchup) + throws IOException { + long totalNew = 0; + HoodieTimeline timeline = target.getActiveTimeline().reload().getCommitTimeline() + .filterCompletedInstants(); + for (String commit : commitsToCatchup) { + HoodieCommitMetadata c = HoodieCommitMetadata.fromBytes(timeline + .getInstantDetails(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commit)) + .get()); + totalNew += c.fetchTotalRecordsWritten() - c.fetchTotalUpdateRecordsWritten(); } + return totalNew; + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java index 1d4b00349..52ec668c2 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/HiveUtil.java @@ -17,107 +17,112 @@ package com.uber.hoodie.cli.utils; import com.uber.hoodie.common.table.HoodieTableMetaClient; -import org.apache.commons.dbcp.BasicDataSource; -import org.joda.time.DateTime; - -import javax.sql.DataSource; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import javax.sql.DataSource; +import org.apache.commons.dbcp.BasicDataSource; +import org.joda.time.DateTime; public class HiveUtil { - private static String driverName = "org.apache.hive.jdbc.HiveDriver"; - static { - try 
{ - Class.forName(driverName); - } catch (ClassNotFoundException e) { - throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e); - } + private static String driverName = "org.apache.hive.jdbc.HiveDriver"; + + static { + try { + Class.forName(driverName); + } catch (ClassNotFoundException e) { + throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e); } + } - private static Connection connection; + private static Connection connection; - private static Connection getConnection(String jdbcUrl, String user, String pass) throws SQLException { - DataSource ds = getDatasource(jdbcUrl, user, pass); - return ds.getConnection(); + private static Connection getConnection(String jdbcUrl, String user, String pass) + throws SQLException { + DataSource ds = getDatasource(jdbcUrl, user, pass); + return ds.getConnection(); + } + + private static DataSource getDatasource(String jdbcUrl, String user, String pass) { + BasicDataSource ds = new BasicDataSource(); + ds.setDriverClassName(driverName); + ds.setUrl(jdbcUrl); + ds.setUsername(user); + ds.setPassword(pass); + return ds; + } + + public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName, + String user, String pass) throws SQLException { + Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass); + ResultSet rs = null; + Statement stmt = conn.createStatement(); + try { + //stmt.execute("set mapred.job.queue.name="); + stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat"); + stmt.execute("set hive.stats.autogather=false"); + rs = stmt.executeQuery( + "select count(`_hoodie_commit_time`) as cnt from " + dbName + "." 
+ source + .getTableConfig() + .getTableName()); + long count = -1; + if (rs.next()) { + count = rs.getLong("cnt"); + } + System.out + .println("Total records in " + source.getTableConfig().getTableName() + " is " + count); + return count; + } finally { + if (rs != null) { + rs.close(); + } + if (stmt != null) { + stmt.close(); + } } + } - private static DataSource getDatasource(String jdbcUrl, String user, String pass) { - BasicDataSource ds = new BasicDataSource(); - ds.setDriverClassName(driverName); - ds.setUrl(jdbcUrl); - ds.setUsername(user); - ds.setPassword(pass); - return ds; - } + public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, + int partitions, String user, String pass) throws SQLException { + DateTime dateTime = DateTime.now(); + String endDateStr = + dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" + + String.format("%02d", dateTime.getDayOfMonth()); + dateTime = dateTime.minusDays(partitions); + String startDateStr = + dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" + + String.format("%02d", dateTime.getDayOfMonth()); + System.out.println("Start date " + startDateStr + " and end date " + endDateStr); + return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass); + } - public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String dbName, String user, String pass) throws SQLException { - Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass); - ResultSet rs = null; - Statement stmt = conn.createStatement(); - try { - //stmt.execute("set mapred.job.queue.name="); - stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat" ); - stmt.execute("set hive.stats.autogather=false" ); - rs = stmt.executeQuery( - "select count(`_hoodie_commit_time`) as cnt from " + dbName + "." 
+ source.getTableConfig() - .getTableName()); - long count = -1; - if(rs.next()) { - count = rs.getLong("cnt"); - } - System.out.println("Total records in " + source.getTableConfig().getTableName() + " is " + count); - return count; - } finally { - if (rs != null) { - rs.close(); - } - if (stmt != null) { - stmt.close(); - } - } - } - - public static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, - int partitions, String user, String pass) throws SQLException { - DateTime dateTime = DateTime.now(); - String endDateStr = - dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" + - String.format("%02d", dateTime.getDayOfMonth()); - dateTime = dateTime.minusDays(partitions); - String startDateStr = - dateTime.getYear() + "-" + String.format("%02d", dateTime.getMonthOfYear()) + "-" + - String.format("%02d", dateTime.getDayOfMonth()); - System.out.println("Start date " + startDateStr + " and end date " + endDateStr); - return countRecords(jdbcUrl, source, srcDb, startDateStr, endDateStr, user, pass); - } - - private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, String startDateStr, - String endDateStr, String user, String pass) throws SQLException { - Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass); - ResultSet rs = null; - Statement stmt = conn.createStatement(); - try { - //stmt.execute("set mapred.job.queue.name="); - stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat"); - stmt.execute("set hive.stats.autogather=false"); - rs = stmt.executeQuery( - "select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." 
+ source.getTableConfig() - .getTableName() + " where datestr>'" + startDateStr + "' and datestr<='" - + endDateStr + "'"); - if(rs.next()) { - return rs.getLong("cnt"); - } - return -1; - } finally { - if (rs != null) { - rs.close(); - } - if (stmt != null) { - stmt.close(); - } - } + private static long countRecords(String jdbcUrl, HoodieTableMetaClient source, String srcDb, + String startDateStr, + String endDateStr, String user, String pass) throws SQLException { + Connection conn = HiveUtil.getConnection(jdbcUrl, user, pass); + ResultSet rs = null; + Statement stmt = conn.createStatement(); + try { + //stmt.execute("set mapred.job.queue.name="); + stmt.execute("set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat"); + stmt.execute("set hive.stats.autogather=false"); + rs = stmt.executeQuery( + "select count(`_hoodie_commit_time`) as cnt from " + srcDb + "." + source.getTableConfig() + .getTableName() + " where datestr>'" + startDateStr + "' and datestr<='" + + endDateStr + "'"); + if (rs.next()) { + return rs.getLong("cnt"); + } + return -1; + } finally { + if (rs != null) { + rs.close(); + } + if (stmt != null) { + stmt.close(); + } } + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/InputStreamConsumer.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/InputStreamConsumer.java index 8da872ef3..4d926cea3 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/InputStreamConsumer.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/InputStreamConsumer.java @@ -23,34 +23,37 @@ import java.io.InputStreamReader; import java.util.logging.Logger; public class InputStreamConsumer extends Thread { - protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName()); - private InputStream is; - public InputStreamConsumer(InputStream is) { - this.is = is; - } - @Override - public void run() { - try { - InputStreamReader isr = new InputStreamReader(is); - BufferedReader br = new 
BufferedReader(isr); - String line; - while ( (line = br.readLine()) != null) - LOG.info(line); - } catch (IOException ioe) { - LOG.severe(ioe.toString()); - ioe.printStackTrace(); - } - } + protected final static Logger LOG = Logger.getLogger(InputStreamConsumer.class.getName()); + private InputStream is; - public static void captureOutput(Process p) { - InputStreamConsumer stdout; - InputStreamConsumer errout; - errout = new InputStreamConsumer(p.getErrorStream()); - stdout = new InputStreamConsumer(p.getInputStream()); - errout.start(); - stdout.start(); + public InputStreamConsumer(InputStream is) { + this.is = is; + } + + @Override + public void run() { + try { + InputStreamReader isr = new InputStreamReader(is); + BufferedReader br = new BufferedReader(isr); + String line; + while ((line = br.readLine()) != null) { + LOG.info(line); + } + } catch (IOException ioe) { + LOG.severe(ioe.toString()); + ioe.printStackTrace(); } + } + + public static void captureOutput(Process p) { + InputStreamConsumer stdout; + InputStreamConsumer errout; + errout = new InputStreamConsumer(p.getErrorStream()); + stdout = new InputStreamConsumer(p.getInputStream()); + errout.start(); + stdout.start(); + } } diff --git a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/SparkUtil.java b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/SparkUtil.java index 5cb5e4bd2..4b4ab2a2d 100644 --- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/SparkUtil.java +++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/utils/SparkUtil.java @@ -18,59 +18,54 @@ package com.uber.hoodie.cli.utils; import com.uber.hoodie.HoodieWriteClient; import com.uber.hoodie.cli.commands.SparkMain; - +import java.io.File; +import java.net.URISyntaxException; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.launcher.SparkLauncher; -import java.io.File; -import java.net.URISyntaxException; - public class SparkUtil { - 
public static Logger logger = Logger.getLogger(SparkUtil.class); - public static final String DEFUALT_SPARK_MASTER = "yarn-client"; + public static Logger logger = Logger.getLogger(SparkUtil.class); + public static final String DEFUALT_SPARK_MASTER = "yarn-client"; - /** - * - * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro - * - * @return - * @throws URISyntaxException - */ - public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException { - String currentJar = new File( - SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()) - .getAbsolutePath(); - SparkLauncher sparkLauncher = - new SparkLauncher().setAppResource(currentJar) - .setMainClass(SparkMain.class.getName()) - .setPropertiesFile(propertiesFile); - File libDirectory = new File(new File(currentJar).getParent(), "lib"); - for (String library : libDirectory.list()) { - sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath()); - } - return sparkLauncher; + /** + * TODO: Need to fix a bunch of hardcoded stuff here eg: history server, spark distro + */ + public static SparkLauncher initLauncher(String propertiesFile) throws URISyntaxException { + String currentJar = new File( + SparkUtil.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath()) + .getAbsolutePath(); + SparkLauncher sparkLauncher = + new SparkLauncher().setAppResource(currentJar) + .setMainClass(SparkMain.class.getName()) + .setPropertiesFile(propertiesFile); + File libDirectory = new File(new File(currentJar).getParent(), "lib"); + for (String library : libDirectory.list()) { + sparkLauncher.addJar(new File(libDirectory, library).getAbsolutePath()); } + return sparkLauncher; + } - public static JavaSparkContext initJavaSparkConf(String name) { - SparkConf sparkConf = new SparkConf().setAppName(name); - sparkConf.setMaster(DEFUALT_SPARK_MASTER); - sparkConf.set("spark.serializer", 
"org.apache.spark.serializer.KryoSerializer"); - sparkConf.set("spark.driver.maxResultSize", "2g"); - sparkConf.set("spark.eventLog.overwrite", "true"); - sparkConf.set("spark.eventLog.enabled", "true"); + public static JavaSparkContext initJavaSparkConf(String name) { + SparkConf sparkConf = new SparkConf().setAppName(name); + sparkConf.setMaster(DEFUALT_SPARK_MASTER); + sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + sparkConf.set("spark.driver.maxResultSize", "2g"); + sparkConf.set("spark.eventLog.overwrite", "true"); + sparkConf.set("spark.eventLog.enabled", "true"); - // Configure hadoop conf - sparkConf.set("spark.hadoop.mapred.output.compress", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec"); - sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); + // Configure hadoop conf + sparkConf.set("spark.hadoop.mapred.output.compress", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", + "org.apache.hadoop.io.compress.GzipCodec"); + sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); - sparkConf = HoodieWriteClient.registerClasses(sparkConf); - JavaSparkContext jsc = new JavaSparkContext(sparkConf); - jsc.hadoopConfiguration().setBoolean("parquet.enable.summary-metadata", false); - return jsc; - } + sparkConf = HoodieWriteClient.registerClasses(sparkConf); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + jsc.hadoopConfiguration().setBoolean("parquet.enable.summary-metadata", false); + return jsc; + } } diff --git a/hoodie-cli/src/main/resources/META-INF/spring/spring-shell-plugin.xml b/hoodie-cli/src/main/resources/META-INF/spring/spring-shell-plugin.xml index 900c41dd7..2b4563658 100644 --- 
a/hoodie-cli/src/main/resources/META-INF/spring/spring-shell-plugin.xml +++ b/hoodie-cli/src/main/resources/META-INF/spring/spring-shell-plugin.xml @@ -16,11 +16,11 @@ --> - + diff --git a/hoodie-cli/src/main/scala/com/uber/hoodie/cli/DedupeSparkJob.scala b/hoodie-cli/src/main/scala/com/uber/hoodie/cli/DedupeSparkJob.scala index 249a8cc62..82c97e0a4 100644 --- a/hoodie-cli/src/main/scala/com/uber/hoodie/cli/DedupeSparkJob.scala +++ b/hoodie-cli/src/main/scala/com/uber/hoodie/cli/DedupeSparkJob.scala @@ -34,11 +34,11 @@ import scala.collection.mutable._ /** * Spark job to de-duplicate data present in a partition path */ -class DedupeSparkJob (basePath: String, - duplicatedPartitionPath: String, - repairOutputPath: String, - sqlContext: SQLContext, - fs: FileSystem) { +class DedupeSparkJob(basePath: String, + duplicatedPartitionPath: String, + repairOutputPath: String, + sqlContext: SQLContext, + fs: FileSystem) { val sparkHelper = new SparkHelper(sqlContext, fs) @@ -50,8 +50,9 @@ class DedupeSparkJob (basePath: String, * @param tblName * @return */ - def getDupeKeyDF(tblName: String) : DataFrame = { - val dupeSql = s""" + def getDupeKeyDF(tblName: String): DataFrame = { + val dupeSql = + s""" select `${HoodieRecord.RECORD_KEY_METADATA_FIELD}` as dupe_key, count(*) as dupe_cnt from ${tblName} @@ -69,7 +70,7 @@ class DedupeSparkJob (basePath: String, * * @return */ - private def planDuplicateFix() : HashMap[String, HashSet[String]] = { + private def planDuplicateFix(): HashMap[String, HashSet[String]] = { val tmpTableName = s"htbl_${System.currentTimeMillis()}" val dedupeTblName = s"${tmpTableName}_dupeKeys" @@ -78,17 +79,18 @@ class DedupeSparkJob (basePath: String, val allFiles = fs.listStatus(new org.apache.hadoop.fs.Path(s"${basePath}/${duplicatedPartitionPath}")) val fsView = new HoodieTableFileSystemView(metadata, metadata.getActiveTimeline.getCommitTimeline.filterCompletedInstants(), allFiles) - val latestFiles:java.util.List[HoodieDataFile] = 
fsView.getLatestDataFiles().collect(Collectors.toList[HoodieDataFile]()) + val latestFiles: java.util.List[HoodieDataFile] = fsView.getLatestDataFiles().collect(Collectors.toList[HoodieDataFile]()) val filteredStatuses = latestFiles.map(f => f.getPath) LOG.info(s" List of files under partition: ${} => ${filteredStatuses.mkString(" ")}") - val df = sqlContext.parquetFile(filteredStatuses:_*) + val df = sqlContext.parquetFile(filteredStatuses: _*) df.registerTempTable(tmpTableName) val dupeKeyDF = getDupeKeyDF(tmpTableName) dupeKeyDF.registerTempTable(dedupeTblName) // Obtain necessary satellite information for duplicate rows - val dupeDataSql = s""" + val dupeDataSql = + s""" SELECT `_hoodie_record_key`, `_hoodie_partition_path`, `_hoodie_file_name`, `_hoodie_commit_time` FROM ${tmpTableName} h JOIN ${dedupeTblName} d @@ -111,9 +113,9 @@ class DedupeSparkJob (basePath: String, rows.foreach(r => { val c = r(3).asInstanceOf[String].toLong - if (c != maxCommit){ + if (c != maxCommit) { val f = r(2).asInstanceOf[String].split("_")(0) - if (!fileToDeleteKeyMap.contains(f)){ + if (!fileToDeleteKeyMap.contains(f)) { fileToDeleteKeyMap(f) = HashSet[String]() } fileToDeleteKeyMap(f).add(key) @@ -130,28 +132,30 @@ class DedupeSparkJob (basePath: String, val allFiles = fs.listStatus(new Path(s"${basePath}/${duplicatedPartitionPath}")) val fsView = new HoodieTableFileSystemView(metadata, metadata.getActiveTimeline.getCommitTimeline.filterCompletedInstants(), allFiles) - val latestFiles:java.util.List[HoodieDataFile] = fsView.getLatestDataFiles().collect(Collectors.toList[HoodieDataFile]()) + val latestFiles: java.util.List[HoodieDataFile] = fsView.getLatestDataFiles().collect(Collectors.toList[HoodieDataFile]()) val fileNameToPathMap = latestFiles.map(f => (f.getFileId, new Path(f.getPath))).toMap val dupeFixPlan = planDuplicateFix() // 1. 
Copy all latest files into the temp fix path - fileNameToPathMap.foreach{ case(fileName, filePath) => { + fileNameToPathMap.foreach { case (fileName, filePath) => { val badSuffix = if (dupeFixPlan.contains(fileName)) ".bad" else "" val dstPath = new Path(s"${repairOutputPath}/${filePath.getName}${badSuffix}") LOG.info(s"Copying from ${filePath} to ${dstPath}") FileUtil.copy(fs, filePath, fs, dstPath, false, true, fs.getConf) - }} + } + } // 2. Remove duplicates from the bad files - dupeFixPlan.foreach{case(fileName, keysToSkip) => { + dupeFixPlan.foreach { case (fileName, keysToSkip) => { val commitTime = FSUtils.getCommitTime(fileNameToPathMap(fileName).getName) val badFilePath = new Path(s"${repairOutputPath}/${fileNameToPathMap(fileName).getName}.bad") val newFilePath = new Path(s"${repairOutputPath}/${fileNameToPathMap(fileName).getName}") LOG.info(" Skipping and writing new file for : " + fileName) SparkHelpers.skipKeysAndWriteNewFile(commitTime, fs, badFilePath, newFilePath, dupeFixPlan(fileName)) fs.delete(badFilePath, false) - }} + } + } // 3. Check that there are no duplicates anymore. val df = sqlContext.read.parquet(s"${repairOutputPath}/*.parquet") @@ -186,6 +190,7 @@ class DedupeSparkJob (basePath: String, LOG.info(s"[FOR REAL!!!] 
Copying from ${srcPath} to ${dstPath}") FileUtil.copy(fs, srcPath, fs, dstPath, false, true, fs.getConf) } - }} + } + } } } diff --git a/hoodie-cli/src/main/scala/com/uber/hoodie/cli/SparkHelpers.scala b/hoodie-cli/src/main/scala/com/uber/hoodie/cli/SparkHelpers.scala index 0323d6f87..3fc18c8e4 100644 --- a/hoodie-cli/src/main/scala/com/uber/hoodie/cli/SparkHelpers.scala +++ b/hoodie-cli/src/main/scala/com/uber/hoodie/cli/SparkHelpers.scala @@ -17,9 +17,9 @@ package com.uber.hoodie.cli import com.uber.hoodie.avro.HoodieAvroWriteSupport -import com.uber.hoodie.common.{BloomFilter, HoodieJsonPayload} import com.uber.hoodie.common.model.HoodieRecord import com.uber.hoodie.common.util.ParquetUtils +import com.uber.hoodie.common.{BloomFilter, HoodieJsonPayload} import com.uber.hoodie.config.{HoodieIndexConfig, HoodieStorageConfig} import com.uber.hoodie.io.storage.{HoodieParquetConfig, HoodieParquetWriter} import org.apache.avro.Schema @@ -107,7 +107,7 @@ class SparkHelper(sqlContext: SQLContext, fs: FileSystem) { * @param file * @param sqlContext */ - def getKeyCount(file: String, sqlContext: org.apache.spark.sql.SQLContext) ={ + def getKeyCount(file: String, sqlContext: org.apache.spark.sql.SQLContext) = { println(getRowKeyDF(file).collect().size) } @@ -122,7 +122,7 @@ class SparkHelper(sqlContext: SQLContext, fs: FileSystem) { * @param file * @return */ - def fileKeysAgainstBF(conf: Configuration, sqlContext: SQLContext, file: String) : Boolean = { + def fileKeysAgainstBF(conf: Configuration, sqlContext: SQLContext, file: String): Boolean = { val bfStr = SparkHelpers.getBloomFilter(file, conf) val bf = new com.uber.hoodie.common.BloomFilter(bfStr) val foundCount = sqlContext.parquetFile(file) @@ -134,7 +134,7 @@ class SparkHelper(sqlContext: SQLContext, fs: FileSystem) { totalCount == foundCount } - def getDistinctKeyDF(paths: List[String]) : DataFrame = { - sqlContext.read.parquet(paths:_*).select(s"`${HoodieRecord.RECORD_KEY_METADATA_FIELD}`").distinct() + def 
getDistinctKeyDF(paths: List[String]): DataFrame = { + sqlContext.read.parquet(paths: _*).select(s"`${HoodieRecord.RECORD_KEY_METADATA_FIELD}`").distinct() } } diff --git a/hoodie-client/pom.xml b/hoodie-client/pom.xml index db6fdc3ae..a3b1ec640 100644 --- a/hoodie-client/pom.xml +++ b/hoodie-client/pom.xml @@ -15,180 +15,182 @@ ~ limitations under the License. --> - - - hoodie - com.uber.hoodie - 0.4.1-SNAPSHOT - - 4.0.0 + + + hoodie + com.uber.hoodie + 0.4.1-SNAPSHOT + + 4.0.0 - hoodie-client - jar - - - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - - - - test-jar - - - - - - org.apache.rat - apache-rat-plugin - - + hoodie-client + jar + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.rat + apache-rat-plugin + + - - - src/main/resources - - - src/test/resources - - - + + + src/main/resources + + + src/test/resources + + + - - - com.uber.hoodie - hoodie-common - ${project.version} - - - org.apache.hadoop - hadoop-hdfs - tests - - - - org.mortbay.jetty - * - - - javax.servlet.jsp - * - - - javax.servlet - * - - - - - org.apache.hadoop - hadoop-common - tests - - - org.mortbay.jetty - * - - - javax.servlet.jsp - * - - - javax.servlet - * - - - - - com.uber.hoodie - hoodie-common - ${project.version} - test-jar - test - - - io.dropwizard.metrics - metrics-graphite - - - io.dropwizard.metrics - metrics-core - - - com.beust - jcommander - 1.48 - + + + com.uber.hoodie + hoodie-common + ${project.version} + + + org.apache.hadoop + hadoop-hdfs + tests + + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + + + org.apache.hadoop + hadoop-common + tests + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + + + com.uber.hoodie + hoodie-common + ${project.version} + test-jar + test + + + io.dropwizard.metrics + metrics-graphite + + + io.dropwizard.metrics + metrics-core + + + com.beust + 
jcommander + 1.48 + - - - log4j - log4j - + + + log4j + log4j + - - org.apache.hadoop - hadoop-client - - - javax.servlet - * - - - + + org.apache.hadoop + hadoop-client + + + javax.servlet + * + + + - - org.apache.parquet - parquet-avro - + + org.apache.parquet + parquet-avro + - - org.apache.parquet - parquet-hadoop - + + org.apache.parquet + parquet-hadoop + - - com.google.guava - guava - + + com.google.guava + guava + - - org.apache.spark - spark-core_2.11 - + + org.apache.spark + spark-core_2.11 + - - org.apache.spark - spark-sql_2.11 - + + org.apache.spark + spark-sql_2.11 + - - org.apache.hbase - hbase-client - + + org.apache.hbase + hbase-client + - - org.mockito - mockito-all - 1.10.19 - test - - - com.uber.hoodie - hoodie-hadoop-mr - ${project.version} - test - - - org.apache.hive - hive-exec - test - + + org.mockito + mockito-all + 1.10.19 + test + + + com.uber.hoodie + hoodie-hadoop-mr + ${project.version} + test + + + org.apache.hive + hive-exec + test + - + diff --git a/hoodie-client/src/main/java/com/uber/hoodie/HoodieReadClient.java b/hoodie-client/src/main/java/com/uber/hoodie/HoodieReadClient.java index 0417aeaff..851947286 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/HoodieReadClient.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/HoodieReadClient.java @@ -17,25 +17,19 @@ package com.uber.hoodie; import com.google.common.base.Optional; - -import com.uber.hoodie.common.model.HoodieCommitMetadata; -import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; -import com.uber.hoodie.common.table.TableFileSystemView; -import com.uber.hoodie.common.table.timeline.HoodieInstant; -import com.uber.hoodie.common.table.view.HoodieTableFileSystemView; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.config.HoodieWriteConfig; 
-import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.index.bloom.HoodieBloomIndex; - import com.uber.hoodie.table.HoodieTable; - +import java.io.Serializable; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; @@ -46,136 +40,126 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; import org.apache.spark.sql.types.StructType; - -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; - import scala.Tuple2; /** * Provides an RDD based API for accessing/filtering Hoodie tables, based on keys. - * */ public class HoodieReadClient implements Serializable { - private static Logger logger = LogManager.getLogger(HoodieReadClient.class); + private static Logger logger = LogManager.getLogger(HoodieReadClient.class); - private transient final JavaSparkContext jsc; + private transient final JavaSparkContext jsc; - private transient final FileSystem fs; - /** - * TODO: We need to persist the index type into hoodie.properties and be able to access the - * index just with a simple basepath pointing to the dataset. Until, then just always assume a - * BloomIndex - */ - private transient final HoodieBloomIndex index; - private final HoodieTimeline commitTimeline; - private HoodieTable hoodieTable; - private transient Optional sqlContextOpt; + private transient final FileSystem fs; + /** + * TODO: We need to persist the index type into hoodie.properties and be able to access the index + * just with a simple basepath pointing to the dataset. 
Until, then just always assume a + * BloomIndex + */ + private transient final HoodieBloomIndex index; + private final HoodieTimeline commitTimeline; + private HoodieTable hoodieTable; + private transient Optional sqlContextOpt; - /** - * @param basePath path to Hoodie dataset - */ - public HoodieReadClient(JavaSparkContext jsc, String basePath) { - this.jsc = jsc; - this.fs = FSUtils.getFs(); - // Create a Hoodie table which encapsulated the commits and files visible - this.hoodieTable = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); - this.commitTimeline = hoodieTable.getCompletedCompactionCommitTimeline(); - this.index = - new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc); - this.sqlContextOpt = Optional.absent(); + /** + * @param basePath path to Hoodie dataset + */ + public HoodieReadClient(JavaSparkContext jsc, String basePath) { + this.jsc = jsc; + this.fs = FSUtils.getFs(); + // Create a Hoodie table which encapsulated the commits and files visible + this.hoodieTable = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); + this.commitTimeline = hoodieTable.getCompletedCompactionCommitTimeline(); + this.index = + new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc); + this.sqlContextOpt = Optional.absent(); + } + + /** + * + * @param jsc + * @param basePath + * @param sqlContext + */ + public HoodieReadClient(JavaSparkContext jsc, String basePath, SQLContext sqlContext) { + this(jsc, basePath); + this.sqlContextOpt = Optional.of(sqlContext); + } + + /** + * Adds support for accessing Hoodie built tables from SparkSQL, as you normally would. 
+ * + * @return SparkConf object to be used to construct the SparkContext by caller + */ + public static SparkConf addHoodieSupport(SparkConf conf) { + conf.set("spark.sql.hive.convertMetastoreParquet", "false"); + return conf; + } + + private void assertSqlContext() { + if (!sqlContextOpt.isPresent()) { + throw new IllegalStateException( + "SQLContext must be set, when performing dataframe operations"); } + } - /** - * - * @param jsc - * @param basePath - * @param sqlContext - */ - public HoodieReadClient(JavaSparkContext jsc, String basePath, SQLContext sqlContext) { - this(jsc, basePath); - this.sqlContextOpt = Optional.of(sqlContext); - } + /** + * Given a bunch of hoodie keys, fetches all the individual records out as a data frame + * + * @return a dataframe + */ + public Dataset read(JavaRDD hoodieKeys, int parallelism) + throws Exception { - /** - * Adds support for accessing Hoodie built tables from SparkSQL, as you normally would. - * - * @return SparkConf object to be used to construct the SparkContext by caller - */ - public static SparkConf addHoodieSupport(SparkConf conf) { - conf.set("spark.sql.hive.convertMetastoreParquet", "false"); - return conf; - } + assertSqlContext(); + JavaPairRDD> keyToFileRDD = + index.fetchRecordLocation(hoodieKeys, hoodieTable); + List paths = keyToFileRDD + .filter(keyFileTuple -> keyFileTuple._2().isPresent()) + .map(keyFileTuple -> keyFileTuple._2().get()) + .collect(); - private void assertSqlContext() { - if (!sqlContextOpt.isPresent()) { - throw new IllegalStateException("SQLContext must be set, when performing dataframe operations"); - } - } + // record locations might be same for multiple keys, so need a unique list + Set uniquePaths = new HashSet<>(paths); + Dataset originalDF = sqlContextOpt.get().read() + .parquet(uniquePaths.toArray(new String[uniquePaths.size()])); + StructType schema = originalDF.schema(); + JavaPairRDD keyRowRDD = originalDF.javaRDD() + .mapToPair(row -> { + HoodieKey key = new HoodieKey( + 
row.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD), + row.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD)); + return new Tuple2<>(key, row); + }); - /** - * Given a bunch of hoodie keys, fetches all the individual records out as a data frame - * - * @return a dataframe - */ - public Dataset read(JavaRDD hoodieKeys, int parallelism) - throws Exception { + // Now, we need to further filter out, for only rows that match the supplied hoodie keys + JavaRDD rowRDD = keyRowRDD.join(keyToFileRDD, parallelism) + .map(tuple -> tuple._2()._1()); - assertSqlContext(); - JavaPairRDD> keyToFileRDD = - index.fetchRecordLocation(hoodieKeys, hoodieTable); - List paths = keyToFileRDD - .filter(keyFileTuple -> keyFileTuple._2().isPresent()) - .map(keyFileTuple -> keyFileTuple._2().get()) - .collect(); + return sqlContextOpt.get().createDataFrame(rowRDD, schema); + } - // record locations might be same for multiple keys, so need a unique list - Set uniquePaths = new HashSet<>(paths); - Dataset originalDF = sqlContextOpt.get().read() - .parquet(uniquePaths.toArray(new String[uniquePaths.size()])); - StructType schema = originalDF.schema(); - JavaPairRDD keyRowRDD = originalDF.javaRDD() - .mapToPair(row -> { - HoodieKey key = new HoodieKey( - row.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD), - row.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD)); - return new Tuple2<>(key, row); - }); + /** + * Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional[FullFilePath]] + * If the optional FullFilePath value is not present, then the key is not found. 
If the + * FullFilePath value is present, it is the path component (without scheme) of the URI underlying + * file + */ + public JavaPairRDD> checkExists(JavaRDD hoodieKeys) { + return index.fetchRecordLocation(hoodieKeys, hoodieTable); + } - // Now, we need to further filter out, for only rows that match the supplied hoodie keys - JavaRDD rowRDD = keyRowRDD.join(keyToFileRDD, parallelism) - .map(tuple -> tuple._2()._1()); - - return sqlContextOpt.get().createDataFrame(rowRDD, schema); - } - - /** - * Checks if the given [Keys] exists in the hoodie table and returns [Key, - * Optional[FullFilePath]] If the optional FullFilePath value is not present, then the key is - * not found. If the FullFilePath value is present, it is the path component (without scheme) of - * the URI underlying file - */ - public JavaPairRDD> checkExists(JavaRDD hoodieKeys) { - return index.fetchRecordLocation(hoodieKeys, hoodieTable); - } - - /** - * Filter out HoodieRecords that already exists in the output folder. This is useful in - * deduplication. - * - * @param hoodieRecords Input RDD of Hoodie records. - * @return A subset of hoodieRecords RDD, with existing records filtered out. - */ - public JavaRDD filterExists(JavaRDD hoodieRecords) { - JavaRDD recordsWithLocation = index.tagLocation(hoodieRecords, hoodieTable); - return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown()); - } + /** + * Filter out HoodieRecords that already exists in the output folder. This is useful in + * deduplication. + * + * @param hoodieRecords Input RDD of Hoodie records. + * @return A subset of hoodieRecords RDD, with existing records filtered out. 
+ */ + public JavaRDD filterExists(JavaRDD hoodieRecords) { + JavaRDD recordsWithLocation = index.tagLocation(hoodieRecords, hoodieTable); + return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown()); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java b/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java index f402825f7..b55fc5ae6 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/HoodieWriteClient.java @@ -50,10 +50,21 @@ import com.uber.hoodie.func.BulkInsertMapFunction; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.io.HoodieCommitArchiveLog; import com.uber.hoodie.metrics.HoodieMetrics; -import com.uber.hoodie.table.UserDefinedBulkInsertPartitioner; import com.uber.hoodie.table.HoodieTable; +import com.uber.hoodie.table.UserDefinedBulkInsertPartitioner; import com.uber.hoodie.table.WorkloadProfile; import com.uber.hoodie.table.WorkloadStat; +import java.io.IOException; +import java.io.Serializable; +import java.nio.charset.StandardCharsets; +import java.text.ParseException; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -66,25 +77,12 @@ import org.apache.spark.storage.StorageLevel; import scala.Option; import scala.Tuple2; -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.text.ParseException; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; - /** - * Hoodie Write Client helps you build datasets on HDFS [insert()] and then - * perform efficient 
mutations on a HDFS dataset [upsert()] - * - * Note that, at any given time, there can only be one Spark job performing - * these operatons on a Hoodie dataset. + * Hoodie Write Client helps you build datasets on HDFS [insert()] and then perform efficient + * mutations on a HDFS dataset [upsert()] * + * Note that, at any given time, there can only be one Spark job performing these operatons on a + * Hoodie dataset. */ public class HoodieWriteClient implements Serializable { @@ -102,7 +100,8 @@ public class HoodieWriteClient implements Seriali * @param clientConfig * @throws Exception */ - public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig) throws Exception { + public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig) + throws Exception { this(jsc, clientConfig, false); } @@ -111,7 +110,8 @@ public class HoodieWriteClient implements Seriali * @param clientConfig * @param rollbackInFlight */ - public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight) { + public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, + boolean rollbackInFlight) { this.fs = FSUtils.getFs(); this.jsc = jsc; this.config = clientConfig; @@ -121,7 +121,7 @@ public class HoodieWriteClient implements Seriali if (rollbackInFlight) { rollbackInflightCommits(); - } + } } /** @@ -163,17 +163,17 @@ public class HoodieWriteClient implements Seriali throw (HoodieUpsertException) e; } throw new HoodieUpsertException("Failed to upsert for commit time " + commitTime, e); - } + } } /** * Inserts the given HoodieRecords, into the table. This API is intended to be used for normal * writes. 
* - * This implementation skips the index check and is able to leverage benefits such as - * small file handling/blocking alignment, as with upsert(), by profiling the workload + * This implementation skips the index check and is able to leverage benefits such as small file + * handling/blocking alignment, as with upsert(), by profiling the workload * - * @param records HoodieRecords to insert + * @param records HoodieRecords to insert * @param commitTime Commit Time handle * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts */ @@ -194,7 +194,7 @@ public class HoodieWriteClient implements Seriali throw e; } throw new HoodieInsertException("Failed to insert for commit time " + commitTime, e); - } + } } /** @@ -206,11 +206,12 @@ public class HoodieWriteClient implements Seriali * attempts to control the numbers of files with less memory compared to the {@link * HoodieWriteClient#insert(JavaRDD, String)} * - * @param records HoodieRecords to insert + * @param records HoodieRecords to insert * @param commitTime Commit Time handle * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts */ - public JavaRDD bulkInsert(JavaRDD> records, final String commitTime) { + public JavaRDD bulkInsert(JavaRDD> records, + final String commitTime) { return bulkInsert(records, commitTime, Option.empty()); } @@ -221,16 +222,18 @@ public class HoodieWriteClient implements Seriali * * This implementation uses sortBy (which does range partitioning based on reservoir sampling) and * attempts to control the numbers of files with less memory compared to the {@link - * HoodieWriteClient#insert(JavaRDD, String)}. Optionally it allows users to specify their own partitioner. If - * specified then it will be used for repartitioning records. See {@link UserDefinedBulkInsertPartitioner}. + * HoodieWriteClient#insert(JavaRDD, String)}. Optionally it allows users to specify their own + * partitioner. 
If specified then it will be used for repartitioning records. See {@link + * UserDefinedBulkInsertPartitioner}. * - * @param records HoodieRecords to insert + * @param records HoodieRecords to insert * @param commitTime Commit Time handle - * @param bulkInsertPartitioner If specified then it will be used to partition input records before they are - * inserted into hoodie. + * @param bulkInsertPartitioner If specified then it will be used to partition input records + * before they are inserted into hoodie. * @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts */ - public JavaRDD bulkInsert(JavaRDD> records, final String commitTime, + public JavaRDD bulkInsert(JavaRDD> records, + final String commitTime, Option bulkInsertPartitioner) { writeContext = metrics.getCommitCtx(); // Create a Hoodie table which encapsulated the commits and files visible @@ -240,7 +243,8 @@ public class HoodieWriteClient implements Seriali try { // De-dupe/merge if needed JavaRDD> dedupedRecords = - combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism()); + combineOnCondition(config.shouldCombineBeforeInsert(), records, + config.getInsertShuffleParallelism()); final JavaRDD> repartitionedRecords; if (bulkInsertPartitioner.isDefined()) { @@ -259,20 +263,22 @@ public class HoodieWriteClient implements Seriali }, true, config.getBulkInsertShuffleParallelism()); } JavaRDD writeStatusRDD = repartitionedRecords - .mapPartitionsWithIndex(new BulkInsertMapFunction(commitTime, config, table), true) - .flatMap(writeStatuses -> writeStatuses.iterator()); + .mapPartitionsWithIndex(new BulkInsertMapFunction(commitTime, config, table), + true) + .flatMap(writeStatuses -> writeStatuses.iterator()); return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime); } catch (Throwable e) { if (e instanceof HoodieInsertException) { throw e; } - throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime, 
e); - } + throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime, + e); + } } private void commitOnAutoCommit(String commitTime, JavaRDD resultRDD) { - if(config.shouldAutoCommit()) { + if (config.shouldAutoCommit()) { logger.info("Auto commit enabled: Committing " + commitTime); boolean commitResult = commit(commitTime, resultRDD); if (!commitResult) { @@ -280,30 +286,28 @@ public class HoodieWriteClient implements Seriali } } else { logger.info("Auto commit disabled for " + commitTime); - } + } } private JavaRDD> combineOnCondition(boolean condition, - JavaRDD> records, - int parallelism) { - if(condition) { + JavaRDD> records, + int parallelism) { + if (condition) { return deduplicateRecords(records, parallelism); } return records; } /** - * - * Save the workload profile in an intermediate file (here re-using commit files) - * This is useful when performing rollback for MOR datasets. Only updates are recorded - * in the workload profile metadata since updates to log blocks are unknown across batches - * Inserts (which are new parquet files) are rolled back based on commit time. - * // TODO : Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata - * @param profile - * @param commitTime - * @throws HoodieCommitException + * Save the workload profile in an intermediate file (here re-using commit files) This is useful + * when performing rollback for MOR datasets. Only updates are recorded in the workload profile + * metadata since updates to log blocks are unknown across batches Inserts (which are new parquet + * files) are rolled back based on commit time. 
// TODO : Create a new WorkloadProfile metadata + * file instead of using HoodieCommitMetadata */ - private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable table, String commitTime) throws HoodieCommitException { + private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, + HoodieTable table, + String commitTime) throws HoodieCommitException { try { HoodieCommitMetadata metadata = new HoodieCommitMetadata(); profile.getPartitionPaths().stream().forEach(path -> { @@ -319,16 +323,17 @@ public class HoodieWriteClient implements Seriali HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); Optional instant = activeTimeline.filterInflights().lastInstant(); activeTimeline.saveToInflight(instant.get(), - Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); - } catch(IOException io) { - throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io); - } + Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + } catch (IOException io) { + throw new HoodieCommitException( + "Failed to commit " + commitTime + " unable to save inflight metadata ", io); + } } private JavaRDD upsertRecordsInternal(JavaRDD> preppedRecords, - String commitTime, - HoodieTable hoodieTable, - final boolean isUpsert) { + String commitTime, + HoodieTable hoodieTable, + final boolean isUpsert) { // Cache the tagged records, so we don't end up computing both preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER()); @@ -344,29 +349,31 @@ public class HoodieWriteClient implements Seriali final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile); JavaRDD> partitionedRecords = partition(preppedRecords, partitioner); JavaRDD writeStatusRDD = partitionedRecords - .mapPartitionsWithIndex((partition, recordItr) -> { - if (isUpsert) { - return hoodieTable - .handleUpsertPartition(commitTime, partition, recordItr, partitioner); - } else { - return 
hoodieTable - .handleInsertPartition(commitTime, partition, recordItr, partitioner); - } - }, true) - .flatMap(writeStatuses -> writeStatuses.iterator()); + .mapPartitionsWithIndex((partition, recordItr) -> { + if (isUpsert) { + return hoodieTable + .handleUpsertPartition(commitTime, partition, recordItr, partitioner); + } else { + return hoodieTable + .handleInsertPartition(commitTime, partition, recordItr, partitioner); + } + }, true) + .flatMap(writeStatuses -> writeStatuses.iterator()); return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, commitTime); } - private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) { + private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, + WorkloadProfile profile) { if (isUpsert) { return table.getUpsertPartitioner(profile); } else { return table.getInsertPartitioner(profile); - } + } } - private JavaRDD updateIndexAndCommitIfNeeded(JavaRDD writeStatusRDD, HoodieTable table, String commitTime) { + private JavaRDD updateIndexAndCommitIfNeeded(JavaRDD writeStatusRDD, + HoodieTable table, String commitTime) { // Update the index back JavaRDD statuses = index.updateLocation(writeStatusRDD, table); // Trigger the insert and collect statuses @@ -375,12 +382,15 @@ public class HoodieWriteClient implements Seriali return statuses; } - private JavaRDD> partition(JavaRDD> dedupedRecords, Partitioner partitioner) { + private JavaRDD> partition(JavaRDD> dedupedRecords, + Partitioner partitioner) { return dedupedRecords - .mapToPair(record -> - new Tuple2<>(new Tuple2<>(record.getKey(), Option.apply(record.getCurrentLocation())), record)) - .partitionBy(partitioner) - .map(tuple -> tuple._2()); + .mapToPair(record -> + new Tuple2<>( + new Tuple2<>(record.getKey(), Option.apply(record.getCurrentLocation())), + record)) + .partitionBy(partitioner) + .map(tuple -> tuple._2()); } /** @@ -394,8 +404,8 @@ public class HoodieWriteClient implements Seriali * Commit changes performed 
at the given commitTime marker */ public boolean commit(String commitTime, - JavaRDD writeStatuses, - Optional> extraMetadata) { + JavaRDD writeStatuses, + Optional> extraMetadata) { logger.info("Commiting " + commitTime); // Create a Hoodie table which encapsulated the commits and files visible @@ -405,9 +415,9 @@ public class HoodieWriteClient implements Seriali HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); List> stats = writeStatuses - .mapToPair((PairFunction) writeStatus -> - new Tuple2<>(writeStatus.getPartitionPath(), writeStatus.getStat())) - .collect(); + .mapToPair((PairFunction) writeStatus -> + new Tuple2<>(writeStatus.getPartitionPath(), writeStatus.getStat())) + .collect(); HoodieCommitMetadata metadata = new HoodieCommitMetadata(); for (Tuple2 stat : stats) { @@ -438,7 +448,7 @@ public class HoodieWriteClient implements Seriali // We cannot have unbounded commit files. Archive commits if we have to archive archiveLog.archiveIfRequired(); - if(config.isAutoClean()) { + if (config.isAutoClean()) { // Call clean to cleanup if there is anything to cleanup after the commit, logger.info("Auto cleaning is enabled. Running cleaner now"); clean(commitTime); @@ -465,12 +475,12 @@ public class HoodieWriteClient implements Seriali } /** - * Savepoint a specific commit. Latest version of data files as of the passed in commitTime - * will be referenced in the savepoint and will never be cleaned. The savepointed commit - * will never be rolledback or archived. + * Savepoint a specific commit. Latest version of data files as of the passed in commitTime will + * be referenced in the savepoint and will never be cleaned. The savepointed commit will never be + * rolledback or archived. * - * This gives an option to rollback the state to the savepoint anytime. - * Savepoint needs to be manually created and deleted. + * This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be + * manually created and deleted. 
* * Savepoint should be on a commit that could not have been cleaned. * @@ -491,12 +501,12 @@ public class HoodieWriteClient implements Seriali } /** - * Savepoint a specific commit. Latest version of data files as of the passed in commitTime - * will be referenced in the savepoint and will never be cleaned. The savepointed commit - * will never be rolledback or archived. + * Savepoint a specific commit. Latest version of data files as of the passed in commitTime will + * be referenced in the savepoint and will never be cleaned. The savepointed commit will never be + * rolledback or archived. * - * This gives an option to rollback the state to the savepoint anytime. - * Savepoint needs to be manually created and deleted. + * This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be + * manually created and deleted. * * Savepoint should be on a commit that could not have been cleaned. * @@ -510,9 +520,11 @@ public class HoodieWriteClient implements Seriali .getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config); Optional cleanInstant = table.getCompletedCleanTimeline().lastInstant(); - HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - if(!table.getCompletedCommitTimeline().containsInstant(commitInstant)) { - throw new HoodieSavepointException("Could not savepoint non-existing commit " + commitInstant); + HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, + commitTime); + if (!table.getCompletedCommitTimeline().containsInstant(commitInstant)) { + throw new HoodieSavepointException( + "Could not savepoint non-existing commit " + commitInstant); } try { @@ -534,7 +546,8 @@ public class HoodieWriteClient implements Seriali + lastCommitRetained); Map> latestFilesMap = jsc.parallelize( - FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning())) + FSUtils.getAllPartitionPaths(fs, 
table.getMetaClient().getBasePath(), + config.shouldAssumeDatePartitioning())) .mapToPair((PairFunction>) partitionPath -> { // Scan all partitions files with this commit time logger.info("Collecting latest files in partition path " + partitionPath); @@ -555,12 +568,12 @@ public class HoodieWriteClient implements Seriali return true; } catch (IOException e) { throw new HoodieSavepointException("Failed to savepoint " + commitTime, e); - } + } } /** - * Delete a savepoint that was created. Once the savepoint is deleted, the commit can be rolledback - * and cleaner may clean up data files. + * Delete a savepoint that was created. Once the savepoint is deleted, the commit can be + * rolledback and cleaner may clean up data files. * * @param savepointTime - delete the savepoint * @return true if the savepoint was deleted successfully @@ -586,9 +599,8 @@ public class HoodieWriteClient implements Seriali } /** - * Rollback the state to the savepoint. - * WARNING: This rollsback recent commits and deleted data files. Queries accessing the files - * will mostly fail. This should be done during a downtime. + * Rollback the state to the savepoint. WARNING: This rollsback recent commits and deleted data + * files. Queries accessing the files will mostly fail. This should be done during a downtime. 
* * @param savepointTime - savepoint time to rollback to * @return true if the savepoint was rollecback to successfully @@ -616,7 +628,8 @@ public class HoodieWriteClient implements Seriali // Make sure the rollback was successful Optional lastInstant = - activeTimeline.reload().getCommitsAndCompactionsTimeline().filterCompletedInstants().lastInstant(); + activeTimeline.reload().getCommitsAndCompactionsTimeline().filterCompletedInstants() + .lastInstant(); Preconditions.checkArgument(lastInstant.isPresent()); Preconditions.checkArgument(lastInstant.get().getTimestamp().equals(savepointTime), savepointTime + "is not the last commit after rolling back " + commitsToRollback @@ -625,12 +638,9 @@ public class HoodieWriteClient implements Seriali } /** - * Rollback the (inflight/committed) record changes with the given commit time. - * Three steps: - * (1) Atomically unpublish this commit - * (2) clean indexing data, - * (3) clean new generated parquet files. - * (4) Finally delete .commit or .inflight file, + * Rollback the (inflight/committed) record changes with the given commit time. Three steps: (1) + * Atomically unpublish this commit (2) clean indexing data, (3) clean new generated parquet + * files. 
(4) Finally delete .commit or .inflight file, */ public boolean rollback(final String commitTime) throws HoodieRollbackException { rollback(Lists.newArrayList(commitTime)); @@ -638,7 +648,7 @@ public class HoodieWriteClient implements Seriali } private void rollback(List commits) { - if(commits.isEmpty()) { + if (commits.isEmpty()) { logger.info("List of commits to rollback is empty"); return; } @@ -702,7 +712,9 @@ public class HoodieWriteClient implements Seriali Optional durationInMs = Optional.empty(); if (context != null) { durationInMs = Optional.of(metrics.getDurationInMs(context.stop())); - Long numFilesDeleted = stats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum(); + Long numFilesDeleted = stats.stream() + .mapToLong(stat -> stat.getSuccessDeleteFiles().size()) + .sum(); metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted); } HoodieRollbackMetadata rollbackMetadata = @@ -722,7 +734,7 @@ public class HoodieWriteClient implements Seriali } catch (IOException e) { throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commits, e); - } + } } /** @@ -733,9 +745,9 @@ public class HoodieWriteClient implements Seriali } /** - * Clean up any stale/old files/data lying around (either on file storage or index storage) - * based on the configurations and CleaningPolicy used. (typically files that no longer can be used - * by a running query can be cleaned) + * Clean up any stale/old files/data lying around (either on file storage or index storage) based + * on the configurations and CleaningPolicy used. 
(typically files that no longer can be used by a + * running query can be cleaned) */ public void clean() throws HoodieIOException { String startCleanTime = HoodieActiveTimeline.createNewCommitTime(); @@ -743,11 +755,11 @@ public class HoodieWriteClient implements Seriali } /** - * Clean up any stale/old files/data lying around (either on file storage or index storage) - * based on the configurations and CleaningPolicy used. (typically files that no longer can be used - * by a running query can be cleaned) + * Clean up any stale/old files/data lying around (either on file storage or index storage) based + * on the configurations and CleaningPolicy used. (typically files that no longer can be used by a + * running query can be cleaned) */ - private void clean(String startCleanTime) throws HoodieIOException { + private void clean(String startCleanTime) throws HoodieIOException { try { logger.info("Cleaner started"); final Timer.Context context = metrics.getCleanCtx(); @@ -788,7 +800,7 @@ public class HoodieWriteClient implements Seriali } } catch (IOException e) { throw new HoodieIOException("Failed to clean up after commit", e); - } + } } /** @@ -811,30 +823,30 @@ public class HoodieWriteClient implements Seriali } public static SparkConf registerClasses(SparkConf conf) { - conf.registerKryoClasses(new Class[]{HoodieWriteConfig.class, HoodieRecord.class, HoodieKey.class}); + conf.registerKryoClasses( + new Class[]{HoodieWriteConfig.class, HoodieRecord.class, HoodieKey.class}); return conf; } /** * Deduplicate Hoodie records, using the given deduplication funciton. 
*/ - private JavaRDD> deduplicateRecords(JavaRDD> records, int parallelism) { + private JavaRDD> deduplicateRecords(JavaRDD> records, + int parallelism) { return records - .mapToPair(record -> new Tuple2<>(record.getKey(), record)) - .reduceByKey((rec1, rec2) -> { - @SuppressWarnings("unchecked") - T reducedData = (T) rec1.getData().preCombine(rec2.getData()); - // we cannot allow the user to change the key or partitionPath, since that will affect everything - // so pick it from one of the records. - return new HoodieRecord(rec1.getKey(), reducedData); - }, parallelism) - .map(recordTuple -> recordTuple._2()); + .mapToPair(record -> new Tuple2<>(record.getKey(), record)) + .reduceByKey((rec1, rec2) -> { + @SuppressWarnings("unchecked") + T reducedData = (T) rec1.getData().preCombine(rec2.getData()); + // we cannot allow the user to change the key or partitionPath, since that will affect everything + // so pick it from one of the records. + return new HoodieRecord(rec1.getKey(), reducedData); + }, parallelism) + .map(recordTuple -> recordTuple._2()); } /** * Cleanup all inflight commits - * - * @throws IOException */ private void rollbackInflightCommits() { HoodieTable table = HoodieTable diff --git a/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java b/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java index 302a0fcd2..7854e128e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java @@ -19,7 +19,6 @@ package com.uber.hoodie; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieWriteStat; - import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; @@ -32,124 +31,130 @@ import java.util.Optional; */ public class WriteStatus implements Serializable { - private final HashMap errors = new HashMap<>(); + private final HashMap errors = new HashMap<>(); - 
private final List writtenRecords = new ArrayList<>(); + private final List writtenRecords = new ArrayList<>(); - private final List failedRecords = new ArrayList<>(); + private final List failedRecords = new ArrayList<>(); - private Throwable globalError = null; + private Throwable globalError = null; - private String fileId = null; + private String fileId = null; - private String partitionPath = null; + private String partitionPath = null; - private HoodieWriteStat stat = null; + private HoodieWriteStat stat = null; - private long totalRecords = 0; - private long totalErrorRecords = 0; + private long totalRecords = 0; + private long totalErrorRecords = 0; - /** - * Mark write as success, optionally using given parameters for the purpose of calculating - * some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus - * objects are collected in Spark Driver. - * - * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it. - * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation. - */ - public void markSuccess(HoodieRecord record, - Optional> optionalRecordMetadata) { - writtenRecords.add(record); - totalRecords++; - } + /** + * Mark write as success, optionally using given parameters for the purpose of calculating some + * aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus + * objects are collected in Spark Driver. + * + * @param record deflated {@code HoodieRecord} containing information that uniquely identifies + * it. + * @param optionalRecordMetadata optional metadata related to data contained in {@link + * HoodieRecord} before deflation. 
+ */ + public void markSuccess(HoodieRecord record, + Optional> optionalRecordMetadata) { + writtenRecords.add(record); + totalRecords++; + } - /** - * Mark write as failed, optionally using given parameters for the purpose of calculating - * some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus - * objects are collected in Spark Driver. - * - * @param record deflated {@code HoodieRecord} containing information that uniquely identifies it. - * @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation. - */ - public void markFailure(HoodieRecord record, Throwable t, - Optional> optionalRecordMetadata) { - failedRecords.add(record); - errors.put(record.getKey(), t); - totalRecords++; - totalErrorRecords++; - } + /** + * Mark write as failed, optionally using given parameters for the purpose of calculating some + * aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus + * objects are collected in Spark Driver. + * + * @param record deflated {@code HoodieRecord} containing information that uniquely identifies + * it. + * @param optionalRecordMetadata optional metadata related to data contained in {@link + * HoodieRecord} before deflation. 
+ */ + public void markFailure(HoodieRecord record, Throwable t, + Optional> optionalRecordMetadata) { + failedRecords.add(record); + errors.put(record.getKey(), t); + totalRecords++; + totalErrorRecords++; + } - public String getFileId() { - return fileId; - } + public String getFileId() { + return fileId; + } - public void setFileId(String fileId) { - this.fileId = fileId; - } + public void setFileId(String fileId) { + this.fileId = fileId; + } - public boolean hasErrors() { - return totalErrorRecords > 0; - } + public boolean hasErrors() { + return totalErrorRecords > 0; + } - public boolean isErrored(HoodieKey key) { - return errors.containsKey(key); - } + public boolean isErrored(HoodieKey key) { + return errors.containsKey(key); + } - public HashMap getErrors() { - return errors; - } + public HashMap getErrors() { + return errors; + } - public boolean hasGlobalError() { - return globalError != null; - } + public boolean hasGlobalError() { + return globalError != null; + } - public void setGlobalError(Throwable t) { - this.globalError = t; - } + public void setGlobalError(Throwable t) { + this.globalError = t; + } - public Throwable getGlobalError() { - return this.globalError; - } + public Throwable getGlobalError() { + return this.globalError; + } - public List getWrittenRecords() { - return writtenRecords; - } + public List getWrittenRecords() { + return writtenRecords; + } - public List getFailedRecords() { - return failedRecords; - } + public List getFailedRecords() { + return failedRecords; + } - public HoodieWriteStat getStat() { - return stat; - } + public HoodieWriteStat getStat() { + return stat; + } - public void setStat(HoodieWriteStat stat) { - this.stat = stat; - } + public void setStat(HoodieWriteStat stat) { + this.stat = stat; + } - public String getPartitionPath() { - return partitionPath; - } + public String getPartitionPath() { + return partitionPath; + } - public void setPartitionPath(String partitionPath) { - this.partitionPath = 
partitionPath; - } + public void setPartitionPath(String partitionPath) { + this.partitionPath = partitionPath; + } - public long getTotalRecords() { - return totalRecords; - } + public long getTotalRecords() { + return totalRecords; + } - public long getTotalErrorRecords() { return totalErrorRecords; } + public long getTotalErrorRecords() { + return totalErrorRecords; + } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("WriteStatus {"); - sb.append("fileId=").append(fileId); - sb.append(", globalError='").append(globalError).append('\''); - sb.append(", hasErrors='").append(hasErrors()).append('\''); - sb.append(", errorCount='").append(totalErrorRecords).append('\''); - sb.append(", errorPct='").append((100.0 * totalErrorRecords) / totalRecords).append('\''); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("WriteStatus {"); + sb.append("fileId=").append(fileId); + sb.append(", globalError='").append(globalError).append('\''); + sb.append(", hasErrors='").append(hasErrors()).append('\''); + sb.append(", errorCount='").append(totalErrorRecords).append('\''); + sb.append(", errorPct='").append((100.0 * totalErrorRecords) / totalRecords).append('\''); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/DefaultHoodieConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/DefaultHoodieConfig.java index bf363a38e..948a1e00e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/DefaultHoodieConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/DefaultHoodieConfig.java @@ -17,33 +17,35 @@ package com.uber.hoodie.config; import java.io.Serializable; -import java.util.Map; import java.util.Properties; /** * Default Way to load Hoodie config through a java.util.Properties */ public class DefaultHoodieConfig implements Serializable { - protected final Properties 
props; - public DefaultHoodieConfig(Properties props) { - this.props = props; - } - public Properties getProps() { - return props; - } + protected final Properties props; - public static void setDefaultOnCondition(Properties props, boolean condition, String propName, - String defaultValue) { - if (condition) { - props.setProperty(propName, defaultValue); - } - } + public DefaultHoodieConfig(Properties props) { + this.props = props; + } - public static void setDefaultOnCondition(Properties props, boolean condition, DefaultHoodieConfig config) { - if (condition) { - props.putAll(config.getProps()); - } + public Properties getProps() { + return props; + } + + public static void setDefaultOnCondition(Properties props, boolean condition, String propName, + String defaultValue) { + if (condition) { + props.setProperty(propName, defaultValue); } + } + + public static void setDefaultOnCondition(Properties props, boolean condition, + DefaultHoodieConfig config) { + if (condition) { + props.putAll(config.getProps()); + } + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieCompactionConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieCompactionConfig.java index d47dd1d52..39f076e5a 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieCompactionConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieCompactionConfig.java @@ -19,231 +19,239 @@ package com.uber.hoodie.config; import com.google.common.base.Preconditions; import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieCleaningPolicy; -import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.io.compact.strategy.CompactionStrategy; import com.uber.hoodie.io.compact.strategy.LogFileSizeBasedCompactionStrategy; - -import javax.annotation.concurrent.Immutable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Properties; +import 
javax.annotation.concurrent.Immutable; /** * Compaction related config */ @Immutable public class HoodieCompactionConfig extends DefaultHoodieConfig { - public static final String CLEANER_POLICY_PROP = "hoodie.cleaner.policy"; - private static final String DEFAULT_CLEANER_POLICY = - HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name(); - public static final String AUTO_CLEAN_PROP = "hoodie.clean.automatic"; - private static final String DEFAULT_AUTO_CLEAN = "true"; + public static final String CLEANER_POLICY_PROP = "hoodie.cleaner.policy"; + private static final String DEFAULT_CLEANER_POLICY = + HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name(); - // Turn on inline compaction - after fw delta commits a inline compaction will be run - public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline"; - private static final String DEFAULT_INLINE_COMPACT = "true"; + public static final String AUTO_CLEAN_PROP = "hoodie.clean.automatic"; + private static final String DEFAULT_AUTO_CLEAN = "true"; - // Run a compaction every N delta commits - public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits"; - private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "10"; + // Turn on inline compaction - after fw delta commits a inline compaction will be run + public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline"; + private static final String DEFAULT_INLINE_COMPACT = "true"; - public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = - "hoodie.cleaner.fileversions.retained"; - private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3"; + // Run a compaction every N delta commits + public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits"; + private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "10"; - public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained"; - private 
static final String DEFAULT_CLEANER_COMMITS_RETAINED = "24"; + public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = + "hoodie.cleaner.fileversions.retained"; + private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3"; - public static final String MAX_COMMITS_TO_KEEP = "hoodie.keep.max.commits"; - private static final String DEFAULT_MAX_COMMITS_TO_KEEP = String.valueOf(128); - public static final String MIN_COMMITS_TO_KEEP = "hoodie.keep.min.commits"; - private static final String DEFAULT_MIN_COMMITS_TO_KEEP = String.valueOf(96); - // Upsert uses this file size to compact new data onto existing files.. - public static final String PARQUET_SMALL_FILE_LIMIT_BYTES = "hoodie.parquet.small.file.limit"; - // Turned off by default - public static final String DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES = String.valueOf(0); + public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained"; + private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "24"; + + public static final String MAX_COMMITS_TO_KEEP = "hoodie.keep.max.commits"; + private static final String DEFAULT_MAX_COMMITS_TO_KEEP = String.valueOf(128); + public static final String MIN_COMMITS_TO_KEEP = "hoodie.keep.min.commits"; + private static final String DEFAULT_MIN_COMMITS_TO_KEEP = String.valueOf(96); + // Upsert uses this file size to compact new data onto existing files.. + public static final String PARQUET_SMALL_FILE_LIMIT_BYTES = "hoodie.parquet.small.file.limit"; + // Turned off by default + public static final String DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES = String.valueOf(0); - /** Configs related to specific table types **/ - // Number of inserts, that will be put each partition/bucket for writing - public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert.split.size"; - // The rationale to pick the insert parallelism is the following. 
Writing out 100MB files, - // with atleast 1kb records, means 100K records per file. we just overprovision to 500K - public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000); + /** + * Configs related to specific table types + **/ + // Number of inserts, that will be put each partition/bucket for writing + public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert.split.size"; + // The rationale to pick the insert parallelism is the following. Writing out 100MB files, + // with atleast 1kb records, means 100K records per file. we just overprovision to 500K + public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000); - // Config to control whether we control insert split sizes automatically based on average record sizes - public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert.auto.split"; - // its off by default - public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(false); + // Config to control whether we control insert split sizes automatically based on average record sizes + public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert.auto.split"; + // its off by default + public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(false); - // This value is used as a guessimate for the record size, if we can't determine this from previous commits - public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite.record.size.estimate"; - // Used to determine how much more can be packed into a small file, before it exceeds the size limit. 
- public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String.valueOf(1024); + // This value is used as a guessimate for the record size, if we can't determine this from previous commits + public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite.record.size.estimate"; + // Used to determine how much more can be packed into a small file, before it exceeds the size limit. + public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String + .valueOf(1024); - public static final String CLEANER_PARALLELISM = "hoodie.cleaner.parallelism"; - public static final String DEFAULT_CLEANER_PARALLELISM = String.valueOf(200); + public static final String CLEANER_PARALLELISM = "hoodie.cleaner.parallelism"; + public static final String DEFAULT_CLEANER_PARALLELISM = String.valueOf(200); - public static final String TARGET_IO_PER_COMPACTION_IN_MB_PROP = "hoodie.compaction.target.io"; - // 500GB of target IO per compaction (both read and write) - public static final String DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB = String.valueOf(500 * 1024); + public static final String TARGET_IO_PER_COMPACTION_IN_MB_PROP = "hoodie.compaction.target.io"; + // 500GB of target IO per compaction (both read and write) + public static final String DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB = String.valueOf(500 * 1024); - public static final String COMPACTION_STRATEGY_PROP = "hoodie.compaction.strategy"; - // 200GB of target IO per compaction - public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class.getName(); + public static final String COMPACTION_STRATEGY_PROP = "hoodie.compaction.strategy"; + // 200GB of target IO per compaction + public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class + .getName(); - // used to merge records written to log file - public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName(); - public static 
final String PAYLOAD_CLASS = "hoodie.compaction.payload.class"; + // used to merge records written to log file + public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName(); + public static final String PAYLOAD_CLASS = "hoodie.compaction.payload.class"; - private HoodieCompactionConfig(Properties props) { - super(props); + private HoodieCompactionConfig(Properties props) { + super(props); + } + + public static HoodieCompactionConfig.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private final Properties props = new Properties(); + + public Builder fromFile(File propertiesFile) throws IOException { + FileReader reader = new FileReader(propertiesFile); + try { + this.props.load(reader); + return this; + } finally { + reader.close(); + } } - public static HoodieCompactionConfig.Builder newBuilder() { - return new Builder(); + public Builder fromProperties(Properties props) { + this.props.putAll(props); + return this; } - public static class Builder { - private final Properties props = new Properties(); - - public Builder fromFile(File propertiesFile) throws IOException { - FileReader reader = new FileReader(propertiesFile); - try { - this.props.load(reader); - return this; - } finally { - reader.close(); - } - } - - public Builder fromProperties(Properties props) { - this.props.putAll(props); - return this; - } - - - public Builder withAutoClean(Boolean autoClean) { - props.setProperty(AUTO_CLEAN_PROP, String.valueOf(autoClean)); - return this; - } - - public Builder withInlineCompaction(Boolean inlineCompaction) { - props.setProperty(INLINE_COMPACT_PROP, String.valueOf(inlineCompaction)); - return this; - } - - public Builder inlineCompactionEvery(int deltaCommits) { - props.setProperty(INLINE_COMPACT_PROP, String.valueOf(deltaCommits)); - return this; - } - - public Builder withCleanerPolicy(HoodieCleaningPolicy policy) { - props.setProperty(CLEANER_POLICY_PROP, policy.name()); - return this; - } - - 
public Builder retainFileVersions(int fileVersionsRetained) { - props.setProperty(CLEANER_FILE_VERSIONS_RETAINED_PROP, - String.valueOf(fileVersionsRetained)); - return this; - } - - public Builder retainCommits(int commitsRetained) { - props.setProperty(CLEANER_COMMITS_RETAINED_PROP, String.valueOf(commitsRetained)); - return this; - } - - public Builder archiveCommitsWith(int minToKeep, int maxToKeep) { - props.setProperty(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep)); - props.setProperty(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep)); - return this; - } - - public Builder compactionSmallFileSize(long smallFileLimitBytes) { - props.setProperty(PARQUET_SMALL_FILE_LIMIT_BYTES, String.valueOf(smallFileLimitBytes)); - return this; - } - - public Builder insertSplitSize(int insertSplitSize) { - props.setProperty(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, String.valueOf(insertSplitSize)); - return this; - } - - public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) { - props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, String.valueOf(autoTuneInsertSplits)); - return this; - } - - public Builder approxRecordSize(int recordSizeEstimate) { - props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, String.valueOf(recordSizeEstimate)); - return this; - } - - public Builder withCleanerParallelism(int cleanerParallelism) { - props.setProperty(CLEANER_PARALLELISM, String.valueOf(cleanerParallelism)); - return this; - } - - public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) { - props.setProperty(COMPACTION_STRATEGY_PROP, compactionStrategy.getClass().getName()); - return this; - } - - public Builder withPayloadClass(String payloadClassName) { - props.setProperty(PAYLOAD_CLASS, payloadClassName); - return this; - } - - public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) { - props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP, String.valueOf(targetIOPerCompactionInMB)); - return this; - } - - public 
HoodieCompactionConfig build() { - HoodieCompactionConfig config = new HoodieCompactionConfig(props); - setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP), - AUTO_CLEAN_PROP, DEFAULT_AUTO_CLEAN); - setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP), - INLINE_COMPACT_PROP, DEFAULT_INLINE_COMPACT); - setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP), - INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS); - setDefaultOnCondition(props, !props.containsKey(CLEANER_POLICY_PROP), - CLEANER_POLICY_PROP, DEFAULT_CLEANER_POLICY); - setDefaultOnCondition(props, !props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP), - CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED); - setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP), - CLEANER_COMMITS_RETAINED_PROP, DEFAULT_CLEANER_COMMITS_RETAINED); - setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP), - MAX_COMMITS_TO_KEEP, DEFAULT_MAX_COMMITS_TO_KEEP); - setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP), - MIN_COMMITS_TO_KEEP, DEFAULT_MIN_COMMITS_TO_KEEP); - setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES), - PARQUET_SMALL_FILE_LIMIT_BYTES, DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES); - setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE), - COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE); - setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS), - COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS); - setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE), - COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE); - setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM), - CLEANER_PARALLELISM, 
DEFAULT_CLEANER_PARALLELISM); - setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP), - COMPACTION_STRATEGY_PROP, DEFAULT_COMPACTION_STRATEGY); - setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS), - PAYLOAD_CLASS, DEFAULT_PAYLOAD_CLASS); - setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP), - TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB); - - HoodieCleaningPolicy.valueOf(props.getProperty(CLEANER_POLICY_PROP)); - Preconditions.checkArgument( - Integer.parseInt(props.getProperty(MAX_COMMITS_TO_KEEP)) > Integer - .parseInt(props.getProperty(MIN_COMMITS_TO_KEEP))); - return config; - } + public Builder withAutoClean(Boolean autoClean) { + props.setProperty(AUTO_CLEAN_PROP, String.valueOf(autoClean)); + return this; } + + public Builder withInlineCompaction(Boolean inlineCompaction) { + props.setProperty(INLINE_COMPACT_PROP, String.valueOf(inlineCompaction)); + return this; + } + + public Builder inlineCompactionEvery(int deltaCommits) { + props.setProperty(INLINE_COMPACT_PROP, String.valueOf(deltaCommits)); + return this; + } + + public Builder withCleanerPolicy(HoodieCleaningPolicy policy) { + props.setProperty(CLEANER_POLICY_PROP, policy.name()); + return this; + } + + public Builder retainFileVersions(int fileVersionsRetained) { + props.setProperty(CLEANER_FILE_VERSIONS_RETAINED_PROP, + String.valueOf(fileVersionsRetained)); + return this; + } + + public Builder retainCommits(int commitsRetained) { + props.setProperty(CLEANER_COMMITS_RETAINED_PROP, String.valueOf(commitsRetained)); + return this; + } + + public Builder archiveCommitsWith(int minToKeep, int maxToKeep) { + props.setProperty(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep)); + props.setProperty(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep)); + return this; + } + + public Builder compactionSmallFileSize(long smallFileLimitBytes) { + props.setProperty(PARQUET_SMALL_FILE_LIMIT_BYTES, 
String.valueOf(smallFileLimitBytes)); + return this; + } + + public Builder insertSplitSize(int insertSplitSize) { + props.setProperty(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, String.valueOf(insertSplitSize)); + return this; + } + + public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) { + props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, + String.valueOf(autoTuneInsertSplits)); + return this; + } + + public Builder approxRecordSize(int recordSizeEstimate) { + props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, + String.valueOf(recordSizeEstimate)); + return this; + } + + public Builder withCleanerParallelism(int cleanerParallelism) { + props.setProperty(CLEANER_PARALLELISM, String.valueOf(cleanerParallelism)); + return this; + } + + public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) { + props.setProperty(COMPACTION_STRATEGY_PROP, compactionStrategy.getClass().getName()); + return this; + } + + public Builder withPayloadClass(String payloadClassName) { + props.setProperty(PAYLOAD_CLASS, payloadClassName); + return this; + } + + public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) { + props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP, + String.valueOf(targetIOPerCompactionInMB)); + return this; + } + + public HoodieCompactionConfig build() { + HoodieCompactionConfig config = new HoodieCompactionConfig(props); + setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP), + AUTO_CLEAN_PROP, DEFAULT_AUTO_CLEAN); + setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP), + INLINE_COMPACT_PROP, DEFAULT_INLINE_COMPACT); + setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP), + INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS); + setDefaultOnCondition(props, !props.containsKey(CLEANER_POLICY_PROP), + CLEANER_POLICY_PROP, DEFAULT_CLEANER_POLICY); + setDefaultOnCondition(props, 
!props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP), + CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED); + setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP), + CLEANER_COMMITS_RETAINED_PROP, DEFAULT_CLEANER_COMMITS_RETAINED); + setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP), + MAX_COMMITS_TO_KEEP, DEFAULT_MAX_COMMITS_TO_KEEP); + setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP), + MIN_COMMITS_TO_KEEP, DEFAULT_MIN_COMMITS_TO_KEEP); + setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES), + PARQUET_SMALL_FILE_LIMIT_BYTES, DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES); + setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE), + COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE); + setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS), + COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS); + setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE), + COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, + DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE); + setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM), + CLEANER_PARALLELISM, DEFAULT_CLEANER_PARALLELISM); + setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP), + COMPACTION_STRATEGY_PROP, DEFAULT_COMPACTION_STRATEGY); + setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS), + PAYLOAD_CLASS, DEFAULT_PAYLOAD_CLASS); + setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP), + TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB); + + HoodieCleaningPolicy.valueOf(props.getProperty(CLEANER_POLICY_PROP)); + Preconditions.checkArgument( + Integer.parseInt(props.getProperty(MAX_COMMITS_TO_KEEP)) > Integer + 
.parseInt(props.getProperty(MIN_COMMITS_TO_KEEP))); + return config; + } + + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java index 9a5fefbbe..a7a722de1 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieIndexConfig.java @@ -16,14 +16,12 @@ package com.uber.hoodie.config; -import com.google.common.base.Preconditions; import com.uber.hoodie.index.HoodieIndex; - -import javax.annotation.concurrent.Immutable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Properties; +import javax.annotation.concurrent.Immutable; /** * Indexing related config @@ -31,123 +29,124 @@ import java.util.Properties; @Immutable public class HoodieIndexConfig extends DefaultHoodieConfig { - public static final String INDEX_TYPE_PROP = "hoodie.index.type"; - public static final String DEFAULT_INDEX_TYPE = HoodieIndex.IndexType.BLOOM.name(); + public static final String INDEX_TYPE_PROP = "hoodie.index.type"; + public static final String DEFAULT_INDEX_TYPE = HoodieIndex.IndexType.BLOOM.name(); - // ***** Bloom Index configs ***** - public static final String BLOOM_FILTER_NUM_ENTRIES = "hoodie.index.bloom.num_entries"; - public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = "60000"; - public static final String BLOOM_FILTER_FPP = "hoodie.index.bloom.fpp"; - public static final String DEFAULT_BLOOM_FILTER_FPP = "0.000000001"; - public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism"; - // Disable explicit bloom index parallelism setting by default - hoodie auto computes - public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0"; - public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges"; - public static final String 
DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true"; - public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching"; - public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true"; + // ***** Bloom Index configs ***** + public static final String BLOOM_FILTER_NUM_ENTRIES = "hoodie.index.bloom.num_entries"; + public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = "60000"; + public static final String BLOOM_FILTER_FPP = "hoodie.index.bloom.fpp"; + public static final String DEFAULT_BLOOM_FILTER_FPP = "0.000000001"; + public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism"; + // Disable explicit bloom index parallelism setting by default - hoodie auto computes + public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0"; + public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges"; + public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true"; + public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching"; + public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true"; - // ***** HBase Index Configs ***** - public final static String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum"; - public final static String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport"; - public final static String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table"; + // ***** HBase Index Configs ***** + public final static String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum"; + public final static String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport"; + public final static String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table"; - // ***** Bucketed Index Configs ***** - public final static String BUCKETED_INDEX_NUM_BUCKETS_PROP = "hoodie.index.bucketed.numbuckets"; + // ***** Bucketed Index Configs ***** + public final static String BUCKETED_INDEX_NUM_BUCKETS_PROP = "hoodie.index.bucketed.numbuckets"; - private 
HoodieIndexConfig(Properties props) { - super(props); + private HoodieIndexConfig(Properties props) { + super(props); + } + + public static HoodieIndexConfig.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private final Properties props = new Properties(); + + public Builder fromFile(File propertiesFile) throws IOException { + FileReader reader = new FileReader(propertiesFile); + try { + this.props.load(reader); + return this; + } finally { + reader.close(); + } } - public static HoodieIndexConfig.Builder newBuilder() { - return new Builder(); + public Builder fromProperties(Properties props) { + this.props.putAll(props); + return this; } - public static class Builder { - private final Properties props = new Properties(); - - public Builder fromFile(File propertiesFile) throws IOException { - FileReader reader = new FileReader(propertiesFile); - try { - this.props.load(reader); - return this; - } finally { - reader.close(); - } - } - - public Builder fromProperties(Properties props) { - this.props.putAll(props); - return this; - } - - public Builder withIndexType(HoodieIndex.IndexType indexType) { - props.setProperty(INDEX_TYPE_PROP, indexType.name()); - return this; - } - - public Builder bloomFilterNumEntries(int numEntries) { - props.setProperty(BLOOM_FILTER_NUM_ENTRIES, String.valueOf(numEntries)); - return this; - } - - public Builder bloomFilterFPP(double fpp) { - props.setProperty(BLOOM_FILTER_FPP, String.valueOf(fpp)); - return this; - } - - public Builder hbaseZkQuorum(String zkString) { - props.setProperty(HBASE_ZKQUORUM_PROP, zkString); - return this; - } - - public Builder hbaseZkPort(int port) { - props.setProperty(HBASE_ZKPORT_PROP, String.valueOf(port)); - return this; - } - - public Builder hbaseTableName(String tableName) { - props.setProperty(HBASE_TABLENAME_PROP, tableName); - return this; - } - - public Builder bloomIndexParallelism(int parallelism) { - props.setProperty(BLOOM_INDEX_PARALLELISM_PROP, 
String.valueOf(parallelism)); - return this; - } - - public Builder bloomIndexPruneByRanges(boolean pruneRanges) { - props.setProperty(BLOOM_INDEX_PRUNE_BY_RANGES_PROP, String.valueOf(pruneRanges)); - return this; - } - - public Builder bloomIndexUseCaching(boolean useCaching) { - props.setProperty(BLOOM_INDEX_USE_CACHING_PROP, String.valueOf(useCaching)); - return this; - } - - public Builder numBucketsPerPartition(int numBuckets) { - props.setProperty(BUCKETED_INDEX_NUM_BUCKETS_PROP, String.valueOf(numBuckets)); - return this; - } - - public HoodieIndexConfig build() { - HoodieIndexConfig config = new HoodieIndexConfig(props); - setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), - INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE); - setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), - BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES); - setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), - BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP); - setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), - BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM); - setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP), - BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES); - setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), - BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING); - // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type - HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); - return config; - } + public Builder withIndexType(HoodieIndex.IndexType indexType) { + props.setProperty(INDEX_TYPE_PROP, indexType.name()); + return this; } + + public Builder bloomFilterNumEntries(int numEntries) { + props.setProperty(BLOOM_FILTER_NUM_ENTRIES, String.valueOf(numEntries)); + return this; + } + + public Builder bloomFilterFPP(double fpp) { + 
props.setProperty(BLOOM_FILTER_FPP, String.valueOf(fpp)); + return this; + } + + public Builder hbaseZkQuorum(String zkString) { + props.setProperty(HBASE_ZKQUORUM_PROP, zkString); + return this; + } + + public Builder hbaseZkPort(int port) { + props.setProperty(HBASE_ZKPORT_PROP, String.valueOf(port)); + return this; + } + + public Builder hbaseTableName(String tableName) { + props.setProperty(HBASE_TABLENAME_PROP, tableName); + return this; + } + + public Builder bloomIndexParallelism(int parallelism) { + props.setProperty(BLOOM_INDEX_PARALLELISM_PROP, String.valueOf(parallelism)); + return this; + } + + public Builder bloomIndexPruneByRanges(boolean pruneRanges) { + props.setProperty(BLOOM_INDEX_PRUNE_BY_RANGES_PROP, String.valueOf(pruneRanges)); + return this; + } + + public Builder bloomIndexUseCaching(boolean useCaching) { + props.setProperty(BLOOM_INDEX_USE_CACHING_PROP, String.valueOf(useCaching)); + return this; + } + + public Builder numBucketsPerPartition(int numBuckets) { + props.setProperty(BUCKETED_INDEX_NUM_BUCKETS_PROP, String.valueOf(numBuckets)); + return this; + } + + public HoodieIndexConfig build() { + HoodieIndexConfig config = new HoodieIndexConfig(props); + setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP), + INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE); + setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES), + BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES); + setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP), + BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP); + setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP), + BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM); + setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP), + BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES); + setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP), + BLOOM_INDEX_USE_CACHING_PROP, 
DEFAULT_BLOOM_INDEX_USE_CACHING); + // Throws IllegalArgumentException if the value set is not a known Hoodie Index Type + HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP)); + return config; + } + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieMetricsConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieMetricsConfig.java index 0ef107de5..f3ef71077 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieMetricsConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieMetricsConfig.java @@ -17,12 +17,11 @@ package com.uber.hoodie.config; import com.uber.hoodie.metrics.MetricsReporterType; - -import javax.annotation.concurrent.Immutable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Properties; +import javax.annotation.concurrent.Immutable; /** * Fetch the configurations used by the Metrics system. @@ -30,89 +29,90 @@ import java.util.Properties; @Immutable public class HoodieMetricsConfig extends DefaultHoodieConfig { - public final static String METRIC_PREFIX = "hoodie.metrics"; - public final static String METRICS_ON = METRIC_PREFIX + ".on"; - public final static boolean DEFAULT_METRICS_ON = false; - public final static String METRICS_REPORTER_TYPE = METRIC_PREFIX + ".reporter.type"; - public final static MetricsReporterType DEFAULT_METRICS_REPORTER_TYPE = - MetricsReporterType.GRAPHITE; + public final static String METRIC_PREFIX = "hoodie.metrics"; + public final static String METRICS_ON = METRIC_PREFIX + ".on"; + public final static boolean DEFAULT_METRICS_ON = false; + public final static String METRICS_REPORTER_TYPE = METRIC_PREFIX + ".reporter.type"; + public final static MetricsReporterType DEFAULT_METRICS_REPORTER_TYPE = + MetricsReporterType.GRAPHITE; - // Graphite - public final static String GRAPHITE_PREFIX = METRIC_PREFIX + ".graphite"; - public final static String GRAPHITE_SERVER_HOST = GRAPHITE_PREFIX + ".host"; - 
public final static String DEFAULT_GRAPHITE_SERVER_HOST = "localhost"; + // Graphite + public final static String GRAPHITE_PREFIX = METRIC_PREFIX + ".graphite"; + public final static String GRAPHITE_SERVER_HOST = GRAPHITE_PREFIX + ".host"; + public final static String DEFAULT_GRAPHITE_SERVER_HOST = "localhost"; - public final static String GRAPHITE_SERVER_PORT = GRAPHITE_PREFIX + ".port"; - public final static int DEFAULT_GRAPHITE_SERVER_PORT = 4756; + public final static String GRAPHITE_SERVER_PORT = GRAPHITE_PREFIX + ".port"; + public final static int DEFAULT_GRAPHITE_SERVER_PORT = 4756; - public final static String GRAPHITE_METRIC_PREFIX = GRAPHITE_PREFIX + ".metric.prefix"; + public final static String GRAPHITE_METRIC_PREFIX = GRAPHITE_PREFIX + ".metric.prefix"; - private HoodieMetricsConfig(Properties props) { - super(props); + private HoodieMetricsConfig(Properties props) { + super(props); + } + + public static HoodieMetricsConfig.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private final Properties props = new Properties(); + + public Builder fromFile(File propertiesFile) throws IOException { + FileReader reader = new FileReader(propertiesFile); + try { + this.props.load(reader); + return this; + } finally { + reader.close(); + } } - public static HoodieMetricsConfig.Builder newBuilder() { - return new Builder(); + public Builder fromProperties(Properties props) { + this.props.putAll(props); + return this; } - public static class Builder { - private final Properties props = new Properties(); - public Builder fromFile(File propertiesFile) throws IOException { - FileReader reader = new FileReader(propertiesFile); - try { - this.props.load(reader); - return this; - } finally { - reader.close(); - } - } - - public Builder fromProperties(Properties props) { - this.props.putAll(props); - return this; - } - - - public Builder on(boolean metricsOn) { - props.setProperty(METRICS_ON, String.valueOf(metricsOn)); - return 
this; - } - - public Builder withReporterType(String reporterType) { - props.setProperty(METRICS_REPORTER_TYPE, reporterType); - return this; - } - - public Builder toGraphiteHost(String host) { - props.setProperty(GRAPHITE_SERVER_HOST, host); - return this; - } - - public Builder onGraphitePort(int port) { - props.setProperty(GRAPHITE_SERVER_PORT, String.valueOf(port)); - return this; - } - - public Builder usePrefix(String prefix) { - props.setProperty(GRAPHITE_METRIC_PREFIX, prefix); - return this; - } - - public HoodieMetricsConfig build() { - HoodieMetricsConfig config = new HoodieMetricsConfig(props); - setDefaultOnCondition(props, !props.containsKey(METRICS_ON), METRICS_ON, - String.valueOf(DEFAULT_METRICS_ON)); - setDefaultOnCondition(props, !props.containsKey(METRICS_REPORTER_TYPE), - METRICS_REPORTER_TYPE, DEFAULT_METRICS_REPORTER_TYPE.name()); - setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_HOST), - GRAPHITE_SERVER_HOST, DEFAULT_GRAPHITE_SERVER_HOST); - setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT), - GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT)); - setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT), - GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT)); - return config; - } + public Builder on(boolean metricsOn) { + props.setProperty(METRICS_ON, String.valueOf(metricsOn)); + return this; } + public Builder withReporterType(String reporterType) { + props.setProperty(METRICS_REPORTER_TYPE, reporterType); + return this; + } + + public Builder toGraphiteHost(String host) { + props.setProperty(GRAPHITE_SERVER_HOST, host); + return this; + } + + public Builder onGraphitePort(int port) { + props.setProperty(GRAPHITE_SERVER_PORT, String.valueOf(port)); + return this; + } + + public Builder usePrefix(String prefix) { + props.setProperty(GRAPHITE_METRIC_PREFIX, prefix); + return this; + } + + public HoodieMetricsConfig build() { + HoodieMetricsConfig config = new 
HoodieMetricsConfig(props); + setDefaultOnCondition(props, !props.containsKey(METRICS_ON), METRICS_ON, + String.valueOf(DEFAULT_METRICS_ON)); + setDefaultOnCondition(props, !props.containsKey(METRICS_REPORTER_TYPE), + METRICS_REPORTER_TYPE, DEFAULT_METRICS_REPORTER_TYPE.name()); + setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_HOST), + GRAPHITE_SERVER_HOST, DEFAULT_GRAPHITE_SERVER_HOST); + setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT), + GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT)); + setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT), + GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT)); + return config; + } + } + } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java index b9ce48aca..cc4e6d465 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java @@ -16,75 +16,77 @@ package com.uber.hoodie.config; -import javax.annotation.concurrent.Immutable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.Properties; +import javax.annotation.concurrent.Immutable; /** * Storage related config */ @Immutable public class HoodieStorageConfig extends DefaultHoodieConfig { - public static final String PARQUET_FILE_MAX_BYTES = "hoodie.parquet.max.file.size"; - public static final String DEFAULT_PARQUET_FILE_MAX_BYTES = String.valueOf(120 * 1024 * 1024); - public static final String PARQUET_BLOCK_SIZE_BYTES = "hoodie.parquet.block.size"; - public static final String DEFAULT_PARQUET_BLOCK_SIZE_BYTES = DEFAULT_PARQUET_FILE_MAX_BYTES; - public static final String PARQUET_PAGE_SIZE_BYTES = "hoodie.parquet.page.size"; - public static final String DEFAULT_PARQUET_PAGE_SIZE_BYTES = String.valueOf(1 * 1024 * 1024); - private 
HoodieStorageConfig(Properties props) { - super(props); + public static final String PARQUET_FILE_MAX_BYTES = "hoodie.parquet.max.file.size"; + public static final String DEFAULT_PARQUET_FILE_MAX_BYTES = String.valueOf(120 * 1024 * 1024); + public static final String PARQUET_BLOCK_SIZE_BYTES = "hoodie.parquet.block.size"; + public static final String DEFAULT_PARQUET_BLOCK_SIZE_BYTES = DEFAULT_PARQUET_FILE_MAX_BYTES; + public static final String PARQUET_PAGE_SIZE_BYTES = "hoodie.parquet.page.size"; + public static final String DEFAULT_PARQUET_PAGE_SIZE_BYTES = String.valueOf(1 * 1024 * 1024); + + private HoodieStorageConfig(Properties props) { + super(props); + } + + public static HoodieStorageConfig.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private final Properties props = new Properties(); + + public Builder fromFile(File propertiesFile) throws IOException { + FileReader reader = new FileReader(propertiesFile); + try { + this.props.load(reader); + return this; + } finally { + reader.close(); + } } - public static HoodieStorageConfig.Builder newBuilder() { - return new Builder(); + public Builder fromProperties(Properties props) { + this.props.putAll(props); + return this; } - public static class Builder { - private final Properties props = new Properties(); - - public Builder fromFile(File propertiesFile) throws IOException { - FileReader reader = new FileReader(propertiesFile); - try { - this.props.load(reader); - return this; - } finally { - reader.close(); - } - } - - public Builder fromProperties(Properties props) { - this.props.putAll(props); - return this; - } - - public Builder limitFileSize(int maxFileSize) { - props.setProperty(PARQUET_FILE_MAX_BYTES, String.valueOf(maxFileSize)); - return this; - } - - public Builder parquetBlockSize(int blockSize) { - props.setProperty(PARQUET_BLOCK_SIZE_BYTES, String.valueOf(blockSize)); - return this; - } - - public Builder parquetPageSize(int pageSize) { - 
props.setProperty(PARQUET_PAGE_SIZE_BYTES, String.valueOf(pageSize)); - return this; - } - - public HoodieStorageConfig build() { - HoodieStorageConfig config = new HoodieStorageConfig(props); - setDefaultOnCondition(props, !props.containsKey(PARQUET_FILE_MAX_BYTES), - PARQUET_FILE_MAX_BYTES, DEFAULT_PARQUET_FILE_MAX_BYTES); - setDefaultOnCondition(props, !props.containsKey(PARQUET_BLOCK_SIZE_BYTES), - PARQUET_BLOCK_SIZE_BYTES, DEFAULT_PARQUET_BLOCK_SIZE_BYTES); - setDefaultOnCondition(props, !props.containsKey(PARQUET_PAGE_SIZE_BYTES), - PARQUET_PAGE_SIZE_BYTES, DEFAULT_PARQUET_PAGE_SIZE_BYTES); - return config; - } + public Builder limitFileSize(int maxFileSize) { + props.setProperty(PARQUET_FILE_MAX_BYTES, String.valueOf(maxFileSize)); + return this; } + public Builder parquetBlockSize(int blockSize) { + props.setProperty(PARQUET_BLOCK_SIZE_BYTES, String.valueOf(blockSize)); + return this; + } + + public Builder parquetPageSize(int pageSize) { + props.setProperty(PARQUET_PAGE_SIZE_BYTES, String.valueOf(pageSize)); + return this; + } + + public HoodieStorageConfig build() { + HoodieStorageConfig config = new HoodieStorageConfig(props); + setDefaultOnCondition(props, !props.containsKey(PARQUET_FILE_MAX_BYTES), + PARQUET_FILE_MAX_BYTES, DEFAULT_PARQUET_FILE_MAX_BYTES); + setDefaultOnCondition(props, !props.containsKey(PARQUET_BLOCK_SIZE_BYTES), + PARQUET_BLOCK_SIZE_BYTES, DEFAULT_PARQUET_BLOCK_SIZE_BYTES); + setDefaultOnCondition(props, !props.containsKey(PARQUET_PAGE_SIZE_BYTES), + PARQUET_PAGE_SIZE_BYTES, DEFAULT_PARQUET_PAGE_SIZE_BYTES); + return config; + } + } + } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java index b954a7bbf..b2efc8254 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java @@ -24,395 +24,401 @@ import 
com.uber.hoodie.common.util.ReflectionUtils; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.io.compact.strategy.CompactionStrategy; import com.uber.hoodie.metrics.MetricsReporterType; -import org.apache.spark.storage.StorageLevel; - -import javax.annotation.concurrent.Immutable; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.util.Map; import java.util.Properties; +import javax.annotation.concurrent.Immutable; +import org.apache.spark.storage.StorageLevel; /** * Class storing configs for the {@link com.uber.hoodie.HoodieWriteClient} */ @Immutable public class HoodieWriteConfig extends DefaultHoodieConfig { - private static final String BASE_PATH_PROP = "hoodie.base.path"; - private static final String AVRO_SCHEMA = "hoodie.avro.schema"; - public static final String TABLE_NAME = "hoodie.table.name"; - private static final String DEFAULT_PARALLELISM = "200"; - private static final String INSERT_PARALLELISM = "hoodie.insert.shuffle.parallelism"; - private static final String BULKINSERT_PARALLELISM = "hoodie.bulkinsert.shuffle.parallelism"; - private static final String UPSERT_PARALLELISM = "hoodie.upsert.shuffle.parallelism"; - private static final String COMBINE_BEFORE_INSERT_PROP = "hoodie.combine.before.insert"; - private static final String DEFAULT_COMBINE_BEFORE_INSERT = "false"; - private static final String COMBINE_BEFORE_UPSERT_PROP = "hoodie.combine.before.upsert"; - private static final String DEFAULT_COMBINE_BEFORE_UPSERT = "true"; - private static final String WRITE_STATUS_STORAGE_LEVEL = "hoodie.write.status.storage.level"; - private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER"; - private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit"; - private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true"; - private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning"; - private 
static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false"; - private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class"; - private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName(); - private HoodieWriteConfig(Properties props) { - super(props); - } + private static final String BASE_PATH_PROP = "hoodie.base.path"; + private static final String AVRO_SCHEMA = "hoodie.avro.schema"; + public static final String TABLE_NAME = "hoodie.table.name"; + private static final String DEFAULT_PARALLELISM = "200"; + private static final String INSERT_PARALLELISM = "hoodie.insert.shuffle.parallelism"; + private static final String BULKINSERT_PARALLELISM = "hoodie.bulkinsert.shuffle.parallelism"; + private static final String UPSERT_PARALLELISM = "hoodie.upsert.shuffle.parallelism"; + private static final String COMBINE_BEFORE_INSERT_PROP = "hoodie.combine.before.insert"; + private static final String DEFAULT_COMBINE_BEFORE_INSERT = "false"; + private static final String COMBINE_BEFORE_UPSERT_PROP = "hoodie.combine.before.upsert"; + private static final String DEFAULT_COMBINE_BEFORE_UPSERT = "true"; + private static final String WRITE_STATUS_STORAGE_LEVEL = "hoodie.write.status.storage.level"; + private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER"; + private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit"; + private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true"; + private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning"; + private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false"; + private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class"; + private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName(); - /** - * base properties - **/ - public String getBasePath() { - return props.getProperty(BASE_PATH_PROP); - } + private 
HoodieWriteConfig(Properties props) { + super(props); + } - public String getSchema() { - return props.getProperty(AVRO_SCHEMA); - } + /** + * base properties + **/ + public String getBasePath() { + return props.getProperty(BASE_PATH_PROP); + } - public String getTableName() { - return props.getProperty(TABLE_NAME); - } + public String getSchema() { + return props.getProperty(AVRO_SCHEMA); + } - public Boolean shouldAutoCommit() { - return Boolean.parseBoolean(props.getProperty(HOODIE_AUTO_COMMIT_PROP)); - } + public String getTableName() { + return props.getProperty(TABLE_NAME); + } - public Boolean shouldAssumeDatePartitioning() { - return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP)); - } + public Boolean shouldAutoCommit() { + return Boolean.parseBoolean(props.getProperty(HOODIE_AUTO_COMMIT_PROP)); + } - public int getBulkInsertShuffleParallelism() { - return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM)); - } + public Boolean shouldAssumeDatePartitioning() { + return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP)); + } - public int getInsertShuffleParallelism() { - return Integer.parseInt(props.getProperty(INSERT_PARALLELISM)); - } + public int getBulkInsertShuffleParallelism() { + return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM)); + } - public int getUpsertShuffleParallelism() { - return Integer.parseInt(props.getProperty(UPSERT_PARALLELISM)); - } + public int getInsertShuffleParallelism() { + return Integer.parseInt(props.getProperty(INSERT_PARALLELISM)); + } - public boolean shouldCombineBeforeInsert() { - return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_INSERT_PROP)); - } + public int getUpsertShuffleParallelism() { + return Integer.parseInt(props.getProperty(UPSERT_PARALLELISM)); + } - public boolean shouldCombineBeforeUpsert() { - return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_UPSERT_PROP)); - } + public boolean 
shouldCombineBeforeInsert() { + return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_INSERT_PROP)); + } - public StorageLevel getWriteStatusStorageLevel() { - return StorageLevel.fromString(props.getProperty(WRITE_STATUS_STORAGE_LEVEL)); - } + public boolean shouldCombineBeforeUpsert() { + return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_UPSERT_PROP)); + } - public String getWriteStatusClassName() { - return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP); - } + public StorageLevel getWriteStatusStorageLevel() { + return StorageLevel.fromString(props.getProperty(WRITE_STATUS_STORAGE_LEVEL)); + } - /** - * compaction properties - **/ - public HoodieCleaningPolicy getCleanerPolicy() { - return HoodieCleaningPolicy - .valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP)); - } + public String getWriteStatusClassName() { + return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP); + } - public int getCleanerFileVersionsRetained() { - return Integer.parseInt( - props.getProperty(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED_PROP)); - } + /** + * compaction properties + **/ + public HoodieCleaningPolicy getCleanerPolicy() { + return HoodieCleaningPolicy + .valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP)); + } - public int getCleanerCommitsRetained() { - return Integer - .parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP)); - } + public int getCleanerFileVersionsRetained() { + return Integer.parseInt( + props.getProperty(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED_PROP)); + } - public int getMaxCommitsToKeep() { - return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP)); - } + public int getCleanerCommitsRetained() { + return Integer + .parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP)); + } - public int getMinCommitsToKeep() { - return 
Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP)); - } + public int getMaxCommitsToKeep() { + return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP)); + } - public int getParquetSmallFileLimit() { - return Integer.parseInt(props.getProperty(HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT_BYTES)); - } + public int getMinCommitsToKeep() { + return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP)); + } - public int getCopyOnWriteInsertSplitSize() { - return Integer.parseInt( - props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE)); - } + public int getParquetSmallFileLimit() { + return Integer + .parseInt(props.getProperty(HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT_BYTES)); + } - public int getCopyOnWriteRecordSizeEstimate() { - return Integer.parseInt( - props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE)); - } + public int getCopyOnWriteInsertSplitSize() { + return Integer.parseInt( + props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE)); + } - public boolean shouldAutoTuneInsertSplits() { - return Boolean.parseBoolean( - props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS)); - } + public int getCopyOnWriteRecordSizeEstimate() { + return Integer.parseInt( + props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE)); + } - public int getCleanerParallelism() { - return Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_PARALLELISM)); - } + public boolean shouldAutoTuneInsertSplits() { + return Boolean.parseBoolean( + props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS)); + } - public boolean isAutoClean() { - return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.AUTO_CLEAN_PROP)); - } + public int getCleanerParallelism() { + return 
Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_PARALLELISM)); + } - public boolean isInlineCompaction() { - return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_PROP)); - } + public boolean isAutoClean() { + return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.AUTO_CLEAN_PROP)); + } - public int getInlineCompactDeltaCommitMax() { - return Integer.parseInt( - props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP)); - } + public boolean isInlineCompaction() { + return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_PROP)); + } - public CompactionStrategy getCompactionStrategy() { - return ReflectionUtils.loadClass(props.getProperty(HoodieCompactionConfig.COMPACTION_STRATEGY_PROP)); - } + public int getInlineCompactDeltaCommitMax() { + return Integer.parseInt( + props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP)); + } - public Long getTargetIOPerCompactionInMB() { - return Long.parseLong(props.getProperty(HoodieCompactionConfig.TARGET_IO_PER_COMPACTION_IN_MB_PROP)); - } + public CompactionStrategy getCompactionStrategy() { + return ReflectionUtils + .loadClass(props.getProperty(HoodieCompactionConfig.COMPACTION_STRATEGY_PROP)); + } - /** - * index properties - **/ - public HoodieIndex.IndexType getIndexType() { - return HoodieIndex.IndexType.valueOf(props.getProperty(HoodieIndexConfig.INDEX_TYPE_PROP)); - } + public Long getTargetIOPerCompactionInMB() { + return Long + .parseLong(props.getProperty(HoodieCompactionConfig.TARGET_IO_PER_COMPACTION_IN_MB_PROP)); + } - public int getBloomFilterNumEntries() { - return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES)); - } + /** + * index properties + **/ + public HoodieIndex.IndexType getIndexType() { + return HoodieIndex.IndexType.valueOf(props.getProperty(HoodieIndexConfig.INDEX_TYPE_PROP)); + } - public double getBloomFilterFPP() { - return 
Double.parseDouble(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_FPP)); - } + public int getBloomFilterNumEntries() { + return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES)); + } - public String getHbaseZkQuorum() { - return props.getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP); - } + public double getBloomFilterFPP() { + return Double.parseDouble(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_FPP)); + } - public int getHbaseZkPort() { - return Integer.parseInt(props.getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP)); - } + public String getHbaseZkQuorum() { + return props.getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP); + } - public String getHbaseTableName() { - return props.getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP); - } + public int getHbaseZkPort() { + return Integer.parseInt(props.getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP)); + } - public int getBloomIndexParallelism() { - return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PARALLELISM_PROP)); - } + public String getHbaseTableName() { + return props.getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP); + } - public boolean getBloomIndexPruneByRanges() { - return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PRUNE_BY_RANGES_PROP)); - } + public int getBloomIndexParallelism() { + return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PARALLELISM_PROP)); + } - public boolean getBloomIndexUseCaching() { - return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP)); - } + public boolean getBloomIndexPruneByRanges() { + return Boolean + .parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PRUNE_BY_RANGES_PROP)); + } - public int getNumBucketsPerPartition() { - return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP)); - } + public boolean getBloomIndexUseCaching() { + return 
Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP)); + } - /** - * storage properties - **/ - public int getParquetMaxFileSize() { - return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES)); - } + public int getNumBucketsPerPartition() { + return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP)); + } - public int getParquetBlockSize() { - return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_BLOCK_SIZE_BYTES)); - } + /** + * storage properties + **/ + public int getParquetMaxFileSize() { + return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES)); + } - public int getParquetPageSize() { - return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_PAGE_SIZE_BYTES)); - } + public int getParquetBlockSize() { + return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_BLOCK_SIZE_BYTES)); + } - /** - * metrics properties - **/ - public boolean isMetricsOn() { - return Boolean.parseBoolean(props.getProperty(HoodieMetricsConfig.METRICS_ON)); - } + public int getParquetPageSize() { + return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_PAGE_SIZE_BYTES)); + } - public MetricsReporterType getMetricsReporterType() { - return MetricsReporterType - .valueOf(props.getProperty(HoodieMetricsConfig.METRICS_REPORTER_TYPE)); - } + /** + * metrics properties + **/ + public boolean isMetricsOn() { + return Boolean.parseBoolean(props.getProperty(HoodieMetricsConfig.METRICS_ON)); + } - public String getGraphiteServerHost() { - return props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_HOST); - } + public MetricsReporterType getMetricsReporterType() { + return MetricsReporterType + .valueOf(props.getProperty(HoodieMetricsConfig.METRICS_REPORTER_TYPE)); + } - public int getGraphiteServerPort() { - return Integer.parseInt(props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_PORT)); - } + public String 
getGraphiteServerHost() { + return props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_HOST); + } - public String getGraphiteMetricPrefix() { - return props.getProperty(HoodieMetricsConfig.GRAPHITE_METRIC_PREFIX); - } + public int getGraphiteServerPort() { + return Integer.parseInt(props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_PORT)); + } - public static HoodieWriteConfig.Builder newBuilder() { - return new Builder(); - } + public String getGraphiteMetricPrefix() { + return props.getProperty(HoodieMetricsConfig.GRAPHITE_METRIC_PREFIX); + } + public static HoodieWriteConfig.Builder newBuilder() { + return new Builder(); + } public static class Builder { - private final Properties props = new Properties(); - private boolean isIndexConfigSet = false; - private boolean isStorageConfigSet = false; - private boolean isCompactionConfigSet = false; - private boolean isMetricsConfigSet = false; - private boolean isAutoCommit = true; - public Builder fromFile(File propertiesFile) throws IOException { - FileReader reader = new FileReader(propertiesFile); - try { - this.props.load(reader); - return this; - } finally { - reader.close(); - } - } + private final Properties props = new Properties(); + private boolean isIndexConfigSet = false; + private boolean isStorageConfigSet = false; + private boolean isCompactionConfigSet = false; + private boolean isMetricsConfigSet = false; + private boolean isAutoCommit = true; - public Builder fromInputStream(InputStream inputStream) throws IOException { - try { - this.props.load(inputStream); - return this; - } finally { - inputStream.close(); - } - } - - public Builder withProps(Map kvprops) { - props.putAll(kvprops); - return this; - } - - public Builder withPath(String basePath) { - props.setProperty(BASE_PATH_PROP, basePath); - return this; - } - - public Builder withSchema(String schemaStr) { - props.setProperty(AVRO_SCHEMA, schemaStr); - return this; - } - - public Builder forTable(String tableName) { - 
props.setProperty(TABLE_NAME, tableName); - return this; - } - - public Builder withBulkInsertParallelism(int bulkInsertParallelism) { - props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism)); - return this; - } - - public Builder withParallelism(int insertShuffleParallelism, int upsertShuffleParallelism) { - props.setProperty(INSERT_PARALLELISM, String.valueOf(insertShuffleParallelism)); - props.setProperty(UPSERT_PARALLELISM, String.valueOf(upsertShuffleParallelism)); - return this; - } - - public Builder combineInput(boolean onInsert, boolean onUpsert) { - props.setProperty(COMBINE_BEFORE_INSERT_PROP, String.valueOf(onInsert)); - props.setProperty(COMBINE_BEFORE_UPSERT_PROP, String.valueOf(onUpsert)); - return this; - } - - public Builder withWriteStatusStorageLevel(String level) { - props.setProperty(WRITE_STATUS_STORAGE_LEVEL, level); - return this; - } - - public Builder withIndexConfig(HoodieIndexConfig indexConfig) { - props.putAll(indexConfig.getProps()); - isIndexConfigSet = true; - return this; - } - - public Builder withStorageConfig(HoodieStorageConfig storageConfig) { - props.putAll(storageConfig.getProps()); - isStorageConfigSet = true; - return this; - } - - public Builder withCompactionConfig(HoodieCompactionConfig compactionConfig) { - props.putAll(compactionConfig.getProps()); - isCompactionConfigSet = true; - return this; - } - - public Builder withMetricsConfig(HoodieMetricsConfig metricsConfig) { - props.putAll(metricsConfig.getProps()); - isMetricsConfigSet = true; - return this; - } - - public Builder withAutoCommit(boolean autoCommit) { - props.setProperty(HOODIE_AUTO_COMMIT_PROP, String.valueOf(autoCommit)); - return this; - } - - public Builder withAssumeDatePartitioning(boolean assumeDatePartitioning) { - props.setProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP, String.valueOf(assumeDatePartitioning)); - return this; - } - - public Builder withWriteStatusClass(Class writeStatusClass) { - 
props.setProperty(HOODIE_WRITE_STATUS_CLASS_PROP, writeStatusClass.getName()); - return this; - } - - public HoodieWriteConfig build() { - HoodieWriteConfig config = new HoodieWriteConfig(props); - // Check for mandatory properties - Preconditions.checkArgument(config.getBasePath() != null); - setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM, - DEFAULT_PARALLELISM); - setDefaultOnCondition(props, !props.containsKey(BULKINSERT_PARALLELISM), BULKINSERT_PARALLELISM, - DEFAULT_PARALLELISM); - setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM, - DEFAULT_PARALLELISM); - setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP), - COMBINE_BEFORE_INSERT_PROP, DEFAULT_COMBINE_BEFORE_INSERT); - setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP), - COMBINE_BEFORE_UPSERT_PROP, DEFAULT_COMBINE_BEFORE_UPSERT); - setDefaultOnCondition(props, !props.containsKey(WRITE_STATUS_STORAGE_LEVEL), - WRITE_STATUS_STORAGE_LEVEL, DEFAULT_WRITE_STATUS_STORAGE_LEVEL); - setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP), - HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT); - setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP), - HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING); - setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP), - HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS); - - // Make sure the props is propagated - setDefaultOnCondition(props, !isIndexConfigSet, - HoodieIndexConfig.newBuilder().fromProperties(props).build()); - setDefaultOnCondition(props, !isStorageConfigSet, - HoodieStorageConfig.newBuilder().fromProperties(props).build()); - setDefaultOnCondition(props, !isCompactionConfigSet, - HoodieCompactionConfig.newBuilder().fromProperties(props).build()); - setDefaultOnCondition(props, !isMetricsConfigSet, - 
HoodieMetricsConfig.newBuilder().fromProperties(props).build()); - return config; - } + public Builder fromFile(File propertiesFile) throws IOException { + FileReader reader = new FileReader(propertiesFile); + try { + this.props.load(reader); + return this; + } finally { + reader.close(); + } } + + public Builder fromInputStream(InputStream inputStream) throws IOException { + try { + this.props.load(inputStream); + return this; + } finally { + inputStream.close(); + } + } + + public Builder withProps(Map kvprops) { + props.putAll(kvprops); + return this; + } + + public Builder withPath(String basePath) { + props.setProperty(BASE_PATH_PROP, basePath); + return this; + } + + public Builder withSchema(String schemaStr) { + props.setProperty(AVRO_SCHEMA, schemaStr); + return this; + } + + public Builder forTable(String tableName) { + props.setProperty(TABLE_NAME, tableName); + return this; + } + + public Builder withBulkInsertParallelism(int bulkInsertParallelism) { + props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism)); + return this; + } + + public Builder withParallelism(int insertShuffleParallelism, int upsertShuffleParallelism) { + props.setProperty(INSERT_PARALLELISM, String.valueOf(insertShuffleParallelism)); + props.setProperty(UPSERT_PARALLELISM, String.valueOf(upsertShuffleParallelism)); + return this; + } + + public Builder combineInput(boolean onInsert, boolean onUpsert) { + props.setProperty(COMBINE_BEFORE_INSERT_PROP, String.valueOf(onInsert)); + props.setProperty(COMBINE_BEFORE_UPSERT_PROP, String.valueOf(onUpsert)); + return this; + } + + public Builder withWriteStatusStorageLevel(String level) { + props.setProperty(WRITE_STATUS_STORAGE_LEVEL, level); + return this; + } + + public Builder withIndexConfig(HoodieIndexConfig indexConfig) { + props.putAll(indexConfig.getProps()); + isIndexConfigSet = true; + return this; + } + + public Builder withStorageConfig(HoodieStorageConfig storageConfig) { + 
props.putAll(storageConfig.getProps()); + isStorageConfigSet = true; + return this; + } + + public Builder withCompactionConfig(HoodieCompactionConfig compactionConfig) { + props.putAll(compactionConfig.getProps()); + isCompactionConfigSet = true; + return this; + } + + public Builder withMetricsConfig(HoodieMetricsConfig metricsConfig) { + props.putAll(metricsConfig.getProps()); + isMetricsConfigSet = true; + return this; + } + + public Builder withAutoCommit(boolean autoCommit) { + props.setProperty(HOODIE_AUTO_COMMIT_PROP, String.valueOf(autoCommit)); + return this; + } + + public Builder withAssumeDatePartitioning(boolean assumeDatePartitioning) { + props.setProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP, + String.valueOf(assumeDatePartitioning)); + return this; + } + + public Builder withWriteStatusClass(Class writeStatusClass) { + props.setProperty(HOODIE_WRITE_STATUS_CLASS_PROP, writeStatusClass.getName()); + return this; + } + + public HoodieWriteConfig build() { + HoodieWriteConfig config = new HoodieWriteConfig(props); + // Check for mandatory properties + Preconditions.checkArgument(config.getBasePath() != null); + setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM, + DEFAULT_PARALLELISM); + setDefaultOnCondition(props, !props.containsKey(BULKINSERT_PARALLELISM), + BULKINSERT_PARALLELISM, + DEFAULT_PARALLELISM); + setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM, + DEFAULT_PARALLELISM); + setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP), + COMBINE_BEFORE_INSERT_PROP, DEFAULT_COMBINE_BEFORE_INSERT); + setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP), + COMBINE_BEFORE_UPSERT_PROP, DEFAULT_COMBINE_BEFORE_UPSERT); + setDefaultOnCondition(props, !props.containsKey(WRITE_STATUS_STORAGE_LEVEL), + WRITE_STATUS_STORAGE_LEVEL, DEFAULT_WRITE_STATUS_STORAGE_LEVEL); + setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP), 
+ HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT); + setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP), + HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING); + setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP), + HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS); + + // Make sure the props is propagated + setDefaultOnCondition(props, !isIndexConfigSet, + HoodieIndexConfig.newBuilder().fromProperties(props).build()); + setDefaultOnCondition(props, !isStorageConfigSet, + HoodieStorageConfig.newBuilder().fromProperties(props).build()); + setDefaultOnCondition(props, !isCompactionConfigSet, + HoodieCompactionConfig.newBuilder().fromProperties(props).build()); + setDefaultOnCondition(props, !isMetricsConfigSet, + HoodieMetricsConfig.newBuilder().fromProperties(props).build()); + return config; + } + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieAppendException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieAppendException.java index 0ba0eb50c..2f896c93a 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieAppendException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieAppendException.java @@ -17,16 +17,16 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown for any higher level errors when HoodieClient is doing a delta commit - *

+ *

Exception thrown for any higher level errors when HoodieClient is doing a delta + * commit

*/ public class HoodieAppendException extends HoodieException { - public HoodieAppendException(String msg, Throwable e) { - super(msg, e); - } - public HoodieAppendException(String msg) { - super(msg); - } + public HoodieAppendException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieAppendException(String msg) { + super(msg); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCommitException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCommitException.java index bc4c139f5..a75b8153f 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCommitException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCommitException.java @@ -17,16 +17,16 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown for any higher level errors when HoodieClient is doing a Commit + *

Exception thrown for any higher level errors when HoodieClient is doing a Commit *

*/ public class HoodieCommitException extends HoodieException { - public HoodieCommitException(String msg) { - super(msg); - } - public HoodieCommitException(String msg, Throwable e) { - super(msg, e); - } + public HoodieCommitException(String msg) { + super(msg); + } + + public HoodieCommitException(String msg, Throwable e) { + super(msg, e); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCompactionException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCompactionException.java index 9d016ec6d..66104a657 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCompactionException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieCompactionException.java @@ -17,6 +17,7 @@ package com.uber.hoodie.exception; public class HoodieCompactionException extends HoodieException { + public HoodieCompactionException(String msg) { super(msg); } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieDependentSystemUnavailableException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieDependentSystemUnavailableException.java index 4f64d76ca..59ea7271e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieDependentSystemUnavailableException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieDependentSystemUnavailableException.java @@ -18,18 +18,17 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown when dependent system is not available - *

+ *

Exception thrown when dependent system is not available

*/ public class HoodieDependentSystemUnavailableException extends HoodieException { - public static final String HBASE = "HBASE"; - public HoodieDependentSystemUnavailableException(String system, String connectURL) { - super(getLogMessage(system, connectURL)); - } + public static final String HBASE = "HBASE"; - private static String getLogMessage(String system, String connectURL) { - return "System " + system + " unavailable. Tried to connect to " + connectURL; - } + public HoodieDependentSystemUnavailableException(String system, String connectURL) { + super(getLogMessage(system, connectURL)); + } + + private static String getLogMessage(String system, String connectURL) { + return "System " + system + " unavailable. Tried to connect to " + connectURL; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieInsertException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieInsertException.java index a228541d3..3bcfa5434 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieInsertException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieInsertException.java @@ -16,15 +16,13 @@ package com.uber.hoodie.exception; -import java.io.IOException; - /** - *

- * Exception thrown for any higher level errors when HoodieClient is doing a bulk insert - *

+ *

Exception thrown for any higher level errors when HoodieClient is doing a bulk + * insert

*/ public class HoodieInsertException extends HoodieException { - public HoodieInsertException(String msg, Throwable e) { - super(msg, e); - } + + public HoodieInsertException(String msg, Throwable e) { + super(msg, e); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieRollbackException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieRollbackException.java index 67e4835a6..477364d99 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieRollbackException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieRollbackException.java @@ -18,11 +18,11 @@ package com.uber.hoodie.exception; public class HoodieRollbackException extends HoodieException { - public HoodieRollbackException(String msg, Throwable e) { - super(msg, e); - } + public HoodieRollbackException(String msg, Throwable e) { + super(msg, e); + } - public HoodieRollbackException(String msg) { - super(msg); - } + public HoodieRollbackException(String msg) { + super(msg); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieSavepointException.java b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieSavepointException.java index 83e1bd134..0b3e221b1 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieSavepointException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieSavepointException.java @@ -18,11 +18,11 @@ package com.uber.hoodie.exception; public class HoodieSavepointException extends HoodieException { - public HoodieSavepointException(String msg, Throwable e) { - super(msg, e); - } + public HoodieSavepointException(String msg, Throwable e) { + super(msg, e); + } - public HoodieSavepointException(String msg) { - super(msg); - } + public HoodieSavepointException(String msg) { + super(msg); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieUpsertException.java 
b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieUpsertException.java index 16779a92b..e4b0f4c5c 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieUpsertException.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/exception/HoodieUpsertException.java @@ -17,16 +17,16 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown for any higher level errors when HoodieClient is doing a incremental upsert - *

+ *

Exception thrown for any higher level errors when HoodieClient is doing an + * incremental upsert

*/ -public class HoodieUpsertException extends HoodieException { - public HoodieUpsertException(String msg, Throwable e) { - super(msg, e); - } +public class HoodieUpsertException extends HoodieException { - public HoodieUpsertException(String msg) { - super(msg); - } + public HoodieUpsertException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieUpsertException(String msg) { + super(msg); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/func/BulkInsertMapFunction.java b/hoodie-client/src/main/java/com/uber/hoodie/func/BulkInsertMapFunction.java index ae130a62d..8d305d214 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/func/BulkInsertMapFunction.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/func/BulkInsertMapFunction.java @@ -16,16 +16,14 @@ package com.uber.hoodie.func; -import com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.table.HoodieTable; -import org.apache.spark.api.java.function.Function2; - import java.util.Iterator; import java.util.List; +import org.apache.spark.api.java.function.Function2; /** @@ -34,20 +32,21 @@ import java.util.List; public class BulkInsertMapFunction implements Function2>, Iterator>> { - private String commitTime; - private HoodieWriteConfig config; - private HoodieTable hoodieTable; + private String commitTime; + private HoodieWriteConfig config; + private HoodieTable hoodieTable; - public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config, - HoodieTable hoodieTable) { - this.commitTime = commitTime; - this.config = config; - this.hoodieTable = hoodieTable; - } + public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config, + HoodieTable hoodieTable) { + this.commitTime = commitTime; + 
this.config = config; + this.hoodieTable = hoodieTable; + } - @Override - public Iterator> call(Integer partition, Iterator> sortedRecordItr) - throws Exception { - return new LazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable); - } + @Override + public Iterator> call(Integer partition, + Iterator> sortedRecordItr) + throws Exception { + return new LazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java b/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java index aa11e7efe..cc038f21a 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java @@ -16,99 +16,101 @@ package com.uber.hoodie.func; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; - -import com.uber.hoodie.io.HoodieIOHandle; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.io.HoodieCreateHandle; +import com.uber.hoodie.io.HoodieIOHandle; import com.uber.hoodie.table.HoodieTable; -import org.apache.spark.TaskContext; - import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import org.apache.spark.TaskContext; /** - * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, - * into new files. + * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new + * files. 
*/ -public class LazyInsertIterable extends LazyIterableIterator, List> { +public class LazyInsertIterable extends + LazyIterableIterator, List> { - private final HoodieWriteConfig hoodieConfig; - private final String commitTime; - private final HoodieTable hoodieTable; - private Set partitionsCleaned; - private HoodieCreateHandle handle; + private final HoodieWriteConfig hoodieConfig; + private final String commitTime; + private final HoodieTable hoodieTable; + private Set partitionsCleaned; + private HoodieCreateHandle handle; - public LazyInsertIterable(Iterator> sortedRecordItr, HoodieWriteConfig config, - String commitTime, HoodieTable hoodieTable) { - super(sortedRecordItr); - this.partitionsCleaned = new HashSet<>(); - this.hoodieConfig = config; - this.commitTime = commitTime; - this.hoodieTable = hoodieTable; + public LazyInsertIterable(Iterator> sortedRecordItr, HoodieWriteConfig config, + String commitTime, HoodieTable hoodieTable) { + super(sortedRecordItr); + this.partitionsCleaned = new HashSet<>(); + this.hoodieConfig = config; + this.commitTime = commitTime; + this.hoodieTable = hoodieTable; + } + + @Override + protected void start() { + } + + + @Override + protected List computeNext() { + List statuses = new ArrayList<>(); + + while (inputItr.hasNext()) { + HoodieRecord record = inputItr.next(); + + // clean up any partial failures + if (!partitionsCleaned.contains(record.getPartitionPath())) { + // This insert task could fail multiple times, but Spark will faithfully retry with + // the same data again. 
Thus, before we open any files under a given partition, we + // first delete any files in the same partitionPath written by same Spark partition + HoodieIOHandle.cleanupTmpFilesFromCurrentCommit(hoodieConfig, + commitTime, + record.getPartitionPath(), + TaskContext.getPartitionId()); + partitionsCleaned.add(record.getPartitionPath()); + } + + // lazily initialize the handle, for the first time + if (handle == null) { + handle = + new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, + record.getPartitionPath()); + } + + if (handle.canWrite(record)) { + // write the record, if the handle has capacity + handle.write(record); + } else { + // handle is full. + statuses.add(handle.close()); + // Need to handle the rejected record & open new handle + handle = + new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, + record.getPartitionPath()); + handle.write(record); // we should be able to write 1 record. + break; + } } - @Override protected void start() { + // If we exited out, because we ran out of records, just close the pending handle. + if (!inputItr.hasNext()) { + if (handle != null) { + statuses.add(handle.close()); + } } + assert statuses.size() > 0; // should never return empty statuses + return statuses; + } - @Override protected List computeNext() { - List statuses = new ArrayList<>(); + @Override + protected void end() { - while (inputItr.hasNext()) { - HoodieRecord record = inputItr.next(); - - // clean up any partial failures - if (!partitionsCleaned.contains(record.getPartitionPath())) { - // This insert task could fail multiple times, but Spark will faithfully retry with - // the same data again. 
Thus, before we open any files under a given partition, we - // first delete any files in the same partitionPath written by same Spark partition - HoodieIOHandle.cleanupTmpFilesFromCurrentCommit(hoodieConfig, - commitTime, - record.getPartitionPath(), - TaskContext.getPartitionId()); - partitionsCleaned.add(record.getPartitionPath()); - } - - // lazily initialize the handle, for the first time - if (handle == null) { - handle = - new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, - record.getPartitionPath()); - } - - if (handle.canWrite(record)) { - // write the record, if the handle has capacity - handle.write(record); - } else { - // handle is full. - statuses.add(handle.close()); - // Need to handle the rejected record & open new handle - handle = - new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, - record.getPartitionPath()); - handle.write(record); // we should be able to write 1 record. - break; - } - } - - // If we exited out, because we ran out of records, just close the pending handle. 
- if (!inputItr.hasNext()) { - if (handle != null) { - statuses.add(handle.close()); - } - } - - assert statuses.size() > 0; // should never return empty statuses - return statuses; - } - - @Override protected void end() { - - } + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/func/LazyIterableIterator.java b/hoodie-client/src/main/java/com/uber/hoodie/func/LazyIterableIterator.java index 195342f82..2720e001e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/func/LazyIterableIterator.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/func/LazyIterableIterator.java @@ -31,98 +31,99 @@ import java.util.Iterator; * responsible for calling inputIterator.next() and doing the processing in computeNext() */ public abstract class LazyIterableIterator implements Iterable, Iterator { - protected Iterator inputItr = null; - private boolean consumed = false; - private boolean startCalled = false; - private boolean endCalled = false; - public LazyIterableIterator(Iterator in) { - inputItr = in; + protected Iterator inputItr = null; + private boolean consumed = false; + private boolean startCalled = false; + private boolean endCalled = false; + + public LazyIterableIterator(Iterator in) { + inputItr = in; + } + + /** + * Called once, before any elements are processed + */ + protected abstract void start(); + + /** + * Block computation to be overwritten by sub classes. + */ + protected abstract O computeNext(); + + + /** + * Called once, after all elements are processed. 
+ */ + protected abstract void end(); + + ////////////////// + // iterable implementation + + private void invokeStartIfNeeded() { + if (!startCalled) { + startCalled = true; + try { + start(); + } catch (Exception e) { + throw new RuntimeException("Error in start()"); + } + } + } + + private void invokeEndIfNeeded() { + // make the calls out to begin() & end() + if (!endCalled) { + endCalled = true; + // if we are out of elements, and end has not been called yet + try { + end(); + } catch (Exception e) { + throw new RuntimeException("Error in end()"); + } + } + } + + @Override + public Iterator iterator() { + //check for consumed inputItr + if (consumed) { + throw new RuntimeException("Invalid repeated inputItr consumption."); } - /** - * Called once, before any elements are processed - */ - protected abstract void start(); + //hand out self as inputItr exactly once (note: do not hand out the input + //inputItr since it is consumed by the self inputItr implementation) + consumed = true; + return this; + } - /** - * Block computation to be overwritten by sub classes. - */ - protected abstract O computeNext(); + ////////////////// + // inputItr implementation - - /** - * Called once, after all elements are processed. 
- */ - protected abstract void end(); - - - ////////////////// - // iterable implementation - - private void invokeStartIfNeeded() { - if (!startCalled) { - startCalled = true; - try { - start(); - } catch (Exception e) { - throw new RuntimeException("Error in start()"); - } - } + @Override + public boolean hasNext() { + boolean ret = inputItr.hasNext(); + // make sure, there is exactly one call to start() + invokeStartIfNeeded(); + if (!ret) { + // if we are out of elements, and end has not been called yet + invokeEndIfNeeded(); } - private void invokeEndIfNeeded() { - // make the calls out to begin() & end() - if (!endCalled) { - endCalled = true; - // if we are out of elements, and end has not been called yet - try { - end(); - } catch (Exception e) { - throw new RuntimeException("Error in end()"); - } - } + return ret; + } + + @Override + public O next() { + try { + return computeNext(); + } catch (Exception ex) { + throw new RuntimeException(ex); } + } - @Override - public Iterator iterator() { - //check for consumed inputItr - if (consumed) - throw new RuntimeException("Invalid repeated inputItr consumption."); - - //hand out self as inputItr exactly once (note: do not hand out the input - //inputItr since it is consumed by the self inputItr implementation) - consumed = true; - return this; - } - - ////////////////// - // inputItr implementation - - @Override - public boolean hasNext() { - boolean ret = inputItr.hasNext(); - // make sure, there is exactly one call to start() - invokeStartIfNeeded(); - if (!ret) { - // if we are out of elements, and end has not been called yet - invokeEndIfNeeded(); - } - - return ret; - } - - @Override - public O next() { - try { - return computeNext(); - } catch (Exception ex) { - throw new RuntimeException(ex); - } - } - - @Override - public void remove() { - throw new RuntimeException("Unsupported remove operation."); - } + @Override + public void remove() { + throw new RuntimeException("Unsupported remove operation."); + 
} } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java index 91b23bc0c..642cb7d9b 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java @@ -17,118 +17,108 @@ package com.uber.hoodie.index; import com.google.common.base.Optional; - -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieKey; -import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.model.HoodieRecord; - +import com.uber.hoodie.common.model.HoodieRecordPayload; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieIndexException; import com.uber.hoodie.index.bloom.HoodieBloomIndex; import com.uber.hoodie.index.bucketed.BucketedIndex; import com.uber.hoodie.index.hbase.HBaseIndex; import com.uber.hoodie.table.HoodieTable; +import java.io.Serializable; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.Serializable; - /** * Base class for different types of indexes to determine the mapping from uuid - * */ public abstract class HoodieIndex implements Serializable { - protected transient JavaSparkContext jsc = null; - public enum IndexType { - HBASE, - INMEMORY, - BLOOM, - BUCKETED - } - - protected final HoodieWriteConfig config; - - protected HoodieIndex(HoodieWriteConfig config, JavaSparkContext jsc) { - this.config = config; - this.jsc = jsc; - } - - /** - * Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional[FullFilePath]] - * If the optional FullFilePath value is not present, then the key is not found. 
If the FullFilePath - * value is present, it is the path component (without scheme) of the URI underlying file - * - * @param hoodieKeys - * @param table - * @return - */ - public abstract JavaPairRDD> fetchRecordLocation( - JavaRDD hoodieKeys, final HoodieTable table); - - /** - * Looks up the index and tags each incoming record with a location of a file that contains the - * row (if it is actually present) - */ - public abstract JavaRDD> tagLocation(JavaRDD> recordRDD, - HoodieTable hoodieTable) throws HoodieIndexException; - - /** - * Extracts the location of written records, and updates the index. - * - * TODO(vc): We may need to propagate the record as well in a WriteStatus class - */ - public abstract JavaRDD updateLocation(JavaRDD writeStatusRDD, - HoodieTable hoodieTable) throws HoodieIndexException; - - /** - * Rollback the efffects of the commit made at commitTime. - */ - public abstract boolean rollbackCommit(String commitTime); - - /** - * An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the `partitionPath`. - * Such an implementation is able to obtain the same mapping, for two hoodie keys with same `recordKey` - * but different `partitionPath` - * - * @return whether or not, the index implementation is global in nature - */ - public abstract boolean isGlobal(); - - /** - * This is used by storage to determine, if its safe to send inserts, straight to the log, - * i.e having a {@link com.uber.hoodie.common.model.FileSlice}, with no data file. - * - * @return Returns true/false depending on whether the impl has this capability - */ - public abstract boolean canIndexLogFiles(); - - - /** - * - * An index is "implicit" with respect to storage, if just writing new data to a file slice, - * updates the index as well. This is used by storage, to save memory footprint in - * certain cases. 
- * - * @return - */ - public abstract boolean isImplicitWithStorage(); - - - public static HoodieIndex createIndex( - HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException { - switch (config.getIndexType()) { - case HBASE: - return new HBaseIndex<>(config, jsc); - case INMEMORY: - return new InMemoryHashIndex<>(config, jsc); - case BLOOM: - return new HoodieBloomIndex<>(config, jsc); - case BUCKETED: - return new BucketedIndex<>(config, jsc); - } - throw new HoodieIndexException("Index type unspecified, set " + config.getIndexType()); + protected transient JavaSparkContext jsc = null; + + public enum IndexType { + HBASE, + INMEMORY, + BLOOM, + BUCKETED + } + + protected final HoodieWriteConfig config; + + protected HoodieIndex(HoodieWriteConfig config, JavaSparkContext jsc) { + this.config = config; + this.jsc = jsc; + } + + /** + * Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional[FullFilePath]] + * If the optional FullFilePath value is not present, then the key is not found. If the + * FullFilePath value is present, it is the path component (without scheme) of the URI underlying + * file + */ + public abstract JavaPairRDD> fetchRecordLocation( + JavaRDD hoodieKeys, final HoodieTable table); + + /** + * Looks up the index and tags each incoming record with a location of a file that contains the + * row (if it is actually present) + */ + public abstract JavaRDD> tagLocation(JavaRDD> recordRDD, + HoodieTable hoodieTable) throws HoodieIndexException; + + /** + * Extracts the location of written records, and updates the index. + * + * TODO(vc): We may need to propagate the record as well in a WriteStatus class + */ + public abstract JavaRDD updateLocation(JavaRDD writeStatusRDD, + HoodieTable hoodieTable) throws HoodieIndexException; + + /** + * Rollback the efffects of the commit made at commitTime. 
+ */ + public abstract boolean rollbackCommit(String commitTime); + + /** + * An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the + * `partitionPath`. Such an implementation is able to obtain the same mapping, for two hoodie keys + * with same `recordKey` but different `partitionPath` + * + * @return whether or not, the index implementation is global in nature + */ + public abstract boolean isGlobal(); + + /** + * This is used by storage to determine, if its safe to send inserts, straight to the log, i.e + * having a {@link com.uber.hoodie.common.model.FileSlice}, with no data file. + * + * @return Returns true/false depending on whether the impl has this capability + */ + public abstract boolean canIndexLogFiles(); + + + /** + * An index is "implicit" with respect to storage, if just writing new data to a file slice, + * updates the index as well. This is used by storage, to save memory footprint in certain cases. + */ + public abstract boolean isImplicitWithStorage(); + + + public static HoodieIndex createIndex( + HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException { + switch (config.getIndexType()) { + case HBASE: + return new HBaseIndex<>(config, jsc); + case INMEMORY: + return new InMemoryHashIndex<>(config, jsc); + case BLOOM: + return new HoodieBloomIndex<>(config, jsc); + case BUCKETED: + return new BucketedIndex<>(config, jsc); } + throw new HoodieIndexException("Index type unspecified, set " + config.getIndexType()); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java index 7f202f662..422d31983 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java @@ -17,129 +17,119 @@ package com.uber.hoodie.index; import com.google.common.base.Optional; -import 
com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieRecordPayload; - +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.table.HoodieTable; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.Function2; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; - /** - * Hoodie Index implementation backed by an in-memory Hash map. - *

- * ONLY USE FOR LOCAL TESTING + * Hoodie Index implementation backed by an in-memory Hash map.

ONLY USE FOR LOCAL TESTING */ public class InMemoryHashIndex extends HoodieIndex { - private static ConcurrentMap recordLocationMap; + private static ConcurrentMap recordLocationMap; - public InMemoryHashIndex(HoodieWriteConfig config, JavaSparkContext jsc) { - super(config, jsc); - recordLocationMap = new ConcurrentHashMap<>(); - } + public InMemoryHashIndex(HoodieWriteConfig config, JavaSparkContext jsc) { + super(config, jsc); + recordLocationMap = new ConcurrentHashMap<>(); + } + + @Override + public JavaPairRDD> fetchRecordLocation( + JavaRDD hoodieKeys, final HoodieTable table) { + throw new UnsupportedOperationException("InMemory index does not implement check exist yet"); + } + + /** + * Function that tags each HoodieRecord with an existing location, if known. + */ + class LocationTagFunction + implements Function2>, Iterator>> { @Override - public JavaPairRDD> fetchRecordLocation( - JavaRDD hoodieKeys, final HoodieTable table) { - throw new UnsupportedOperationException("InMemory index does not implement check exist yet"); - } - - /** - * Function that tags each HoodieRecord with an existing location, if known. 
- */ - class LocationTagFunction - implements Function2>, Iterator>> { - @Override - public Iterator> call(Integer partitionNum, - Iterator> hoodieRecordIterator) { - List> taggedRecords = new ArrayList<>(); - while (hoodieRecordIterator.hasNext()) { - HoodieRecord rec = hoodieRecordIterator.next(); - if (recordLocationMap.containsKey(rec.getKey())) { - rec.setCurrentLocation(recordLocationMap.get(rec.getKey())); - } - taggedRecords.add(rec); - } - return taggedRecords.iterator(); + public Iterator> call(Integer partitionNum, + Iterator> hoodieRecordIterator) { + List> taggedRecords = new ArrayList<>(); + while (hoodieRecordIterator.hasNext()) { + HoodieRecord rec = hoodieRecordIterator.next(); + if (recordLocationMap.containsKey(rec.getKey())) { + rec.setCurrentLocation(recordLocationMap.get(rec.getKey())); } + taggedRecords.add(rec); + } + return taggedRecords.iterator(); } + } - @Override - public JavaRDD> tagLocation(JavaRDD> recordRDD, - HoodieTable hoodieTable) { - return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(), true); - } + @Override + public JavaRDD> tagLocation(JavaRDD> recordRDD, + HoodieTable hoodieTable) { + return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(), true); + } - @Override - public JavaRDD updateLocation(JavaRDD writeStatusRDD, - HoodieTable hoodieTable) { - return writeStatusRDD.map(new Function() { - @Override - public WriteStatus call(WriteStatus writeStatus) { - for (HoodieRecord record : writeStatus.getWrittenRecords()) { - if (!writeStatus.isErrored(record.getKey())) { - HoodieKey key = record.getKey(); - java.util.Optional newLocation = record.getNewLocation(); - if (newLocation.isPresent()) { - recordLocationMap.put(key, newLocation.get()); - } else { - //Delete existing index for a deleted record - recordLocationMap.remove(key); - } - } - } - return writeStatus; + @Override + public JavaRDD updateLocation(JavaRDD writeStatusRDD, + HoodieTable hoodieTable) { + return writeStatusRDD.map(new 
Function() { + @Override + public WriteStatus call(WriteStatus writeStatus) { + for (HoodieRecord record : writeStatus.getWrittenRecords()) { + if (!writeStatus.isErrored(record.getKey())) { + HoodieKey key = record.getKey(); + java.util.Optional newLocation = record.getNewLocation(); + if (newLocation.isPresent()) { + recordLocationMap.put(key, newLocation.get()); + } else { + //Delete existing index for a deleted record + recordLocationMap.remove(key); } - }); - } + } + } + return writeStatus; + } + }); + } - @Override - public boolean rollbackCommit(String commitTime) { - return true; - } + @Override + public boolean rollbackCommit(String commitTime) { + return true; + } - /** - * Only looks up by recordKey - * - * @return - */ - @Override - public boolean isGlobal() { - return true; - } + /** + * Only looks up by recordKey + */ + @Override + public boolean isGlobal() { + return true; + } - /** - * Mapping is available in HBase already. - * - * @return - */ - @Override - public boolean canIndexLogFiles() { - return true; - } + /** + * Mapping is available in HBase already. + */ + @Override + public boolean canIndexLogFiles() { + return true; + } - /** - * Index needs to be explicitly updated after storage write. - * - * @return - */ - @Override - public boolean isImplicitWithStorage() { - return false; - } + /** + * Index needs to be explicitly updated after storage write. 
+ */ + @Override + public boolean isImplicitWithStorage() { + return false; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/BloomIndexFileInfo.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/BloomIndexFileInfo.java index abe482094..0f0fb9908 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/BloomIndexFileInfo.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/BloomIndexFileInfo.java @@ -19,7 +19,6 @@ package com.uber.hoodie.index.bloom; import com.google.common.base.Objects; - import java.io.Serializable; /** @@ -27,73 +26,75 @@ import java.io.Serializable; */ public class BloomIndexFileInfo implements Serializable { - private final String fileName; + private final String fileName; - private final String minRecordKey; + private final String minRecordKey; - private final String maxRecordKey; + private final String maxRecordKey; - public BloomIndexFileInfo(String fileName, String minRecordKey, String maxRecordKey) { - this.fileName = fileName; - this.minRecordKey = minRecordKey; - this.maxRecordKey = maxRecordKey; + public BloomIndexFileInfo(String fileName, String minRecordKey, String maxRecordKey) { + this.fileName = fileName; + this.minRecordKey = minRecordKey; + this.maxRecordKey = maxRecordKey; + } + + public BloomIndexFileInfo(String fileName) { + this.fileName = fileName; + this.minRecordKey = null; + this.maxRecordKey = null; + } + + public String getFileName() { + return fileName; + } + + public String getMinRecordKey() { + return minRecordKey; + } + + public String getMaxRecordKey() { + return maxRecordKey; + } + + public boolean hasKeyRanges() { + return minRecordKey != null && maxRecordKey != null; + } + + /** + * Does the given key fall within the range (inclusive) + */ + public boolean isKeyInRange(String recordKey) { + return minRecordKey.compareTo(recordKey) <= 0 && + maxRecordKey.compareTo(recordKey) >= 0; + } + + @Override + public boolean equals(Object o) { + if (this == 
o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; } - public BloomIndexFileInfo(String fileName) { - this.fileName = fileName; - this.minRecordKey = null; - this.maxRecordKey = null; - } + BloomIndexFileInfo that = (BloomIndexFileInfo) o; + return Objects.equal(that.fileName, fileName) && + Objects.equal(that.minRecordKey, minRecordKey) && + Objects.equal(that.maxRecordKey, maxRecordKey); - public String getFileName() { - return fileName; - } + } - public String getMinRecordKey() { - return minRecordKey; - } + @Override + public int hashCode() { + return Objects.hashCode(fileName, minRecordKey, maxRecordKey); + } - public String getMaxRecordKey() { - return maxRecordKey; - } - - public boolean hasKeyRanges() { - return minRecordKey != null && maxRecordKey != null; - } - - /** - * Does the given key fall within the range (inclusive) - * @param recordKey - * @return - */ - public boolean isKeyInRange(String recordKey) { - return minRecordKey.compareTo(recordKey) <= 0 && - maxRecordKey.compareTo(recordKey) >= 0; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - BloomIndexFileInfo that = (BloomIndexFileInfo) o; - return Objects.equal(that.fileName, fileName) && - Objects.equal(that.minRecordKey, minRecordKey) && - Objects.equal(that.maxRecordKey, maxRecordKey); - - } - - @Override - public int hashCode() { - return Objects.hashCode(fileName, minRecordKey, maxRecordKey); - } - - public String toString() { - final StringBuilder sb = new StringBuilder("BloomIndexFileInfo {"); - sb.append(" fileName=").append(fileName); - sb.append(" minRecordKey=").append(minRecordKey); - sb.append(" maxRecordKey=").append(maxRecordKey); - sb.append('}'); - return sb.toString(); - } + public String toString() { + final StringBuilder sb = new StringBuilder("BloomIndexFileInfo {"); + sb.append(" fileName=").append(fileName); + sb.append(" 
minRecordKey=").append(minRecordKey); + sb.append(" maxRecordKey=").append(maxRecordKey); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java index 37e0bc719..44dc910c1 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java @@ -18,9 +18,12 @@ package com.uber.hoodie.index.bloom; +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.mapping; +import static java.util.stream.Collectors.toList; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; - import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodieKey; @@ -34,7 +37,10 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.MetadataNotFoundException; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.table.HoodieTable; - +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -42,369 +48,370 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.storage.StorageLevel; - import scala.Tuple2; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static java.util.stream.Collectors.*; - /** * Indexing mechanism based on bloom filter. Each parquet file includes its row_key bloom filter in * its metadata. 
*/ public class HoodieBloomIndex extends HoodieIndex { - private static Logger logger = LogManager.getLogger(HoodieBloomIndex.class); + private static Logger logger = LogManager.getLogger(HoodieBloomIndex.class); - // we need to limit the join such that it stays within 1.5GB per Spark partition. (SPARK-1476) - private static final int SPARK_MAXIMUM_BYTES_PER_PARTITION = 1500 * 1024 * 1024; - // this is how much a triplet of (partitionPath, fileId, recordKey) costs. - private static final int BYTES_PER_PARTITION_FILE_KEY_TRIPLET = 300; - private static int MAX_ITEMS_PER_SHUFFLE_PARTITION = SPARK_MAXIMUM_BYTES_PER_PARTITION / BYTES_PER_PARTITION_FILE_KEY_TRIPLET; + // we need to limit the join such that it stays within 1.5GB per Spark partition. (SPARK-1476) + private static final int SPARK_MAXIMUM_BYTES_PER_PARTITION = 1500 * 1024 * 1024; + // this is how much a triplet of (partitionPath, fileId, recordKey) costs. + private static final int BYTES_PER_PARTITION_FILE_KEY_TRIPLET = 300; + private static int MAX_ITEMS_PER_SHUFFLE_PARTITION = + SPARK_MAXIMUM_BYTES_PER_PARTITION / BYTES_PER_PARTITION_FILE_KEY_TRIPLET; - public HoodieBloomIndex(HoodieWriteConfig config, JavaSparkContext jsc) { - super(config, jsc); + public HoodieBloomIndex(HoodieWriteConfig config, JavaSparkContext jsc) { + super(config, jsc); + } + + @Override + public JavaRDD> tagLocation(JavaRDD> recordRDD, + final HoodieTable hoodieTable) { + + // Step 0: cache the input record RDD + if (config.getBloomIndexUseCaching()) { + recordRDD.persist(StorageLevel.MEMORY_AND_DISK_SER()); } - @Override - public JavaRDD> tagLocation(JavaRDD> recordRDD, final HoodieTable hoodieTable) { + // Step 1: Extract out thinner JavaPairRDD of (partitionPath, recordKey) + JavaPairRDD partitionRecordKeyPairRDD = recordRDD + .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey())); - // Step 0: cache the input record RDD - if (config.getBloomIndexUseCaching()) { - 
recordRDD.persist(StorageLevel.MEMORY_AND_DISK_SER()); - } + // Lookup indexes for all the partition/recordkey pair + JavaPairRDD rowKeyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD, + hoodieTable); - // Step 1: Extract out thinner JavaPairRDD of (partitionPath, recordKey) - JavaPairRDD partitionRecordKeyPairRDD = recordRDD - .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey())); - - // Lookup indexes for all the partition/recordkey pair - JavaPairRDD rowKeyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD, hoodieTable); - - // Cache the result, for subsequent stages. - if (config.getBloomIndexUseCaching()) { - rowKeyFilenamePairRDD.persist(StorageLevel.MEMORY_AND_DISK_SER()); - } - if (logger.isDebugEnabled()) { - long totalTaggedRecords = rowKeyFilenamePairRDD.count(); - logger.debug("Number of update records (ones tagged with a fileID): " + totalTaggedRecords); - } - - // Step 4: Tag the incoming records, as inserts or updates, by joining with existing record keys - // Cost: 4 sec. - JavaRDD> taggedRecordRDD = tagLocationBacktoRecords(rowKeyFilenamePairRDD, recordRDD); - - if (config.getBloomIndexUseCaching()) { - recordRDD.unpersist(); // unpersist the input Record RDD - rowKeyFilenamePairRDD.unpersist(); - } - - return taggedRecordRDD; + // Cache the result, for subsequent stages. + if (config.getBloomIndexUseCaching()) { + rowKeyFilenamePairRDD.persist(StorageLevel.MEMORY_AND_DISK_SER()); + } + if (logger.isDebugEnabled()) { + long totalTaggedRecords = rowKeyFilenamePairRDD.count(); + logger.debug("Number of update records (ones tagged with a fileID): " + totalTaggedRecords); } - public JavaPairRDD> fetchRecordLocation( - JavaRDD hoodieKeys, final HoodieTable table) { - JavaPairRDD partitionRecordKeyPairRDD = - hoodieKeys.mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey())); + // Step 4: Tag the incoming records, as inserts or updates, by joining with existing record keys + // Cost: 4 sec. 
+ JavaRDD> taggedRecordRDD = tagLocationBacktoRecords(rowKeyFilenamePairRDD, + recordRDD); - // Lookup indexes for all the partition/recordkey pair - JavaPairRDD rowKeyFilenamePairRDD = - lookupIndex(partitionRecordKeyPairRDD, table); - - JavaPairRDD rowKeyHoodieKeyPairRDD = - hoodieKeys.mapToPair(key -> new Tuple2<>(key.getRecordKey(), key)); - - return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD) - .mapToPair(keyPathTuple -> { - Optional recordLocationPath; - if (keyPathTuple._2._2.isPresent()) { - String fileName = keyPathTuple._2._2.get(); - String partitionPath = keyPathTuple._2._1.getPartitionPath(); - recordLocationPath = Optional.of(new Path( - new Path(table.getMetaClient().getBasePath(), partitionPath), - fileName).toUri().getPath()); - } else { - recordLocationPath = Optional.absent(); - } - return new Tuple2<>(keyPathTuple._2._1, recordLocationPath); - }); + if (config.getBloomIndexUseCaching()) { + recordRDD.unpersist(); // unpersist the input Record RDD + rowKeyFilenamePairRDD.unpersist(); } - /** - * Lookup the location for each record key and return the pair for all - * record keys already present and drop the record keys if not present - */ - private JavaPairRDD lookupIndex( - JavaPairRDD partitionRecordKeyPairRDD, final HoodieTable hoodieTable) { - // Obtain records per partition, in the incoming records - Map recordsPerPartition = partitionRecordKeyPairRDD.countByKey(); - List affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet()); + return taggedRecordRDD; + } - // Step 2: Load all involved files as pairs - List> fileInfoList = loadInvolvedFiles(affectedPartitionPathList, hoodieTable); - final Map> partitionToFileInfo = fileInfoList.stream() - .collect(groupingBy(Tuple2::_1, mapping(Tuple2::_2, toList()))); + public JavaPairRDD> fetchRecordLocation( + JavaRDD hoodieKeys, final HoodieTable table) { + JavaPairRDD partitionRecordKeyPairRDD = + hoodieKeys.mapToPair(key -> new Tuple2<>(key.getPartitionPath(), 
key.getRecordKey())); - // Step 3: Obtain a RDD, for each incoming record, that already exists, with the file id, that contains it. - int parallelism = autoComputeParallelism(recordsPerPartition, partitionToFileInfo, partitionRecordKeyPairRDD); - return findMatchingFilesForRecordKeys(partitionToFileInfo, partitionRecordKeyPairRDD, parallelism); + // Lookup indexes for all the partition/recordkey pair + JavaPairRDD rowKeyFilenamePairRDD = + lookupIndex(partitionRecordKeyPairRDD, table); + + JavaPairRDD rowKeyHoodieKeyPairRDD = + hoodieKeys.mapToPair(key -> new Tuple2<>(key.getRecordKey(), key)); + + return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD) + .mapToPair(keyPathTuple -> { + Optional recordLocationPath; + if (keyPathTuple._2._2.isPresent()) { + String fileName = keyPathTuple._2._2.get(); + String partitionPath = keyPathTuple._2._1.getPartitionPath(); + recordLocationPath = Optional.of(new Path( + new Path(table.getMetaClient().getBasePath(), partitionPath), + fileName).toUri().getPath()); + } else { + recordLocationPath = Optional.absent(); + } + return new Tuple2<>(keyPathTuple._2._1, recordLocationPath); + }); + } + + /** + * Lookup the location for each record key and return the pair for all record + * keys already present and drop the record keys if not present + */ + private JavaPairRDD lookupIndex( + JavaPairRDD partitionRecordKeyPairRDD, final HoodieTable hoodieTable) { + // Obtain records per partition, in the incoming records + Map recordsPerPartition = partitionRecordKeyPairRDD.countByKey(); + List affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet()); + + // Step 2: Load all involved files as pairs + List> fileInfoList = loadInvolvedFiles( + affectedPartitionPathList, hoodieTable); + final Map> partitionToFileInfo = fileInfoList.stream() + .collect(groupingBy(Tuple2::_1, mapping(Tuple2::_2, toList()))); + + // Step 3: Obtain a RDD, for each incoming record, that already exists, with the file id, that contains it. 
+ int parallelism = autoComputeParallelism(recordsPerPartition, partitionToFileInfo, + partitionRecordKeyPairRDD); + return findMatchingFilesForRecordKeys(partitionToFileInfo, partitionRecordKeyPairRDD, + parallelism); + } + + /** + * The index lookup can be skewed in three dimensions : #files, #partitions, #records + * + * To be able to smoothly handle skews, we need to compute how to split each partitions into + * subpartitions. We do it here, in a way that keeps the amount of each Spark join partition to < + * 2GB. + * + * If {@link com.uber.hoodie.config.HoodieIndexConfig#BLOOM_INDEX_PARALLELISM_PROP} is specified + * as a NON-zero number, then that is used explicitly. + */ + private int autoComputeParallelism(final Map recordsPerPartition, + final Map> partitionToFileInfo, + JavaPairRDD partitionRecordKeyPairRDD) { + + long totalComparisons = 0; + if (config.getBloomIndexPruneByRanges()) { + // we will just try exploding the input and then count to determine comparisons + totalComparisons = explodeRecordRDDWithFileComparisons(partitionToFileInfo, + partitionRecordKeyPairRDD).count(); + } else { + // if not pruning by ranges, then each file in a partition needs to compared against all + // records for a partition. + Map filesPerPartition = partitionToFileInfo.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> Long.valueOf(e.getValue().size()))); + long totalFiles = 0, totalRecords = 0; + for (String partitionPath : recordsPerPartition.keySet()) { + long numRecords = recordsPerPartition.get(partitionPath); + long numFiles = + filesPerPartition.containsKey(partitionPath) ? filesPerPartition.get(partitionPath) + : 1L; + + totalComparisons += numFiles * numRecords; + totalFiles += + filesPerPartition.containsKey(partitionPath) ? 
filesPerPartition.get(partitionPath) + : 0L; + totalRecords += numRecords; + } + logger.info("TotalRecords: " + totalRecords + ", TotalFiles: " + totalFiles + + ", TotalAffectedPartitions:" + recordsPerPartition.size()); } - /** - * The index lookup can be skewed in three dimensions : #files, #partitions, #records - * - * To be able to smoothly handle skews, we need to compute how to split each partitions into - * subpartitions. We do it here, in a way that keeps the amount of each Spark join partition to - * < 2GB. - * - * If {@link com.uber.hoodie.config.HoodieIndexConfig#BLOOM_INDEX_PARALLELISM_PROP} is specified as a NON-zero number, - * then that is used explicitly. - * - */ - private int autoComputeParallelism(final Map recordsPerPartition, - final Map> partitionToFileInfo, - JavaPairRDD partitionRecordKeyPairRDD) { + // each partition will have an item per comparison. + int parallelism = (int) (totalComparisons / MAX_ITEMS_PER_SHUFFLE_PARTITION + 1); + logger.info( + "Auto computed parallelism :" + parallelism + ", totalComparisons: " + totalComparisons); + return parallelism; + } - long totalComparisons = 0; - if (config.getBloomIndexPruneByRanges()) { - // we will just try exploding the input and then count to determine comparisons - totalComparisons = explodeRecordRDDWithFileComparisons(partitionToFileInfo, partitionRecordKeyPairRDD).count(); - } else { - // if not pruning by ranges, then each file in a partition needs to compared against all - // records for a partition. - Map filesPerPartition = partitionToFileInfo.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, e -> Long.valueOf(e.getValue().size()))); - long totalFiles = 0, totalRecords = 0; - for (String partitionPath : recordsPerPartition.keySet()) { - long numRecords = recordsPerPartition.get(partitionPath); - long numFiles = filesPerPartition.containsKey(partitionPath) ? filesPerPartition.get(partitionPath) : 1L; + /** + * Its crucial to pick the right parallelism. 
+ * + * totalSubPartitions : this is deemed safe limit, to be nice with Spark. inputParallelism : + * typically number of input file splits + * + * We pick the max such that, we are always safe, but go higher if say a there are a lot of input + * files. (otherwise, we will fallback to number of partitions in input and end up with slow + * performance) + */ + private int determineParallelism(int inputParallelism, int totalSubPartitions) { + // If bloom index parallelism is set, use it to to check against the input parallelism and take the max + int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism()); + int joinParallelism = Math.max(totalSubPartitions, indexParallelism); + logger.info("InputParallelism: ${" + inputParallelism + "}, " + + "IndexParallelism: ${" + config.getBloomIndexParallelism() + "}, " + + "TotalSubParts: ${" + totalSubPartitions + "}, " + + "Join Parallelism set to : " + joinParallelism); + return joinParallelism; + } - totalComparisons += numFiles * numRecords; - totalFiles += filesPerPartition.containsKey(partitionPath) ? filesPerPartition.get(partitionPath) : 0L; - totalRecords += numRecords; - } - logger.info("TotalRecords: " + totalRecords + ", TotalFiles: " + totalFiles + ", TotalAffectedPartitions:" + recordsPerPartition.size()); - } - - // each partition will have an item per comparison. - int parallelism = (int) (totalComparisons/ MAX_ITEMS_PER_SHUFFLE_PARTITION + 1); - logger.info("Auto computed parallelism :" + parallelism + ", totalComparisons: " + totalComparisons); - return parallelism; - } - - /** - * Its crucial to pick the right parallelism. - * - * totalSubPartitions : this is deemed safe limit, to be nice with Spark. - * inputParallelism : typically number of input file splits - * - * We pick the max such that, we are always safe, but go higher if say a there are a lot of - * input files. 
(otherwise, we will fallback to number of partitions in input and end up with - * slow performance) - */ - private int determineParallelism(int inputParallelism, int totalSubPartitions) { - // If bloom index parallelism is set, use it to to check against the input parallelism and take the max - int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism()); - int joinParallelism = Math.max(totalSubPartitions, indexParallelism); - logger.info("InputParallelism: ${" + inputParallelism + "}, " + - "IndexParallelism: ${" + config.getBloomIndexParallelism() + "}, " + - "TotalSubParts: ${" + totalSubPartitions + "}, " + - "Join Parallelism set to : " + joinParallelism); - return joinParallelism; - } - - /** - * Load all involved files as pair RDD. - */ - @VisibleForTesting - List> loadInvolvedFiles(List partitions, final HoodieTable hoodieTable) { - // Obtain the latest data files from all the partitions. - List> dataFilesList = jsc.parallelize(partitions, Math.max(partitions.size(), 1)) - .flatMapToPair(partitionPath -> { - java.util.Optional latestCommitTime = - hoodieTable.getCommitTimeline().filterCompletedInstants().lastInstant(); - List> filteredFiles = new ArrayList<>(); - if (latestCommitTime.isPresent()) { - filteredFiles = - hoodieTable.getROFileSystemView().getLatestDataFilesBeforeOrOn(partitionPath, - latestCommitTime.get().getTimestamp()) - .map(f -> new Tuple2<>(partitionPath, f)) - .collect(toList()); - } - return filteredFiles.iterator(); - }).collect(); - - if (config.getBloomIndexPruneByRanges()) { - // also obtain file ranges, if range pruning is enabled - return jsc.parallelize(dataFilesList, Math.max(dataFilesList.size(), 1)) - .mapToPair(ft -> { - try { - String[] minMaxKeys = ParquetUtils.readMinMaxRecordKeys(ft._2().getFileStatus().getPath()); - return new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName(), minMaxKeys[0], minMaxKeys[1])); - } catch (MetadataNotFoundException me) { - logger.warn("Unable to find 
range metadata in file :" + ft._2()); - return new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName())); - } - }).collect(); - } else { - return dataFilesList.stream() - .map(ft -> new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName()))) + /** + * Load all involved files as pair RDD. + */ + @VisibleForTesting + List> loadInvolvedFiles(List partitions, + final HoodieTable hoodieTable) { + // Obtain the latest data files from all the partitions. + List> dataFilesList = jsc + .parallelize(partitions, Math.max(partitions.size(), 1)) + .flatMapToPair(partitionPath -> { + java.util.Optional latestCommitTime = + hoodieTable.getCommitTimeline().filterCompletedInstants().lastInstant(); + List> filteredFiles = new ArrayList<>(); + if (latestCommitTime.isPresent()) { + filteredFiles = + hoodieTable.getROFileSystemView().getLatestDataFilesBeforeOrOn(partitionPath, + latestCommitTime.get().getTimestamp()) + .map(f -> new Tuple2<>(partitionPath, f)) .collect(toList()); + } + return filteredFiles.iterator(); + }).collect(); + + if (config.getBloomIndexPruneByRanges()) { + // also obtain file ranges, if range pruning is enabled + return jsc.parallelize(dataFilesList, Math.max(dataFilesList.size(), 1)) + .mapToPair(ft -> { + try { + String[] minMaxKeys = ParquetUtils + .readMinMaxRecordKeys(ft._2().getFileStatus().getPath()); + return new Tuple2<>(ft._1(), + new BloomIndexFileInfo(ft._2().getFileName(), minMaxKeys[0], minMaxKeys[1])); + } catch (MetadataNotFoundException me) { + logger.warn("Unable to find range metadata in file :" + ft._2()); + return new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName())); + } + }).collect(); + } else { + return dataFilesList.stream() + .map(ft -> new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName()))) + .collect(toList()); + } + } + + + @Override + public boolean rollbackCommit(String commitTime) { + // Nope, don't need to do anything. 
+ return true; + } + + /** + * This is not global, since we depend on the partitionPath to do the lookup + */ + @Override + public boolean isGlobal() { + return false; + } + + /** + * No indexes into log files yet. + */ + @Override + public boolean canIndexLogFiles() { + return false; + } + + /** + * Bloom filters are stored, into the same data files. + */ + @Override + public boolean isImplicitWithStorage() { + return true; + } + + /** + * if we dont have key ranges, then also we need to compare against the file. no other choice if + * we do, then only compare the file if the record key falls in range. + */ + private boolean shouldCompareWithFile(BloomIndexFileInfo indexInfo, String recordKey) { + return !indexInfo.hasKeyRanges() || indexInfo.isKeyInRange(recordKey); + } + + + /** + * For each incoming record, produce N output records, 1 each for each file against which the + * record's key needs to be checked. For datasets, where the keys have a definite insert order + * (e.g: timestamp as prefix), the number of files to be compared gets cut down a lot from range + * pruning. + */ + // sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on recordKey + // ranges in the index info. + @VisibleForTesting + JavaPairRDD> explodeRecordRDDWithFileComparisons( + final Map> partitionToFileIndexInfo, + JavaPairRDD partitionRecordKeyPairRDD) { + return partitionRecordKeyPairRDD + .map(partitionRecordKeyPair -> { + String recordKey = partitionRecordKeyPair._2(); + String partitionPath = partitionRecordKeyPair._1(); + + List indexInfos = partitionToFileIndexInfo.get(partitionPath); + List>> recordComparisons = new ArrayList<>(); + if (indexInfos + != null) { // could be null, if there are no files in a given partition yet. + // for each candidate file in partition, that needs to be compared. 
+ for (BloomIndexFileInfo indexInfo : indexInfos) { + if (shouldCompareWithFile(indexInfo, recordKey)) { + recordComparisons.add( + new Tuple2<>(String.format("%s#%s", indexInfo.getFileName(), recordKey), + new Tuple2<>(indexInfo.getFileName(), + new HoodieKey(recordKey, partitionPath)))); + } + } + } + return recordComparisons; + }) + .flatMapToPair(t -> t.iterator()); + } + + /** + * Find out pair. All workload grouped by file-level. + * + * Join PairRDD(PartitionPath, RecordKey) and PairRDD(PartitionPath, File) & then repartition such + * that each RDD partition is a file, then for each file, we do (1) load bloom filter, (2) load + * rowKeys, (3) Tag rowKey + * + * Make sure the parallelism is atleast the groupby parallelism for tagging location + */ + @VisibleForTesting + JavaPairRDD findMatchingFilesForRecordKeys( + final Map> partitionToFileIndexInfo, + JavaPairRDD partitionRecordKeyPairRDD, + int totalSubpartitions) { + + int joinParallelism = determineParallelism(partitionRecordKeyPairRDD.partitions().size(), + totalSubpartitions); + + JavaPairRDD> fileSortedTripletRDD = explodeRecordRDDWithFileComparisons( + partitionToFileIndexInfo, partitionRecordKeyPairRDD) + // sort further based on filename, such that all checking for the file can happen within a single partition, on-the-fly + .sortByKey(true, joinParallelism); + + return fileSortedTripletRDD + .mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(config.getBasePath()), true) + .flatMap(indexLookupResults -> indexLookupResults.iterator()) + .filter(lookupResult -> lookupResult.getMatchingRecordKeys().size() > 0) + .flatMapToPair(lookupResult -> { + List> vals = new ArrayList<>(); + for (String recordKey : lookupResult.getMatchingRecordKeys()) { + vals.add(new Tuple2<>(recordKey, lookupResult.getFileName())); + } + return vals.iterator(); + }); + } + + /** + * Tag the back to the original HoodieRecord RDD. 
+ */ + private JavaRDD> tagLocationBacktoRecords( + JavaPairRDD rowKeyFilenamePairRDD, + JavaRDD> recordRDD) { + JavaPairRDD> rowKeyRecordPairRDD = recordRDD + .mapToPair(record -> new Tuple2<>(record.getRecordKey(), record)); + + // Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null), so we do left outer join. + return rowKeyRecordPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).values().map( + v1 -> { + HoodieRecord record = v1._1(); + if (v1._2().isPresent()) { + String filename = v1._2().get(); + if (filename != null && !filename.isEmpty()) { + record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename), + FSUtils.getFileId(filename))); + } + } + return record; } - } + ); + } - - @Override - public boolean rollbackCommit(String commitTime) { - // Nope, don't need to do anything. - return true; - } - - /** - * This is not global, since we depend on the partitionPath to do the lookup - * - * @return - */ - @Override - public boolean isGlobal() { - return false; - } - - /** - * No indexes into log files yet. - * - * @return - */ - @Override - public boolean canIndexLogFiles() { - return false; - } - - /** - * Bloom filters are stored, into the same data files. - * - * @return - */ - @Override - public boolean isImplicitWithStorage() { - return true; - } - - /** - * if we dont have key ranges, then also we need to compare against the file. no other choice - * if we do, then only compare the file if the record key falls in range. - - * @param indexInfo - * @param recordKey - * @return - */ - private boolean shouldCompareWithFile(BloomIndexFileInfo indexInfo, String recordKey) { - return !indexInfo.hasKeyRanges() || indexInfo.isKeyInRange(recordKey); - } - - - /** - * For each incoming record, produce N output records, 1 each for each file against which the record's key - * needs to be checked. 
For datasets, where the keys have a definite insert order (e.g: timestamp as prefix), - * the number of files to be compared gets cut down a lot from range pruning. - * - * - * @param partitionToFileIndexInfo - * @param partitionRecordKeyPairRDD - * @return - */ - // sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on recordKey - // ranges in the index info. - @VisibleForTesting - JavaPairRDD> explodeRecordRDDWithFileComparisons(final Map> partitionToFileIndexInfo, - JavaPairRDD partitionRecordKeyPairRDD) { - return partitionRecordKeyPairRDD - .map(partitionRecordKeyPair -> { - String recordKey = partitionRecordKeyPair._2(); - String partitionPath = partitionRecordKeyPair._1(); - - List indexInfos = partitionToFileIndexInfo.get(partitionPath); - List>> recordComparisons = new ArrayList<>(); - if (indexInfos != null) { // could be null, if there are no files in a given partition yet. - // for each candidate file in partition, that needs to be compared. - for (BloomIndexFileInfo indexInfo : indexInfos) { - if (shouldCompareWithFile(indexInfo, recordKey)) { - recordComparisons.add( - new Tuple2<>(String.format("%s#%s", indexInfo.getFileName(), recordKey), - new Tuple2<>(indexInfo.getFileName(), new HoodieKey(recordKey, partitionPath)))); - } - } - } - return recordComparisons; - }) - .flatMapToPair(t -> t.iterator()); - } - - /** - * Find out pair. All workload grouped by file-level. 
- * - * Join PairRDD(PartitionPath, RecordKey) and PairRDD(PartitionPath, File) & then repartition - * such that each RDD partition is a file, then for each file, we do - * (1) load bloom filter, - * (2) load rowKeys, - * (3) Tag rowKey - * - * Make sure the parallelism is atleast the groupby parallelism for tagging location - */ - @VisibleForTesting - JavaPairRDD findMatchingFilesForRecordKeys(final Map> partitionToFileIndexInfo, - JavaPairRDD partitionRecordKeyPairRDD, - int totalSubpartitions) { - - int joinParallelism = determineParallelism(partitionRecordKeyPairRDD.partitions().size(), totalSubpartitions); - - JavaPairRDD> fileSortedTripletRDD = explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD) - // sort further based on filename, such that all checking for the file can happen within a single partition, on-the-fly - .sortByKey(true, joinParallelism); - - return fileSortedTripletRDD - .mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(config.getBasePath()), true) - .flatMap(indexLookupResults -> indexLookupResults.iterator()) - .filter(lookupResult -> lookupResult.getMatchingRecordKeys().size() > 0) - .flatMapToPair(lookupResult -> { - List> vals = new ArrayList<>(); - for (String recordKey : lookupResult.getMatchingRecordKeys()) { - vals.add(new Tuple2<>(recordKey, lookupResult.getFileName())); - } - return vals.iterator(); - }); - } - - /** - * Tag the back to the original HoodieRecord RDD. - */ - private JavaRDD> tagLocationBacktoRecords(JavaPairRDD rowKeyFilenamePairRDD, - JavaRDD> recordRDD) { - JavaPairRDD> rowKeyRecordPairRDD = recordRDD - .mapToPair(record -> new Tuple2<>(record.getRecordKey(), record)); - - // Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null), so we do left outer join. 
- return rowKeyRecordPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).values().map( - v1 -> { - HoodieRecord record = v1._1(); - if (v1._2().isPresent()) { - String filename = v1._2().get(); - if (filename != null && !filename.isEmpty()) { - record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename), - FSUtils.getFileId(filename))); - } - } - return record; - } - ); - } - - @Override - public JavaRDD updateLocation(JavaRDD writeStatusRDD, HoodieTable hoodieTable) { - return writeStatusRDD; - } + @Override + public JavaRDD updateLocation(JavaRDD writeStatusRDD, + HoodieTable hoodieTable) { + return writeStatusRDD; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndexCheckFunction.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndexCheckFunction.java index 9eb3c8996..0d562ae86 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndexCheckFunction.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndexCheckFunction.java @@ -24,172 +24,182 @@ import com.uber.hoodie.common.util.ParquetUtils; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIndexException; import com.uber.hoodie.func.LazyIterableIterator; - -import org.apache.hadoop.fs.Path; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.api.java.function.Function2; - import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; - +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.function.Function2; import scala.Tuple2; /** * Function performing actual checking of RDD parition containing (fileId, hoodieKeys) against the * actual files */ -public class HoodieBloomIndexCheckFunction implements Function2>>, Iterator>> { +public class HoodieBloomIndexCheckFunction implements 
+ Function2>>, Iterator>> { - private static Logger logger = LogManager.getLogger(HoodieBloomIndexCheckFunction.class); + private static Logger logger = LogManager.getLogger(HoodieBloomIndexCheckFunction.class); - private final String basePath; + private final String basePath; - public HoodieBloomIndexCheckFunction(String basePath) { - this.basePath = basePath; + public HoodieBloomIndexCheckFunction(String basePath) { + this.basePath = basePath; + } + + /** + * Given a list of row keys and one file, return only row keys existing in that file. + */ + public static List checkCandidatesAgainstFile(List candidateRecordKeys, + Path filePath) throws HoodieIndexException { + List foundRecordKeys = new ArrayList<>(); + try { + // Load all rowKeys from the file, to double-confirm + if (!candidateRecordKeys.isEmpty()) { + Set fileRowKeys = ParquetUtils.readRowKeysFromParquet(filePath); + logger.info("Loading " + fileRowKeys.size() + " row keys from " + filePath); + if (logger.isDebugEnabled()) { + logger.debug("Keys from " + filePath + " => " + fileRowKeys); + } + for (String rowKey : candidateRecordKeys) { + if (fileRowKeys.contains(rowKey)) { + foundRecordKeys.add(rowKey); + } + } + logger.info("After checking with row keys, we have " + foundRecordKeys.size() + + " results, for file " + filePath + " => " + foundRecordKeys); + if (logger.isDebugEnabled()) { + logger.debug("Keys matching for file " + filePath + " => " + foundRecordKeys); + } + } + } catch (Exception e) { + throw new HoodieIndexException("Error checking candidate keys against file.", e); } + return foundRecordKeys; + } - /** - * Given a list of row keys and one file, return only row keys existing in that file. 
- */ - public static List checkCandidatesAgainstFile(List candidateRecordKeys, Path filePath) throws HoodieIndexException { - List foundRecordKeys = new ArrayList<>(); - try { - // Load all rowKeys from the file, to double-confirm - if (!candidateRecordKeys.isEmpty()) { - Set fileRowKeys = ParquetUtils.readRowKeysFromParquet(filePath); - logger.info("Loading " + fileRowKeys.size() + " row keys from " + filePath); - if (logger.isDebugEnabled()) { - logger.debug("Keys from " + filePath + " => " + fileRowKeys); - } - for (String rowKey : candidateRecordKeys) { - if (fileRowKeys.contains(rowKey)) { - foundRecordKeys.add(rowKey); - } - } - logger.info("After checking with row keys, we have " + foundRecordKeys.size() + " results, for file " + filePath + " => " + foundRecordKeys); - if (logger.isDebugEnabled()) { - logger.debug("Keys matching for file " + filePath + " => " + foundRecordKeys); - } - } - } catch (Exception e){ - throw new HoodieIndexException("Error checking candidate keys against file.", e); - } - return foundRecordKeys; + class LazyKeyCheckIterator extends + LazyIterableIterator>, List> { + + private List candidateRecordKeys; + + private BloomFilter bloomFilter; + + private String currentFile; + + private String currentParitionPath; + + LazyKeyCheckIterator( + Iterator>> fileParitionRecordKeyTripletItr) { + super(fileParitionRecordKeyTripletItr); + currentFile = null; + candidateRecordKeys = new ArrayList<>(); + bloomFilter = null; + currentParitionPath = null; } - class LazyKeyCheckIterator extends LazyIterableIterator>, List> { - - private List candidateRecordKeys; - - private BloomFilter bloomFilter; - - private String currentFile; - - private String currentParitionPath; - - LazyKeyCheckIterator(Iterator>> fileParitionRecordKeyTripletItr) { - super(fileParitionRecordKeyTripletItr); - currentFile = null; - candidateRecordKeys = new ArrayList<>(); - bloomFilter = null; - currentParitionPath = null; - } - - @Override - protected void start() { - } - - 
private void initState(String fileName, String partitionPath) throws HoodieIndexException { - try { - Path filePath = new Path(basePath + "/" + partitionPath + "/" + fileName); - bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(filePath); - candidateRecordKeys = new ArrayList<>(); - currentFile = fileName; - currentParitionPath = partitionPath; - } catch (Exception e) { - throw new HoodieIndexException("Error checking candidate keys against file.", e); - } - } - - @Override - protected List computeNext() { - - List ret = new ArrayList<>(); - try { - // process one file in each go. - while (inputItr.hasNext()) { - - Tuple2> currentTuple = inputItr.next(); - String fileName = currentTuple._2._1; - String partitionPath = currentTuple._2._2.getPartitionPath(); - String recordKey = currentTuple._2._2.getRecordKey(); - - // lazily init state - if (currentFile == null) { - initState(fileName, partitionPath); - } - - // if continue on current file) - if (fileName.equals(currentFile)) { - // check record key against bloom filter of current file & add to possible keys if needed - if (bloomFilter.mightContain(recordKey)) { - if (logger.isDebugEnabled()) { - logger.debug("#1 Adding " + recordKey + " as candidate for file " + fileName); - } - candidateRecordKeys.add(recordKey); - } - } else { - // do the actual checking of file & break out - Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile); - logger.info("#1 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys.size() + " for " + filePath); - if (logger.isDebugEnabled()) { - logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys); - } - ret.add(new IndexLookupResult(currentFile, checkCandidatesAgainstFile(candidateRecordKeys, filePath))); - - initState(fileName, partitionPath); - if (bloomFilter.mightContain(recordKey)) { - if (logger.isDebugEnabled()) { - logger.debug("#2 Adding " + recordKey + " as candidate for file " + 
fileName); - } - candidateRecordKeys.add(recordKey); - } - break; - } - } - - // handle case, where we ran out of input, finish pending work, update return val - if (!inputItr.hasNext()) { - Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile); - logger.info("#2 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys.size() + " for " + filePath); - if (logger.isDebugEnabled()) { - logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys); - } - ret.add(new IndexLookupResult(currentFile, checkCandidatesAgainstFile(candidateRecordKeys, filePath))); - } - - } catch (Throwable e) { - if (e instanceof HoodieException) { - throw e; - } - throw new HoodieIndexException("Error checking bloom filter index. ", e); - } - - return ret; - } - - @Override - protected void end() { - } - } - - @Override - public Iterator> call(Integer partition, - Iterator>> fileParitionRecordKeyTripletItr) throws Exception { - return new LazyKeyCheckIterator(fileParitionRecordKeyTripletItr); + protected void start() { } + + private void initState(String fileName, String partitionPath) throws HoodieIndexException { + try { + Path filePath = new Path(basePath + "/" + partitionPath + "/" + fileName); + bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(filePath); + candidateRecordKeys = new ArrayList<>(); + currentFile = fileName; + currentParitionPath = partitionPath; + } catch (Exception e) { + throw new HoodieIndexException("Error checking candidate keys against file.", e); + } + } + + @Override + protected List computeNext() { + + List ret = new ArrayList<>(); + try { + // process one file in each go. 
+ while (inputItr.hasNext()) { + + Tuple2> currentTuple = inputItr.next(); + String fileName = currentTuple._2._1; + String partitionPath = currentTuple._2._2.getPartitionPath(); + String recordKey = currentTuple._2._2.getRecordKey(); + + // lazily init state + if (currentFile == null) { + initState(fileName, partitionPath); + } + + // if continue on current file) + if (fileName.equals(currentFile)) { + // check record key against bloom filter of current file & add to possible keys if needed + if (bloomFilter.mightContain(recordKey)) { + if (logger.isDebugEnabled()) { + logger.debug("#1 Adding " + recordKey + " as candidate for file " + fileName); + } + candidateRecordKeys.add(recordKey); + } + } else { + // do the actual checking of file & break out + Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile); + logger.info( + "#1 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys + .size() + " for " + filePath); + if (logger.isDebugEnabled()) { + logger + .debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys); + } + ret.add(new IndexLookupResult(currentFile, + checkCandidatesAgainstFile(candidateRecordKeys, filePath))); + + initState(fileName, partitionPath); + if (bloomFilter.mightContain(recordKey)) { + if (logger.isDebugEnabled()) { + logger.debug("#2 Adding " + recordKey + " as candidate for file " + fileName); + } + candidateRecordKeys.add(recordKey); + } + break; + } + } + + // handle case, where we ran out of input, finish pending work, update return val + if (!inputItr.hasNext()) { + Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile); + logger.info( + "#2 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys + .size() + " for " + filePath); + if (logger.isDebugEnabled()) { + logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys); + } + ret.add(new IndexLookupResult(currentFile, + 
checkCandidatesAgainstFile(candidateRecordKeys, filePath))); + } + + } catch (Throwable e) { + if (e instanceof HoodieException) { + throw e; + } + throw new HoodieIndexException("Error checking bloom filter index. ", e); + } + + return ret; + } + + @Override + protected void end() { + } + } + + + @Override + public Iterator> call(Integer partition, + Iterator>> fileParitionRecordKeyTripletItr) + throws Exception { + return new LazyKeyCheckIterator(fileParitionRecordKeyTripletItr); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/IndexLookupResult.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/IndexLookupResult.java index 23a89b945..37760646e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/IndexLookupResult.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/IndexLookupResult.java @@ -25,21 +25,21 @@ import java.util.List; */ public class IndexLookupResult { - private String fileName; + private String fileName; - private List matchingRecordKeys; + private List matchingRecordKeys; - public IndexLookupResult(String fileName, List matchingRecordKeys) { - this.fileName = fileName; - this.matchingRecordKeys = matchingRecordKeys; - } + public IndexLookupResult(String fileName, List matchingRecordKeys) { + this.fileName = fileName; + this.matchingRecordKeys = matchingRecordKeys; + } - public String getFileName() { - return fileName; - } + public String getFileName() { + return fileName; + } - public List getMatchingRecordKeys() { - return matchingRecordKeys; - } + public List getMatchingRecordKeys() { + return matchingRecordKeys; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java index ba45bc666..a361a7b06 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java +++ 
b/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java @@ -19,7 +19,6 @@ package com.uber.hoodie.index.bucketed; import com.google.common.base.Optional; - import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; @@ -29,96 +28,86 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieIndexException; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.table.HoodieTable; - import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; - import scala.Tuple2; /** - * An `stateless` index implementation that will using a deterministic mapping function to - * determine the fileID for a given record. - * - * Pros: - * - Fast - * - * Cons : - * - Need to tune the number of buckets per partition path manually (FIXME: Need to autotune this) - * - Could increase write amplification on copy-on-write storage since inserts always rewrite files - * - Not global. - * + * An `stateless` index implementation that will using a deterministic mapping function to determine + * the fileID for a given record. * + * Pros: - Fast * + * Cons : - Need to tune the number of buckets per partition path manually (FIXME: Need to autotune + * this) - Could increase write amplification on copy-on-write storage since inserts always rewrite + * files - Not global. 
*/ public class BucketedIndex extends HoodieIndex { - private static Logger logger = LogManager.getLogger(BucketedIndex.class); + private static Logger logger = LogManager.getLogger(BucketedIndex.class); - public BucketedIndex(HoodieWriteConfig config, JavaSparkContext jsc) { - super(config, jsc); - } + public BucketedIndex(HoodieWriteConfig config, JavaSparkContext jsc) { + super(config, jsc); + } - private String getBucket(String recordKey) { - return String.valueOf(recordKey.hashCode() % config.getNumBucketsPerPartition()); - } + private String getBucket(String recordKey) { + return String.valueOf(recordKey.hashCode() % config.getNumBucketsPerPartition()); + } - @Override - public JavaPairRDD> fetchRecordLocation(JavaRDD hoodieKeys, HoodieTable table) { - return hoodieKeys.mapToPair(hk -> new Tuple2<>(hk, Optional.of(getBucket(hk.getRecordKey())))); - } + @Override + public JavaPairRDD> fetchRecordLocation(JavaRDD hoodieKeys, + HoodieTable table) { + return hoodieKeys.mapToPair(hk -> new Tuple2<>(hk, Optional.of(getBucket(hk.getRecordKey())))); + } - @Override - public JavaRDD> tagLocation(JavaRDD> recordRDD, HoodieTable hoodieTable) throws HoodieIndexException { - return recordRDD.map(record -> { - String bucket = getBucket(record.getRecordKey()); - //HACK(vc) a non-existent commit is provided here. - record.setCurrentLocation(new HoodieRecordLocation("000", bucket)); - return record; - }); - } + @Override + public JavaRDD> tagLocation(JavaRDD> recordRDD, + HoodieTable hoodieTable) throws HoodieIndexException { + return recordRDD.map(record -> { + String bucket = getBucket(record.getRecordKey()); + //HACK(vc) a non-existent commit is provided here. 
+ record.setCurrentLocation(new HoodieRecordLocation("000", bucket)); + return record; + }); + } - @Override - public JavaRDD updateLocation(JavaRDD writeStatusRDD, HoodieTable hoodieTable) throws HoodieIndexException { - return writeStatusRDD; - } + @Override + public JavaRDD updateLocation(JavaRDD writeStatusRDD, + HoodieTable hoodieTable) throws HoodieIndexException { + return writeStatusRDD; + } - @Override - public boolean rollbackCommit(String commitTime) { - // nothing to rollback in the index. - return true; - } + @Override + public boolean rollbackCommit(String commitTime) { + // nothing to rollback in the index. + return true; + } - /** - * Bucketing is still done within each partition. - * - * @return - */ - @Override - public boolean isGlobal() { - return false; - } + /** + * Bucketing is still done within each partition. + */ + @Override + public boolean isGlobal() { + return false; + } - /** - * Since indexing is just a deterministic hash, we can identify file group correctly even without an index - * on the actual log file. - * - * @return - */ - @Override - public boolean canIndexLogFiles() { - return true; - } + /** + * Since indexing is just a deterministic hash, we can identify file group correctly even without + * an index on the actual log file. + */ + @Override + public boolean canIndexLogFiles() { + return true; + } - /** - * Indexing is just a hash function. - * - * @return - */ - @Override - public boolean isImplicitWithStorage() { - return true; - } + /** + * Indexing is just a hash function. 
+ */ + @Override + public boolean isImplicitWithStorage() { + return true; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java index 39929876f..5d50ff646 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java @@ -19,24 +19,33 @@ package com.uber.hoodie.index.hbase; import com.google.common.base.Optional; -import com.uber.hoodie.common.table.HoodieTimeline; -import com.uber.hoodie.common.table.timeline.HoodieInstant; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.HoodieKey; +import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieRecordPayload; -import com.uber.hoodie.common.model.HoodieRecord; - +import com.uber.hoodie.common.table.HoodieTimeline; +import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.config.HoodieIndexConfig; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieDependentSystemUnavailableException; import com.uber.hoodie.exception.HoodieIndexException; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.*; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.HTable; +import 
org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -45,230 +54,221 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function2; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - /** * Hoodie Index implementation backed by HBase */ public class HBaseIndex extends HoodieIndex { - private final static byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s"); - private final static byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts"); - private final static byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name"); - private final static byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path"); - private static Logger logger = LogManager.getLogger(HBaseIndex.class); + private final static byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s"); + private final static byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts"); + private final static byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name"); + private final static byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path"); - private final String tableName; + private static Logger logger = LogManager.getLogger(HBaseIndex.class); - public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) { - super(config, jsc); - this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP); + private final String tableName; + + public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) { + super(config, jsc); + this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP); + } + + @Override + public JavaPairRDD> fetchRecordLocation( + JavaRDD hoodieKeys, HoodieTable table) { + throw new UnsupportedOperationException("HBase index does not implement check exist yet"); + } + + private 
static Connection hbaseConnection = null; + + private Connection getHBaseConnection() { + Configuration hbaseConfig = HBaseConfiguration.create(); + String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP); + hbaseConfig.set("hbase.zookeeper.quorum", quorum); + String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP); + hbaseConfig.set("hbase.zookeeper.property.clientPort", port); + try { + return ConnectionFactory.createConnection(hbaseConfig); + } catch (IOException e) { + throw new HoodieDependentSystemUnavailableException( + HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port); + } + } + + /** + * Function that tags each HoodieRecord with an existing location, if known. + */ + class LocationTagFunction + implements Function2>, Iterator>> { + + private final HoodieTable hoodieTable; + + LocationTagFunction(HoodieTable hoodieTable) { + this.hoodieTable = hoodieTable; } @Override - public JavaPairRDD> fetchRecordLocation( - JavaRDD hoodieKeys, HoodieTable table) { - throw new UnsupportedOperationException("HBase index does not implement check exist yet"); - } - - private static Connection hbaseConnection = null; - - private Connection getHBaseConnection() { - Configuration hbaseConfig = HBaseConfiguration.create(); - String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP); - hbaseConfig.set("hbase.zookeeper.quorum", quorum); - String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP); - hbaseConfig.set("hbase.zookeeper.property.clientPort", port); - try { - return ConnectionFactory.createConnection(hbaseConfig); - } catch (IOException e) { - throw new HoodieDependentSystemUnavailableException( - HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port); + public Iterator> call(Integer partitionNum, + Iterator> hoodieRecordIterator) { + // Grab the global HBase connection + synchronized (HBaseIndex.class) { + if (hbaseConnection == 
null) { + hbaseConnection = getHBaseConnection(); } - } + } + List> taggedRecords = new ArrayList<>(); + HTable hTable = null; + try { + hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName)); + // Do the tagging. + while (hoodieRecordIterator.hasNext()) { + HoodieRecord rec = hoodieRecordIterator.next(); + // TODO(vc): This may need to be a multi get. + Result result = hTable.get( + new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1) + .addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN) + .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN) + .addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN)); - /** - * Function that tags each HoodieRecord with an existing location, if known. - */ - class LocationTagFunction - implements Function2>, Iterator>> { + // first, attempt to grab location from HBase + if (result.getRow() != null) { + String commitTs = + Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)); + String fileId = + Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)); - private final HoodieTable hoodieTable; - - LocationTagFunction(HoodieTable hoodieTable) { - this.hoodieTable = hoodieTable; + HoodieTimeline commitTimeline = hoodieTable.getCompletedCommitTimeline(); + // if the last commit ts for this row is less than the system commit ts + if (!commitTimeline.empty() && commitTimeline.containsInstant( + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) { + rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); + } + } + taggedRecords.add(rec); + } + } catch (IOException e) { + throw new HoodieIndexException( + "Failed to Tag indexed locations because of exception with HBase Client", e); + } finally { + if (hTable != null) { + try { + hTable.close(); + } catch (IOException e) { + // Ignore + } } - @Override - public Iterator> call(Integer partitionNum, - Iterator> hoodieRecordIterator) { - // Grab the global HBase connection - synchronized (HBaseIndex.class) { - 
if (hbaseConnection == null) { - hbaseConnection = getHBaseConnection(); - } - } - List> taggedRecords = new ArrayList<>(); - HTable hTable = null; - try { - hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName)); - // Do the tagging. - while (hoodieRecordIterator.hasNext()) { - HoodieRecord rec = hoodieRecordIterator.next(); - // TODO(vc): This may need to be a multi get. - Result result = hTable.get( - new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1) - .addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN) - .addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN) - .addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN)); + } + return taggedRecords.iterator(); + } + } - // first, attempt to grab location from HBase - if (result.getRow() != null) { - String commitTs = - Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)); - String fileId = - Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)); + @Override + public JavaRDD> tagLocation(JavaRDD> recordRDD, + HoodieTable hoodieTable) { + return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(hoodieTable), true); + } - HoodieTimeline commitTimeline = hoodieTable.getCompletedCommitTimeline(); - // if the last commit ts for this row is less than the system commit ts - if (!commitTimeline.empty() && commitTimeline.containsInstant( - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) { - rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); - } - } - taggedRecords.add(rec); - } - } catch (IOException e) { - throw new HoodieIndexException( - "Failed to Tag indexed locations because of exception with HBase Client", e); - } + class UpdateLocationTask implements + Function2, Iterator> { - finally { - if (hTable != null) { - try { - hTable.close(); - } catch (IOException e) { - // Ignore - } - } + @Override + public Iterator call(Integer partition, Iterator statusIterator) { - } - return taggedRecords.iterator(); + List 
writeStatusList = new ArrayList<>(); + // Grab the global HBase connection + synchronized (HBaseIndex.class) { + if (hbaseConnection == null) { + hbaseConnection = getHBaseConnection(); } - } - - @Override - public JavaRDD> tagLocation(JavaRDD> recordRDD, HoodieTable hoodieTable) { - return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(hoodieTable), true); - } - - class UpdateLocationTask implements Function2, Iterator> { - @Override - public Iterator call(Integer partition, Iterator statusIterator) { - - List writeStatusList = new ArrayList<>(); - // Grab the global HBase connection - synchronized (HBaseIndex.class) { - if (hbaseConnection == null) { - hbaseConnection = getHBaseConnection(); + } + HTable hTable = null; + try { + hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName)); + while (statusIterator.hasNext()) { + WriteStatus writeStatus = statusIterator.next(); + List puts = new ArrayList<>(); + List deletes = new ArrayList<>(); + try { + for (HoodieRecord rec : writeStatus.getWrittenRecords()) { + if (!writeStatus.isErrored(rec.getKey())) { + java.util.Optional loc = rec.getNewLocation(); + if (loc.isPresent()) { + Put put = new Put(Bytes.toBytes(rec.getRecordKey())); + put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, + Bytes.toBytes(loc.get().getCommitTime())); + put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, + Bytes.toBytes(loc.get().getFileId())); + put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, + Bytes.toBytes(rec.getPartitionPath())); + puts.add(put); + } else { + //Delete existing index for a deleted record + Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey())); + deletes.add(delete); } + } } - HTable hTable = null; - try { - hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName)); - while (statusIterator.hasNext()) { - WriteStatus writeStatus = statusIterator.next(); - List puts = new ArrayList<>(); - List deletes = new ArrayList<>(); - try { - for (HoodieRecord 
rec : writeStatus.getWrittenRecords()) { - if (!writeStatus.isErrored(rec.getKey())) { - java.util.Optional loc = rec.getNewLocation(); - if(loc.isPresent()) { - Put put = new Put(Bytes.toBytes(rec.getRecordKey())); - put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN, - Bytes.toBytes(loc.get().getCommitTime())); - put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN, - Bytes.toBytes(loc.get().getFileId())); - put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN, - Bytes.toBytes(rec.getPartitionPath())); - puts.add(put); - } else { - //Delete existing index for a deleted record - Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey())); - deletes.add(delete); - } - } - } - hTable.put(puts); - hTable.delete(deletes); - hTable.flushCommits(); - } catch (Exception e) { - Exception we = new Exception("Error updating index for " + writeStatus, e); - logger.error(we); - writeStatus.setGlobalError(we); - } - writeStatusList.add(writeStatus); - } - } catch (IOException e) { - throw new HoodieIndexException( - "Failed to Update Index locations because of exception with HBase Client", e); - } finally { - if (hTable != null) { - try { - hTable.close(); - } catch (IOException e) { - // Ignore - } - } - } - return writeStatusList.iterator(); + hTable.put(puts); + hTable.delete(deletes); + hTable.flushCommits(); + } catch (Exception e) { + Exception we = new Exception("Error updating index for " + writeStatus, e); + logger.error(we); + writeStatus.setGlobalError(we); + } + writeStatusList.add(writeStatus); } + } catch (IOException e) { + throw new HoodieIndexException( + "Failed to Update Index locations because of exception with HBase Client", e); + } finally { + if (hTable != null) { + try { + hTable.close(); + } catch (IOException e) { + // Ignore + } + } + } + return writeStatusList.iterator(); } + } - @Override - public JavaRDD updateLocation(JavaRDD writeStatusRDD, - HoodieTable hoodieTable) { - return writeStatusRDD.mapPartitionsWithIndex(new 
UpdateLocationTask(), true); - } + @Override + public JavaRDD updateLocation(JavaRDD writeStatusRDD, + HoodieTable hoodieTable) { + return writeStatusRDD.mapPartitionsWithIndex(new UpdateLocationTask(), true); + } - @Override - public boolean rollbackCommit(String commitTime) { - // Can't really rollback here. HBase only can let you go from recordKey to fileID, - // not the other way around - return true; - } + @Override + public boolean rollbackCommit(String commitTime) { + // Can't really rollback here. HBase only can let you go from recordKey to fileID, + // not the other way around + return true; + } - /** - * Only looks up by recordKey - * - * @return - */ - @Override - public boolean isGlobal() { - return true; - } + /** + * Only looks up by recordKey + */ + @Override + public boolean isGlobal() { + return true; + } - /** - * Mapping is available in HBase already. - * - * @return - */ - @Override - public boolean canIndexLogFiles() { - return true; - } + /** + * Mapping is available in HBase already. + */ + @Override + public boolean canIndexLogFiles() { + return true; + } - /** - * Index needs to be explicitly updated after storage write. - * - * @return - */ - @Override - public boolean isImplicitWithStorage() { - return false; - } + /** + * Index needs to be explicitly updated after storage write. 
+ */ + @Override + public boolean isImplicitWithStorage() { + return false; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java index 7e4d106ec..683c6a75e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieAppendHandle.java @@ -36,13 +36,6 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieAppendException; import com.uber.hoodie.exception.HoodieUpsertException; import com.uber.hoodie.table.HoodieTable; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.Path; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.TaskContext; - import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; @@ -50,155 +43,161 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.TaskContext; /** * IO Operation to append data onto an existing file. 
- * - * @param */ public class HoodieAppendHandle extends HoodieIOHandle { - private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class); - private static AtomicLong recordIndex = new AtomicLong(1); - private final WriteStatus writeStatus; - private final String fileId; - private String partitionPath; - private List> records; - private long recordsWritten = 0; - private long recordsDeleted = 0; - private HoodieLogFile currentLogFile; - private Writer writer; + private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class); + private static AtomicLong recordIndex = new AtomicLong(1); - public HoodieAppendHandle(HoodieWriteConfig config, - String commitTime, - HoodieTable hoodieTable, - String fileId, - Iterator> recordItr) { - super(config, commitTime, hoodieTable); - WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName()); - writeStatus.setStat(new HoodieDeltaWriteStat()); - this.writeStatus = writeStatus; - this.fileId = fileId; - init(recordItr); - } + private final WriteStatus writeStatus; + private final String fileId; + private String partitionPath; + private List> records; + private long recordsWritten = 0; + private long recordsDeleted = 0; + private HoodieLogFile currentLogFile; + private Writer writer; - private void init(Iterator> recordItr) { - List> records = Lists.newArrayList(); - recordItr.forEachRemaining(record -> { - records.add(record); - // extract some information from the first record - if (partitionPath == null) { - partitionPath = record.getPartitionPath(); - // HACK(vc) This also assumes a base file. It will break, if appending without one. 
- String latestValidFilePath = - fileSystemView.getLatestDataFiles(record.getPartitionPath()) - .filter(dataFile -> dataFile.getFileId().equals(fileId)) - .findFirst().get().getFileName(); - String baseCommitTime = FSUtils.getCommitTime(latestValidFilePath); - writeStatus.getStat().setPrevCommit(baseCommitTime); - writeStatus.setFileId(fileId); - writeStatus.setPartitionPath(record.getPartitionPath()); - writeStatus.getStat().setFileId(fileId); + public HoodieAppendHandle(HoodieWriteConfig config, + String commitTime, + HoodieTable hoodieTable, + String fileId, + Iterator> recordItr) { + super(config, commitTime, hoodieTable); + WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName()); + writeStatus.setStat(new HoodieDeltaWriteStat()); + this.writeStatus = writeStatus; + this.fileId = fileId; + init(recordItr); + } - try { - this.writer = HoodieLogFormat.newWriterBuilder() - .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath)) - .withFileId(fileId).overBaseCommit(baseCommitTime) - .withFs(fs).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build(); - this.currentLogFile = writer.getLogFile(); - ((HoodieDeltaWriteStat) writeStatus.getStat()) - .setLogVersion(currentLogFile.getLogVersion()); - ((HoodieDeltaWriteStat) writeStatus.getStat()) - .setLogOffset(writer.getCurrentSize()); - } catch (Exception e) { - logger.error("Error in update task at commit " + commitTime, e); - writeStatus.setGlobalError(e); - throw new HoodieUpsertException( - "Failed to initialize HoodieUpdateHandle for FileId: " + fileId - + " on commit " + commitTime + " on HDFS path " + hoodieTable - .getMetaClient().getBasePath() + partitionPath, e); - } - Path path = new Path(record.getPartitionPath(), - FSUtils.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)); - writeStatus.getStat().setPath(path.toString()); - } - // update the new location of the record, so we know where to find it next - record.setNewLocation(new 
HoodieRecordLocation(commitTime, fileId)); - }); - this.records = records; - } + private void init(Iterator> recordItr) { + List> records = Lists.newArrayList(); + recordItr.forEachRemaining(record -> { + records.add(record); + // extract some information from the first record + if (partitionPath == null) { + partitionPath = record.getPartitionPath(); + // HACK(vc) This also assumes a base file. It will break, if appending without one. + String latestValidFilePath = + fileSystemView.getLatestDataFiles(record.getPartitionPath()) + .filter(dataFile -> dataFile.getFileId().equals(fileId)) + .findFirst().get().getFileName(); + String baseCommitTime = FSUtils.getCommitTime(latestValidFilePath); + writeStatus.getStat().setPrevCommit(baseCommitTime); + writeStatus.setFileId(fileId); + writeStatus.setPartitionPath(record.getPartitionPath()); + writeStatus.getStat().setFileId(fileId); - private Optional getIndexedRecord(HoodieRecord hoodieRecord) { - Optional recordMetadata = hoodieRecord.getData().getMetadata(); try { - Optional avroRecord = hoodieRecord.getData().getInsertValue(schema); - - if(avroRecord.isPresent()) { - String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), - recordIndex.getAndIncrement()); - HoodieAvroUtils - .addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(), - hoodieRecord.getPartitionPath(), fileId); - HoodieAvroUtils - .addCommitMetadataToRecord((GenericRecord) avroRecord.get(), commitTime, seqId); - recordsWritten++; - } else { - recordsDeleted++; - } - - hoodieRecord.deflate(); - writeStatus.markSuccess(hoodieRecord, recordMetadata); - return avroRecord; + this.writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath)) + .withFileId(fileId).overBaseCommit(baseCommitTime) + .withFs(fs).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build(); + this.currentLogFile = writer.getLogFile(); + ((HoodieDeltaWriteStat) 
writeStatus.getStat()) + .setLogVersion(currentLogFile.getLogVersion()); + ((HoodieDeltaWriteStat) writeStatus.getStat()) + .setLogOffset(writer.getCurrentSize()); } catch (Exception e) { - logger.error("Error writing record " + hoodieRecord, e); - writeStatus.markFailure(hoodieRecord, e, recordMetadata); + logger.error("Error in update task at commit " + commitTime, e); + writeStatus.setGlobalError(e); + throw new HoodieUpsertException( + "Failed to initialize HoodieUpdateHandle for FileId: " + fileId + + " on commit " + commitTime + " on HDFS path " + hoodieTable + .getMetaClient().getBasePath() + partitionPath, e); } - return Optional.empty(); - } + Path path = new Path(record.getPartitionPath(), + FSUtils.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)); + writeStatus.getStat().setPath(path.toString()); + } + // update the new location of the record, so we know where to find it next + record.setNewLocation(new HoodieRecordLocation(commitTime, fileId)); + }); + this.records = records; + } - public void doAppend() { + private Optional getIndexedRecord(HoodieRecord hoodieRecord) { + Optional recordMetadata = hoodieRecord.getData().getMetadata(); + try { + Optional avroRecord = hoodieRecord.getData().getInsertValue(schema); - List recordList = new ArrayList<>(); - List keysToDelete = new ArrayList<>(); - Map metadata = Maps.newHashMap(); - metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, commitTime); - records.stream().forEach(record -> { - Optional indexedRecord = getIndexedRecord(record); - if(indexedRecord.isPresent()) { - recordList.add(indexedRecord.get()); - } else { - keysToDelete.add(record.getRecordKey()); - } - }); - try { - if(recordList.size() > 0) { - writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, schema, metadata)); - } - if(keysToDelete.size() > 0) { - writer = writer.appendBlock(new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), metadata)); - } - } catch (Exception e) { - throw new 
HoodieAppendException( - "Failed while appeding records to " + currentLogFile.getPath(), e); - } - } + if (avroRecord.isPresent()) { + String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), + recordIndex.getAndIncrement()); + HoodieAvroUtils + .addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(), + hoodieRecord.getPartitionPath(), fileId); + HoodieAvroUtils + .addCommitMetadataToRecord((GenericRecord) avroRecord.get(), commitTime, seqId); + recordsWritten++; + } else { + recordsDeleted++; + } - public void close() { - try { - if (writer != null) { - writer.close(); - } - writeStatus.getStat().setNumWrites(recordsWritten); - writeStatus.getStat().setNumDeletes(recordsDeleted); - writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size()); - } catch (IOException e) { - throw new HoodieUpsertException("Failed to close UpdateHandle", e); - } + hoodieRecord.deflate(); + writeStatus.markSuccess(hoodieRecord, recordMetadata); + return avroRecord; + } catch (Exception e) { + logger.error("Error writing record " + hoodieRecord, e); + writeStatus.markFailure(hoodieRecord, e, recordMetadata); } + return Optional.empty(); + } - public WriteStatus getWriteStatus() { - return writeStatus; + public void doAppend() { + + List recordList = new ArrayList<>(); + List keysToDelete = new ArrayList<>(); + Map metadata = Maps.newHashMap(); + metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, commitTime); + records.stream().forEach(record -> { + Optional indexedRecord = getIndexedRecord(record); + if (indexedRecord.isPresent()) { + recordList.add(indexedRecord.get()); + } else { + keysToDelete.add(record.getRecordKey()); + } + }); + try { + if (recordList.size() > 0) { + writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, schema, metadata)); + } + if (keysToDelete.size() > 0) { + writer = writer.appendBlock( + new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), 
metadata)); + } + } catch (Exception e) { + throw new HoodieAppendException( + "Failed while appeding records to " + currentLogFile.getPath(), e); } + } + + public void close() { + try { + if (writer != null) { + writer.close(); + } + writeStatus.getStat().setNumWrites(recordsWritten); + writeStatus.getStat().setNumDeletes(recordsDeleted); + writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size()); + } catch (IOException e) { + throw new HoodieUpsertException("Failed to close UpdateHandle", e); + } + } + + public WriteStatus getWriteStatus() { + return writeStatus; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCleanHelper.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCleanHelper.java index 410dea8e3..086b87f89 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCleanHelper.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCleanHelper.java @@ -27,226 +27,212 @@ import com.uber.hoodie.common.table.TableFileSystemView; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.table.HoodieTable; -import org.apache.hadoop.fs.FileSystem; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** * Cleaner is responsible for garbage collecting older files in a given partition path, such that - *

- * 1) It provides sufficient time for existing queries running on older versions, to finish - *

- * 2) It bounds the growth of the files in the file system - *

- * TODO: Should all cleaning be done based on {@link com.uber.hoodie.common.model.HoodieCommitMetadata} + *

1) It provides sufficient time for existing queries running on older versions, to finish

+ * 2) It bounds the growth of the files in the file system

TODO: Should all cleaning be done + * based on {@link com.uber.hoodie.common.model.HoodieCommitMetadata} */ public class HoodieCleanHelper> { - private static Logger logger = LogManager.getLogger(HoodieCleanHelper.class); - private final TableFileSystemView fileSystemView; - private final HoodieTimeline commitTimeline; - private HoodieTable hoodieTable; - private HoodieWriteConfig config; - private FileSystem fs; + private static Logger logger = LogManager.getLogger(HoodieCleanHelper.class); - public HoodieCleanHelper(HoodieTable hoodieTable, HoodieWriteConfig config) { - this.hoodieTable = hoodieTable; - this.fileSystemView = hoodieTable.getCompletedFileSystemView(); - this.commitTimeline = hoodieTable.getCompletedCommitTimeline(); - this.config = config; - this.fs = hoodieTable.getFs(); + private final TableFileSystemView fileSystemView; + private final HoodieTimeline commitTimeline; + private HoodieTable hoodieTable; + private HoodieWriteConfig config; + private FileSystem fs; + + public HoodieCleanHelper(HoodieTable hoodieTable, HoodieWriteConfig config) { + this.hoodieTable = hoodieTable; + this.fileSystemView = hoodieTable.getCompletedFileSystemView(); + this.commitTimeline = hoodieTable.getCompletedCommitTimeline(); + this.config = config; + this.fs = hoodieTable.getFs(); + } + + + /** + * Selects the older versions of files for cleaning, such that it bounds the number of versions of + * each file. This policy is useful, if you are simply interested in querying the table, and you + * don't want too many versions for a single file (i.e run it with versionsRetained = 1) + */ + private List getFilesToCleanKeepingLatestVersions(String partitionPath) + throws IOException { + logger.info("Cleaning " + partitionPath + ", retaining latest " + config + .getCleanerFileVersionsRetained() + " file versions. 
"); + List fileGroups = + fileSystemView.getAllFileGroups(partitionPath) + .collect(Collectors.toList()); + List deletePaths = new ArrayList<>(); + // Collect all the datafiles savepointed by all the savepoints + List savepointedFiles = hoodieTable.getSavepoints().stream() + .flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList()); + + for (HoodieFileGroup fileGroup : fileGroups) { + int keepVersions = config.getCleanerFileVersionsRetained(); + Iterator fileSliceIterator = fileGroup.getAllFileSlices().iterator(); + while (fileSliceIterator.hasNext() && keepVersions > 0) { + // Skip this most recent version + FileSlice nextSlice = fileSliceIterator.next(); + HoodieDataFile dataFile = nextSlice.getDataFile().get(); + if (savepointedFiles.contains(dataFile.getFileName())) { + // do not clean up a savepoint data file + continue; + } + keepVersions--; + } + // Delete the remaining files + while (fileSliceIterator.hasNext()) { + FileSlice nextSlice = fileSliceIterator.next(); + HoodieDataFile dataFile = nextSlice.getDataFile().get(); + deletePaths.add(dataFile.getFileStatus().getPath().toString()); + if (hoodieTable.getMetaClient().getTableType() + == HoodieTableType.MERGE_ON_READ) { + // If merge on read, then clean the log files for the commits as well + deletePaths.addAll(nextSlice.getLogFiles() + .map(file -> file.getPath().toString()) + .collect(Collectors.toList())); + } + } } + return deletePaths; + } - /** - * Selects the older versions of files for cleaning, such that it bounds the number of versions of each file. 
- * This policy is useful, if you are simply interested in querying the table, and you don't want too many - * versions for a single file (i.e run it with versionsRetained = 1) - * - * @param partitionPath - * @return - * @throws IOException - */ - private List getFilesToCleanKeepingLatestVersions(String partitionPath) - throws IOException { - logger.info("Cleaning " + partitionPath + ", retaining latest " + config - .getCleanerFileVersionsRetained() + " file versions. "); - List fileGroups = - fileSystemView.getAllFileGroups(partitionPath) - .collect(Collectors.toList()); - List deletePaths = new ArrayList<>(); - // Collect all the datafiles savepointed by all the savepoints - List savepointedFiles = hoodieTable.getSavepoints().stream() - .flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList()); + /** + * Selects the versions for file for cleaning, such that it

- Leaves the latest version of the + * file untouched - For older versions, - It leaves all the commits untouched which have occurred in + * last config.getCleanerCommitsRetained() commits - It leaves ONE commit before this + * window. We assume that the max(query execution time) == commit_batch_time * + * config.getCleanerCommitsRetained(). This is 12 hours by default. This is essential to leave the + * file used by the query that's running for the max time.

This provides the effect of having + * lookback into all changes that happened in the last X commits. (eg: if you retain 24 commits, + * and commit batch time is 30 mins, then you have 12 hrs of lookback)

This policy is the + * default. + */ + private List getFilesToCleanKeepingLatestCommits(String partitionPath) + throws IOException { + int commitsRetained = config.getCleanerCommitsRetained(); + logger.info( + "Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. "); + List deletePaths = new ArrayList<>(); - for (HoodieFileGroup fileGroup : fileGroups) { - int keepVersions = config.getCleanerFileVersionsRetained(); - Iterator fileSliceIterator = fileGroup.getAllFileSlices().iterator(); - while (fileSliceIterator.hasNext() && keepVersions > 0) { - // Skip this most recent version - FileSlice nextSlice = fileSliceIterator.next(); - HoodieDataFile dataFile = nextSlice.getDataFile().get(); - if(savepointedFiles.contains(dataFile.getFileName())) { - // do not clean up a savepoint data file - continue; - } - keepVersions--; - } - // Delete the remaining files - while (fileSliceIterator.hasNext()) { - FileSlice nextSlice = fileSliceIterator.next(); - HoodieDataFile dataFile = nextSlice.getDataFile().get(); - deletePaths.add(dataFile.getFileStatus().getPath().toString()); - if (hoodieTable.getMetaClient().getTableType() - == HoodieTableType.MERGE_ON_READ) { - // If merge on read, then clean the log files for the commits as well - deletePaths.addAll(nextSlice.getLogFiles() - .map(file -> file.getPath().toString()) - .collect(Collectors.toList())); - } + // Collect all the datafiles savepointed by all the savepoints + List savepointedFiles = hoodieTable.getSavepoints().stream() + .flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList()); + + // determine if we have enough commits, to start cleaning. 
+ if (commitTimeline.countInstants() > commitsRetained) { + HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get(); + List fileGroups = + fileSystemView.getAllFileGroups(partitionPath) + .collect(Collectors.toList()); + for (HoodieFileGroup fileGroup : fileGroups) { + List fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList()); + HoodieDataFile dataFile = fileSliceList.get(0).getDataFile().get(); + String lastVersion = dataFile.getCommitTime(); + String lastVersionBeforeEarliestCommitToRetain = + getLatestVersionBeforeCommit(fileSliceList, earliestCommitToRetain); + + // Ensure there are more than 1 version of the file (we only clean old files from updates) + // i.e always spare the last commit. + for (FileSlice aSlice : fileSliceList) { + HoodieDataFile aFile = aSlice.getDataFile().get(); + String fileCommitTime = aFile.getCommitTime(); + if (savepointedFiles.contains(aFile.getFileName())) { + // do not clean up a savepoint data file + continue; + } + // Dont delete the latest commit and also the last commit before the earliest commit we are retaining + // The window of commit retain == max query run time. So a query could be running which still + // uses this file. + if (fileCommitTime.equals(lastVersion) || ( + lastVersionBeforeEarliestCommitToRetain != null && fileCommitTime + .equals(lastVersionBeforeEarliestCommitToRetain))) { + // move on to the next file + continue; + } + + // Always keep the last commit + if (HoodieTimeline.compareTimestamps( + earliestCommitToRetain.getTimestamp(), + fileCommitTime, + HoodieTimeline.GREATER)) { + // this is a commit, that should be cleaned. 
+ deletePaths.add(aFile.getFileStatus().getPath().toString()); + if (hoodieTable.getMetaClient().getTableType() + == HoodieTableType.MERGE_ON_READ) { + // If merge on read, then clean the log files for the commits as well + deletePaths.addAll(aSlice.getLogFiles() + .map(file -> file.getPath().toString()) + .collect(Collectors.toList())); } + } } - return deletePaths; + } } + return deletePaths; + } - /** - * Selects the versions for file for cleaning, such that it - *

- * - Leaves the latest version of the file untouched - * - For older versions, - * - It leaves all the commits untouched which has occured in last config.getCleanerCommitsRetained() commits - * - It leaves ONE commit before this window. We assume that the max(query execution time) == commit_batch_time * config.getCleanerCommitsRetained(). This is 12 hours by default. - * This is essential to leave the file used by the query thats running for the max time. - *

- * This provides the effect of having lookback into all changes that happened in the last X - * commits. (eg: if you retain 24 commits, and commit batch time is 30 mins, then you have 12 hrs of lookback) - *

- * This policy is the default. - * - * @param partitionPath - * @return - * @throws IOException - */ - private List getFilesToCleanKeepingLatestCommits(String partitionPath) - throws IOException { - int commitsRetained = config.getCleanerCommitsRetained(); - logger.info( - "Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. "); - List deletePaths = new ArrayList<>(); - - // Collect all the datafiles savepointed by all the savepoints - List savepointedFiles = hoodieTable.getSavepoints().stream() - .flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList()); - - // determine if we have enough commits, to start cleaning. - if (commitTimeline.countInstants() > commitsRetained) { - HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get(); - List fileGroups = - fileSystemView.getAllFileGroups(partitionPath) - .collect(Collectors.toList()); - for (HoodieFileGroup fileGroup : fileGroups) { - List fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList()); - HoodieDataFile dataFile = fileSliceList.get(0).getDataFile().get(); - String lastVersion = dataFile.getCommitTime(); - String lastVersionBeforeEarliestCommitToRetain = - getLatestVersionBeforeCommit(fileSliceList, earliestCommitToRetain); - - // Ensure there are more than 1 version of the file (we only clean old files from updates) - // i.e always spare the last commit. - for (FileSlice aSlice : fileSliceList) { - HoodieDataFile aFile = aSlice.getDataFile().get(); - String fileCommitTime = aFile.getCommitTime(); - if(savepointedFiles.contains(aFile.getFileName())) { - // do not clean up a savepoint data file - continue; - } - // Dont delete the latest commit and also the last commit before the earliest commit we are retaining - // The window of commit retain == max query run time. So a query could be running which still - // uses this file. 
- if (fileCommitTime.equals(lastVersion) || ( - lastVersionBeforeEarliestCommitToRetain != null && fileCommitTime - .equals(lastVersionBeforeEarliestCommitToRetain))) { - // move on to the next file - continue; - } - - // Always keep the last commit - if (HoodieTimeline.compareTimestamps( - earliestCommitToRetain.getTimestamp(), - fileCommitTime, - HoodieTimeline.GREATER)) { - // this is a commit, that should be cleaned. - deletePaths.add(aFile.getFileStatus().getPath().toString()); - if (hoodieTable.getMetaClient().getTableType() - == HoodieTableType.MERGE_ON_READ) { - // If merge on read, then clean the log files for the commits as well - deletePaths.addAll(aSlice.getLogFiles() - .map(file -> file.getPath().toString()) - .collect(Collectors.toList())); - } - } - } - } - } - - return deletePaths; + /** + * Gets the latest version < commitTime. This version file could still be used by queries. + */ + private String getLatestVersionBeforeCommit(List fileSliceList, + HoodieInstant commitTime) { + for (FileSlice file : fileSliceList) { + String fileCommitTime = file.getDataFile().get().getCommitTime(); + if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, + HoodieTimeline.GREATER)) { + // fileList is sorted on the reverse, so the first commit we find <= commitTime is the one we want + return fileCommitTime; + } } + // There is no version of this file which is <= commitTime + return null; + } - /** - * Gets the latest version < commitTime. This version file could still be used by queries. 
- */ - private String getLatestVersionBeforeCommit(List fileSliceList, - HoodieInstant commitTime) { - for (FileSlice file : fileSliceList) { - String fileCommitTime = file.getDataFile().get().getCommitTime(); - if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, - HoodieTimeline.GREATER)) { - // fileList is sorted on the reverse, so the first commit we find <= commitTime is the one we want - return fileCommitTime; - } - } - // There is no version of this file which is <= commitTime - return null; + /** + * Returns files to be cleaned for the given partitionPath based on cleaning policy. + */ + public List getDeletePaths(String partitionPath) throws IOException { + HoodieCleaningPolicy policy = config.getCleanerPolicy(); + List deletePaths; + if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) { + deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath); + } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) { + deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath); + } else { + throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name()); } + logger.info( + deletePaths.size() + " patterns used to delete in partition path:" + partitionPath); - /** - * Returns files to be cleaned for the given partitionPath based on cleaning policy. 
- */ - public List getDeletePaths(String partitionPath) throws IOException { - HoodieCleaningPolicy policy = config.getCleanerPolicy(); - List deletePaths; - if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) { - deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath); - } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) { - deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath); - } else { - throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name()); - } - logger.info( - deletePaths.size() + " patterns used to delete in partition path:" + partitionPath); + return deletePaths; + } - return deletePaths; - } - - /** - * Returns earliest commit to retain based on cleaning policy. - */ - public Optional getEarliestCommitToRetain() { - Optional earliestCommitToRetain = Optional.empty(); - int commitsRetained = config.getCleanerCommitsRetained(); - if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS - && commitTimeline.countInstants() > commitsRetained) { - earliestCommitToRetain = - commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained); - } - return earliestCommitToRetain; + /** + * Returns earliest commit to retain based on cleaning policy. 
+ */ + public Optional getEarliestCommitToRetain() { + Optional earliestCommitToRetain = Optional.empty(); + int commitsRetained = config.getCleanerCommitsRetained(); + if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS + && commitTimeline.countInstants() > commitsRetained) { + earliestCommitToRetain = + commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained); } + return earliestCommitToRetain; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCommitArchiveLog.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCommitArchiveLog.java index 18279a498..a707590c4 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCommitArchiveLog.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCommitArchiveLog.java @@ -39,6 +39,12 @@ import com.uber.hoodie.exception.HoodieCommitException; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; @@ -46,225 +52,233 @@ import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - /** * Archiver to bound the growth of .commit files */ public class HoodieCommitArchiveLog { - private static Logger log = LogManager.getLogger(HoodieCommitArchiveLog.class); - private final Path archiveFilePath; - private final FileSystem fs; - private final HoodieWriteConfig config; - private HoodieLogFormat.Writer writer; + private static Logger log = 
LogManager.getLogger(HoodieCommitArchiveLog.class); - public HoodieCommitArchiveLog(HoodieWriteConfig config, FileSystem fs) { - this.fs = fs; - this.config = config; - this.archiveFilePath = HoodieArchivedTimeline - .getArchiveLogPath(config.getBasePath() + "/" + HoodieTableMetaClient.METAFOLDER_NAME); + private final Path archiveFilePath; + private final FileSystem fs; + private final HoodieWriteConfig config; + private HoodieLogFormat.Writer writer; + + public HoodieCommitArchiveLog(HoodieWriteConfig config, FileSystem fs) { + this.fs = fs; + this.config = config; + this.archiveFilePath = HoodieArchivedTimeline + .getArchiveLogPath(config.getBasePath() + "/" + HoodieTableMetaClient.METAFOLDER_NAME); + } + + private HoodieLogFormat.Writer openWriter() { + try { + if (this.writer == null) { + return HoodieLogFormat.newWriterBuilder() + .onParentPath(archiveFilePath.getParent()) + .withFileId(archiveFilePath.getName()) + .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION) + .withFs(fs) + .overBaseCommit("").build(); + } else { + return this.writer; + } + } catch (InterruptedException | IOException e) { + throw new HoodieException("Unable to initialize HoodieLogFormat writer", e); + } + } + + private void close() { + try { + if (this.writer != null) { + this.writer.close(); + } + } catch (IOException e) { + throw new HoodieException("Unable to close HoodieLogFormat writer", e); + } + } + + /** + * Check if commits need to be archived. If yes, archive commits. 
+ */ + public boolean archiveIfRequired() { + try { + List instantsToArchive = getInstantsToArchive().collect(Collectors.toList()); + boolean success = true; + if (instantsToArchive.iterator().hasNext()) { + this.writer = openWriter(); + log.info("Archiving instants " + instantsToArchive); + archive(instantsToArchive); + success = deleteArchivedInstants(instantsToArchive); + } else { + log.info("No Instants to archive"); + } + return success; + } finally { + close(); + } + } + + private Stream getInstantsToArchive() { + + // TODO : rename to max/minInstantsToKeep + int maxCommitsToKeep = config.getMaxCommitsToKeep(); + int minCommitsToKeep = config.getMinCommitsToKeep(); + + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config); + + // GroupBy each action and limit each action timeline to maxCommitsToKeep + HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline() + .getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION, + HoodieTimeline.ROLLBACK_ACTION)); + Stream instants = cleanAndRollbackTimeline.getInstants() + .collect(Collectors.groupingBy(s -> s.getAction())) + .entrySet() + .stream() + .map(i -> { + if (i.getValue().size() > maxCommitsToKeep) { + return i.getValue().subList(0, i.getValue().size() - minCommitsToKeep); + } else { + return new ArrayList(); + } + }) + .flatMap(i -> i.stream()); + + //TODO (na) : Add a way to return actions associated with a timeline and then merge/unify with logic above to avoid Stream.concats + HoodieTimeline commitTimeline = table.getCompletedCommitTimeline(); + // We cannot have any holes in the commit timeline. We cannot archive any commits which are made after the first savepoint present. 
+ Optional firstSavepoint = table.getCompletedSavepointTimeline().firstInstant(); + if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) { + // Actually do the commits + instants = Stream.concat(instants, commitTimeline.getInstants().filter(s -> { + // if no savepoint present, then dont filter + return !(firstSavepoint.isPresent() && HoodieTimeline + .compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(), + HoodieTimeline.LESSER_OR_EQUAL)); + }).limit(commitTimeline.countInstants() - minCommitsToKeep)); } - private HoodieLogFormat.Writer openWriter() { - try { - if(this.writer == null) { - return HoodieLogFormat.newWriterBuilder() - .onParentPath(archiveFilePath.getParent()) - .withFileId(archiveFilePath.getName()) - .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION) - .withFs(fs) - .overBaseCommit("").build(); - } else { - return this.writer; - } - } catch(InterruptedException | IOException e) { - throw new HoodieException("Unable to initialize HoodieLogFormat writer", e); + return instants; + } + + private boolean deleteArchivedInstants(List archivedInstants) { + log.info("Deleting instants " + archivedInstants); + HoodieTableMetaClient metaClient = + new HoodieTableMetaClient(fs, config.getBasePath(), true); + + boolean success = true; + for (HoodieInstant archivedInstant : archivedInstants) { + Path commitFile = + new Path(metaClient.getMetaPath(), archivedInstant.getFileName()); + try { + if (fs.exists(commitFile)) { + success &= fs.delete(commitFile, false); + log.info("Archived and deleted instant file " + commitFile); } + } catch (IOException e) { + throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, + e); + } } + return success; + } - private void close() { - try { - if(this.writer != null) { - this.writer.close(); - } - } catch(IOException e) { - throw new HoodieException("Unable to close HoodieLogFormat writer", e); - } + public void archive(List instants) throws 
HoodieCommitException { + + try { + HoodieTableMetaClient metaClient = + new HoodieTableMetaClient(fs, config.getBasePath(), true); + HoodieTimeline commitTimeline = + metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants(); + + Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema(); + log.info("Wrapper schema " + wrapperSchema.toString()); + List records = new ArrayList<>(); + for (HoodieInstant hoodieInstant : instants) { + records.add(convertToAvroRecord(commitTimeline, hoodieInstant)); + } + HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, wrapperSchema); + this.writer = writer.appendBlock(block); + } catch (Exception e) { + throw new HoodieCommitException("Failed to archive commits", e); } + } - /** - * Check if commits need to be archived. If yes, archive commits. - */ - public boolean archiveIfRequired() { - try { - List instantsToArchive = getInstantsToArchive().collect(Collectors.toList()); - boolean success = true; - if (instantsToArchive.iterator().hasNext()) { - this.writer = openWriter(); - log.info("Archiving instants " + instantsToArchive); - archive(instantsToArchive); - success = deleteArchivedInstants(instantsToArchive); - } else { - log.info("No Instants to archive"); - } - return success; - } finally { - close(); - } + public Path getArchiveFilePath() { + return archiveFilePath; + } + + private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline, + HoodieInstant hoodieInstant) throws IOException { + HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry(); + archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp()); + switch (hoodieInstant.getAction()) { + case HoodieTimeline.CLEAN_ACTION: { + archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils + .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), + HoodieCleanMetadata.class)); + archivedMetaWrapper.setActionType(ActionType.clean.name()); + break; + } + case 
HoodieTimeline.COMMIT_ACTION: { + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); + archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); + archivedMetaWrapper.setActionType(ActionType.commit.name()); + break; + } + case HoodieTimeline.COMPACTION_ACTION: { + com.uber.hoodie.common.model.HoodieCompactionMetadata compactionMetadata = com.uber.hoodie.common.model.HoodieCompactionMetadata + .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); + archivedMetaWrapper + .setHoodieCompactionMetadata(compactionMetadataConverter(compactionMetadata)); + archivedMetaWrapper.setActionType(ActionType.compaction.name()); + break; + } + case HoodieTimeline.ROLLBACK_ACTION: { + archivedMetaWrapper.setHoodieRollbackMetadata(AvroUtils + .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), + HoodieRollbackMetadata.class)); + archivedMetaWrapper.setActionType(ActionType.rollback.name()); + break; + } + case HoodieTimeline.SAVEPOINT_ACTION: { + archivedMetaWrapper.setHoodieSavePointMetadata(AvroUtils + .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), + HoodieSavepointMetadata.class)); + archivedMetaWrapper.setActionType(ActionType.savepoint.name()); + break; + } + case HoodieTimeline.DELTA_COMMIT_ACTION: { + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); + archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); + archivedMetaWrapper.setActionType(ActionType.commit.name()); + break; + } } + return archivedMetaWrapper; + } - private Stream getInstantsToArchive() { + private com.uber.hoodie.avro.model.HoodieCommitMetadata commitMetadataConverter( + HoodieCommitMetadata hoodieCommitMetadata) { + ObjectMapper mapper = new ObjectMapper(); + //Need this to ignore other public get() methods + 
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + com.uber.hoodie.avro.model.HoodieCommitMetadata avroMetaData = + mapper.convertValue(hoodieCommitMetadata, + com.uber.hoodie.avro.model.HoodieCommitMetadata.class); + return avroMetaData; + } - // TODO : rename to max/minInstantsToKeep - int maxCommitsToKeep = config.getMaxCommitsToKeep(); - int minCommitsToKeep = config.getMinCommitsToKeep(); - - HoodieTable table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config); - - // GroupBy each action and limit each action timeline to maxCommitsToKeep - HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline().getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION, - HoodieTimeline.ROLLBACK_ACTION)); - Stream instants = cleanAndRollbackTimeline.getInstants() - .collect(Collectors.groupingBy(s -> s.getAction())) - .entrySet() - .stream() - .map(i -> { - if (i.getValue().size() > maxCommitsToKeep) { - return i.getValue().subList(0, i.getValue().size() - minCommitsToKeep); - } else { - return new ArrayList(); - } - }) - .flatMap(i -> i.stream()); - - //TODO (na) : Add a way to return actions associated with a timeline and then merge/unify with logic above to avoid Stream.concats - HoodieTimeline commitTimeline = table.getCompletedCommitTimeline(); - // We cannot have any holes in the commit timeline. We cannot archive any commits which are made after the first savepoint present. 
- Optional firstSavepoint = table.getCompletedSavepointTimeline().firstInstant(); - if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) { - // Actually do the commits - instants = Stream.concat(instants, commitTimeline.getInstants().filter(s -> { - // if no savepoint present, then dont filter - return !(firstSavepoint.isPresent() && HoodieTimeline - .compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(), - HoodieTimeline.LESSER_OR_EQUAL)); - }).limit(commitTimeline.countInstants() - minCommitsToKeep)); - } - - return instants; - } - - private boolean deleteArchivedInstants(List archivedInstants) { - log.info("Deleting instants " + archivedInstants); - HoodieTableMetaClient metaClient = - new HoodieTableMetaClient(fs, config.getBasePath(), true); - - boolean success = true; - for (HoodieInstant archivedInstant : archivedInstants) { - Path commitFile = - new Path(metaClient.getMetaPath(), archivedInstant.getFileName()); - try { - if (fs.exists(commitFile)) { - success &= fs.delete(commitFile, false); - log.info("Archived and deleted instant file " + commitFile); - } - } catch (IOException e) { - throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, - e); - } - } - return success; - } - - public void archive(List instants) throws HoodieCommitException { - - try { - HoodieTableMetaClient metaClient = - new HoodieTableMetaClient(fs, config.getBasePath(), true); - HoodieTimeline commitTimeline = - metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants(); - - Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema(); - log.info("Wrapper schema " + wrapperSchema.toString()); - List records = new ArrayList<>(); - for (HoodieInstant hoodieInstant : instants) { - records.add(convertToAvroRecord(commitTimeline, hoodieInstant)); - } - HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, wrapperSchema); - this.writer = writer.appendBlock(block); - } catch(Exception 
e) { - throw new HoodieCommitException("Failed to archive commits", e); - } - } - - public Path getArchiveFilePath() { - return archiveFilePath; - } - - private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline, HoodieInstant hoodieInstant) throws IOException { - HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry(); - archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp()); - switch(hoodieInstant.getAction()) { - case HoodieTimeline.CLEAN_ACTION:{ - archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class)); - archivedMetaWrapper.setActionType(ActionType.clean.name()); - break; - } - case HoodieTimeline.COMMIT_ACTION:{ - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); - archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); - archivedMetaWrapper.setActionType(ActionType.commit.name()); - break; - } - case HoodieTimeline.COMPACTION_ACTION:{ - com.uber.hoodie.common.model.HoodieCompactionMetadata compactionMetadata = com.uber.hoodie.common.model.HoodieCompactionMetadata - .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); - archivedMetaWrapper.setHoodieCompactionMetadata(compactionMetadataConverter(compactionMetadata)); - archivedMetaWrapper.setActionType(ActionType.compaction.name()); - break; - } - case HoodieTimeline.ROLLBACK_ACTION:{ - archivedMetaWrapper.setHoodieRollbackMetadata(AvroUtils.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class)); - archivedMetaWrapper.setActionType(ActionType.rollback.name()); - break; - } - case HoodieTimeline.SAVEPOINT_ACTION:{ - archivedMetaWrapper.setHoodieSavePointMetadata(AvroUtils.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class)); - 
archivedMetaWrapper.setActionType(ActionType.savepoint.name()); - break; - } - case HoodieTimeline.DELTA_COMMIT_ACTION:{ - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get()); - archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); - archivedMetaWrapper.setActionType(ActionType.commit.name()); - break; - } - } - return archivedMetaWrapper; - } - - private com.uber.hoodie.avro.model.HoodieCommitMetadata commitMetadataConverter(HoodieCommitMetadata hoodieCommitMetadata) { - ObjectMapper mapper = new ObjectMapper(); - //Need this to ignore other public get() methods - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - com.uber.hoodie.avro.model.HoodieCommitMetadata avroMetaData = - mapper.convertValue(hoodieCommitMetadata, com.uber.hoodie.avro.model.HoodieCommitMetadata.class); - return avroMetaData; - } - - private com.uber.hoodie.avro.model.HoodieCompactionMetadata compactionMetadataConverter(HoodieCompactionMetadata hoodieCompactionMetadata) { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - com.uber.hoodie.avro.model.HoodieCompactionMetadata avroMetaData = mapper.convertValue(hoodieCompactionMetadata, - com.uber.hoodie.avro.model.HoodieCompactionMetadata.class); - return avroMetaData; - } + private com.uber.hoodie.avro.model.HoodieCompactionMetadata compactionMetadataConverter( + HoodieCompactionMetadata hoodieCompactionMetadata) { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + com.uber.hoodie.avro.model.HoodieCompactionMetadata avroMetaData = mapper + .convertValue(hoodieCompactionMetadata, + com.uber.hoodie.avro.model.HoodieCompactionMetadata.class); + return avroMetaData; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java 
b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java index c9680c8f2..629869e18 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java @@ -29,116 +29,111 @@ import com.uber.hoodie.exception.HoodieInsertException; import com.uber.hoodie.io.storage.HoodieStorageWriter; import com.uber.hoodie.io.storage.HoodieStorageWriterFactory; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; +import java.util.Optional; +import java.util.UUID; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.TaskContext; -import java.io.IOException; -import java.util.Optional; -import java.util.UUID; - public class HoodieCreateHandle extends HoodieIOHandle { - private static Logger logger = LogManager.getLogger(HoodieCreateHandle.class); - private final WriteStatus status; - private final HoodieStorageWriter storageWriter; - private final Path path; - private long recordsWritten = 0; - private long recordsDeleted = 0; + private static Logger logger = LogManager.getLogger(HoodieCreateHandle.class); - public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, - HoodieTable hoodieTable, String partitionPath) { - super(config, commitTime, hoodieTable); - this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName()); - status.setFileId(UUID.randomUUID().toString()); - status.setPartitionPath(partitionPath); + private final WriteStatus status; + private final HoodieStorageWriter storageWriter; + private final Path path; + private long recordsWritten = 0; + private long recordsDeleted = 0; - this.path = makeNewPath(partitionPath, TaskContext.getPartitionId(), status.getFileId()); - try { - HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, - commitTime, - new Path(config.getBasePath()), - 
new Path(config.getBasePath(), partitionPath)); - partitionMetadata.trySave(TaskContext.getPartitionId()); - this.storageWriter = - HoodieStorageWriterFactory.getStorageWriter(commitTime, path, hoodieTable, config, schema); - } catch (IOException e) { - throw new HoodieInsertException( - "Failed to initialize HoodieStorageWriter for path " + path, e); - } - logger.info("New InsertHandle for partition :" + partitionPath); + public HoodieCreateHandle(HoodieWriteConfig config, String commitTime, + HoodieTable hoodieTable, String partitionPath) { + super(config, commitTime, hoodieTable); + this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName()); + status.setFileId(UUID.randomUUID().toString()); + status.setPartitionPath(partitionPath); + + this.path = makeNewPath(partitionPath, TaskContext.getPartitionId(), status.getFileId()); + try { + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, + commitTime, + new Path(config.getBasePath()), + new Path(config.getBasePath(), partitionPath)); + partitionMetadata.trySave(TaskContext.getPartitionId()); + this.storageWriter = + HoodieStorageWriterFactory + .getStorageWriter(commitTime, path, hoodieTable, config, schema); + } catch (IOException e) { + throw new HoodieInsertException( + "Failed to initialize HoodieStorageWriter for path " + path, e); } + logger.info("New InsertHandle for partition :" + partitionPath); + } - /** - * Determines whether we can accept the incoming records, into the current file, depending on - * - * - Whether it belongs to the same partitionPath as existing records - * - Whether the current file written bytes lt max file size - * - * @return - */ - public boolean canWrite(HoodieRecord record) { - return storageWriter.canWrite() && record.getPartitionPath() - .equals(status.getPartitionPath()); + /** + * Determines whether we can accept the incoming records, into the current file, depending on + * + * - Whether it belongs to the same partitionPath as existing 
records - Whether the current file + * written bytes lt max file size + */ + public boolean canWrite(HoodieRecord record) { + return storageWriter.canWrite() && record.getPartitionPath() + .equals(status.getPartitionPath()); + } + + /** + * Perform the actual writing of the given record into the backing file. + */ + public void write(HoodieRecord record) { + Optional recordMetadata = record.getData().getMetadata(); + try { + Optional avroRecord = record.getData().getInsertValue(schema); + + if (avroRecord.isPresent()) { + storageWriter.writeAvroWithMetadata(avroRecord.get(), record); + // update the new location of record, so we know where to find it next + record.setNewLocation(new HoodieRecordLocation(commitTime, status.getFileId())); + recordsWritten++; + } else { + recordsDeleted++; + } + record.deflate(); + status.markSuccess(record, recordMetadata); + } catch (Throwable t) { + // Not throwing exception from here, since we don't want to fail the entire job + // for a single record + status.markFailure(record, t, recordMetadata); + logger.error("Error writing record " + record, t); } + } - /** - * Perform the actual writing of the given record into the backing file. 
- * - * @param record - */ - public void write(HoodieRecord record) { - Optional recordMetadata = record.getData().getMetadata(); - try { - Optional avroRecord = record.getData().getInsertValue(schema); + /** + * Performs actions to durably, persist the current changes and returns a WriteStatus object + */ + public WriteStatus close() { + logger.info( + "Closing the file " + status.getFileId() + " as we are done with all the records " + + recordsWritten); + try { + storageWriter.close(); - if(avroRecord.isPresent()) { - storageWriter.writeAvroWithMetadata(avroRecord.get(), record); - // update the new location of record, so we know where to find it next - record.setNewLocation(new HoodieRecordLocation(commitTime, status.getFileId())); - recordsWritten++; - } else { - recordsDeleted++; - } - record.deflate(); - status.markSuccess(record, recordMetadata); - } catch (Throwable t) { - // Not throwing exception from here, since we don't want to fail the entire job - // for a single record - status.markFailure(record, t, recordMetadata); - logger.error("Error writing record " + record, t); - } - } - - /** - * Performs actions to durably, persist the current changes and returns a WriteStatus object - * - * @return - */ - public WriteStatus close() { - logger.info( - "Closing the file " + status.getFileId() + " as we are done with all the records " - + recordsWritten); - try { - storageWriter.close(); - - HoodieWriteStat stat = new HoodieWriteStat(); - stat.setNumWrites(recordsWritten); - stat.setNumDeletes(recordsDeleted); - stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT); - stat.setFileId(status.getFileId()); - String relativePath = path.toString().replace(new Path(config.getBasePath()) + "/", ""); - stat.setPath(relativePath); - stat.setTotalWriteBytes(FSUtils.getFileSize(fs, path)); - stat.setTotalWriteErrors(status.getFailedRecords().size()); - status.setStat(stat); - - return status; - } catch (IOException e) { - throw new HoodieInsertException("Failed to close the 
Insert Handle for path " + path, - e); - } + HoodieWriteStat stat = new HoodieWriteStat(); + stat.setNumWrites(recordsWritten); + stat.setNumDeletes(recordsDeleted); + stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT); + stat.setFileId(status.getFileId()); + String relativePath = path.toString().replace(new Path(config.getBasePath()) + "/", ""); + stat.setPath(relativePath); + stat.setTotalWriteBytes(FSUtils.getFileSize(fs, path)); + stat.setTotalWriteErrors(status.getFailedRecords().size()); + status.setStat(stat); + + return status; + } catch (IOException e) { + throw new HoodieInsertException("Failed to close the Insert Handle for path " + path, + e); } + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieIOHandle.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieIOHandle.java index 8ba4068ad..53ec545fc 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieIOHandle.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieIOHandle.java @@ -24,6 +24,7 @@ import com.uber.hoodie.common.util.HoodieAvroUtils; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -31,68 +32,67 @@ import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; - public abstract class HoodieIOHandle { - private static Logger logger = LogManager.getLogger(HoodieIOHandle.class); - protected final String commitTime; - protected final HoodieWriteConfig config; - protected final FileSystem fs; - protected final HoodieTable hoodieTable; - protected HoodieTimeline hoodieTimeline; - protected TableFileSystemView.ReadOptimizedView fileSystemView; - protected final Schema schema; - public HoodieIOHandle(HoodieWriteConfig config, String commitTime, - 
HoodieTable hoodieTable) { - this.commitTime = commitTime; - this.config = config; - this.fs = FSUtils.getFs(); - this.hoodieTable = hoodieTable; - this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline(); - this.fileSystemView = hoodieTable.getROFileSystemView(); - this.schema = - HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); + private static Logger logger = LogManager.getLogger(HoodieIOHandle.class); + protected final String commitTime; + protected final HoodieWriteConfig config; + protected final FileSystem fs; + protected final HoodieTable hoodieTable; + protected HoodieTimeline hoodieTimeline; + protected TableFileSystemView.ReadOptimizedView fileSystemView; + protected final Schema schema; + + public HoodieIOHandle(HoodieWriteConfig config, String commitTime, + HoodieTable hoodieTable) { + this.commitTime = commitTime; + this.config = config; + this.fs = FSUtils.getFs(); + this.hoodieTable = hoodieTable; + this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline(); + this.fileSystemView = hoodieTable.getROFileSystemView(); + this.schema = + HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); + } + + public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) { + Path path = new Path(config.getBasePath(), partitionPath); + try { + fs.mkdirs(path); // create a new partition as needed. + } catch (IOException e) { + throw new HoodieIOException("Failed to make dir " + path, e); } - public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) { - Path path = new Path(config.getBasePath(), partitionPath); - try { - fs.mkdirs(path); // create a new partition as needed. 
- } catch (IOException e) { - throw new HoodieIOException("Failed to make dir " + path, e); + return new Path(path.toString(), + FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)); + } + + /** + * Deletes any new tmp files written during the current commit, into the partition + */ + public static void cleanupTmpFilesFromCurrentCommit(HoodieWriteConfig config, + String commitTime, + String partitionPath, + int taskPartitionId) { + FileSystem fs = FSUtils.getFs(); + try { + FileStatus[] prevFailedFiles = fs.globStatus(new Path(String + .format("%s/%s/%s", config.getBasePath(), partitionPath, + FSUtils.maskWithoutFileId(commitTime, taskPartitionId)))); + if (prevFailedFiles != null) { + logger.info("Deleting " + prevFailedFiles.length + + " files generated by previous failed attempts."); + for (FileStatus status : prevFailedFiles) { + fs.delete(status.getPath(), false); } - - return new Path(path.toString(), - FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)); + } + } catch (IOException e) { + throw new HoodieIOException("Failed to cleanup Temp files from commit " + commitTime, + e); } + } - /** - * Deletes any new tmp files written during the current commit, into the partition - */ - public static void cleanupTmpFilesFromCurrentCommit(HoodieWriteConfig config, - String commitTime, - String partitionPath, - int taskPartitionId) { - FileSystem fs = FSUtils.getFs(); - try { - FileStatus[] prevFailedFiles = fs.globStatus(new Path(String - .format("%s/%s/%s", config.getBasePath(), partitionPath, - FSUtils.maskWithoutFileId(commitTime, taskPartitionId)))); - if (prevFailedFiles != null) { - logger.info("Deleting " + prevFailedFiles.length - + " files generated by previous failed attempts."); - for (FileStatus status : prevFailedFiles) { - fs.delete(status.getPath(), false); - } - } - } catch (IOException e) { - throw new HoodieIOException("Failed to cleanup Temp files from commit " + commitTime, - e); - } - } - - public Schema getSchema() { - 
return schema; - } + public Schema getSchema() { + return schema; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java index 262da60ca..b61b9d9e8 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java @@ -16,19 +16,23 @@ package com.uber.hoodie.io; -import com.uber.hoodie.common.model.HoodiePartitionMetadata; -import com.uber.hoodie.common.util.ReflectionUtils; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.WriteStatus; +import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.model.HoodieWriteStat; import com.uber.hoodie.common.util.FSUtils; +import com.uber.hoodie.common.util.ReflectionUtils; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieUpsertException; import com.uber.hoodie.io.storage.HoodieStorageWriter; import com.uber.hoodie.io.storage.HoodieStorageWriterFactory; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Optional; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; @@ -36,197 +40,197 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.TaskContext; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Optional; - @SuppressWarnings("Duplicates") public class HoodieMergeHandle extends HoodieIOHandle { - private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class); - private WriteStatus writeStatus; - private 
HashMap> keyToNewRecords; - private HoodieStorageWriter storageWriter; - private Path newFilePath; - private Path oldFilePath; - private long recordsWritten = 0; - private long recordsDeleted = 0; - private long updatedRecordsWritten = 0; + private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class); - public HoodieMergeHandle(HoodieWriteConfig config, - String commitTime, - HoodieTable hoodieTable, - Iterator> recordItr, - String fileId) { - super(config, commitTime, hoodieTable); - init(fileId, recordItr); - } + private WriteStatus writeStatus; + private HashMap> keyToNewRecords; + private HoodieStorageWriter storageWriter; + private Path newFilePath; + private Path oldFilePath; + private long recordsWritten = 0; + private long recordsDeleted = 0; + private long updatedRecordsWritten = 0; - /** - * Load the new incoming records in a map, and extract the old file path. - */ - private void init(String fileId, Iterator> newRecordsItr) { - WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName()); - writeStatus.setStat(new HoodieWriteStat()); - this.writeStatus = writeStatus; - this.keyToNewRecords = new HashMap<>(); + public HoodieMergeHandle(HoodieWriteConfig config, + String commitTime, + HoodieTable hoodieTable, + Iterator> recordItr, + String fileId) { + super(config, commitTime, hoodieTable); + init(fileId, recordItr); + } - try { - // Load the new records in a map - while (newRecordsItr.hasNext()) { - HoodieRecord record = newRecordsItr.next(); - // If the first record, we need to extract some info out - if (oldFilePath == null) { - String latestValidFilePath = fileSystemView - .getLatestDataFiles(record.getPartitionPath()) - .filter(dataFile -> dataFile.getFileId().equals(fileId)) - .findFirst() - .get().getFileName(); - writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath)); + /** + * Load the new incoming records in a map, and extract the old file path. 
+ */ + private void init(String fileId, Iterator> newRecordsItr) { + WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName()); + writeStatus.setStat(new HoodieWriteStat()); + this.writeStatus = writeStatus; + this.keyToNewRecords = new HashMap<>(); - HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, - commitTime, - new Path(config.getBasePath()), - new Path(config.getBasePath(), record.getPartitionPath())); - partitionMetadata.trySave(TaskContext.getPartitionId()); + try { + // Load the new records in a map + while (newRecordsItr.hasNext()) { + HoodieRecord record = newRecordsItr.next(); + // If the first record, we need to extract some info out + if (oldFilePath == null) { + String latestValidFilePath = fileSystemView + .getLatestDataFiles(record.getPartitionPath()) + .filter(dataFile -> dataFile.getFileId().equals(fileId)) + .findFirst() + .get().getFileName(); + writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath)); - oldFilePath = new Path( - config.getBasePath() + "/" + record.getPartitionPath() + "/" - + latestValidFilePath); - String relativePath = new Path( record.getPartitionPath() + "/" + FSUtils - .makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString(); - newFilePath = new Path(config.getBasePath(), relativePath); + HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs, + commitTime, + new Path(config.getBasePath()), + new Path(config.getBasePath(), record.getPartitionPath())); + partitionMetadata.trySave(TaskContext.getPartitionId()); - // handle cases of partial failures, for update task - if (fs.exists(newFilePath)) { - fs.delete(newFilePath, false); - } + oldFilePath = new Path( + config.getBasePath() + "/" + record.getPartitionPath() + "/" + + latestValidFilePath); + String relativePath = new Path(record.getPartitionPath() + "/" + FSUtils + .makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString(); + 
newFilePath = new Path(config.getBasePath(), relativePath); - logger.info(String.format("Merging new data into oldPath %s, as newPath %s", - oldFilePath.toString(), newFilePath.toString())); - // file name is same for all records, in this bunch - writeStatus.setFileId(fileId); - writeStatus.setPartitionPath(record.getPartitionPath()); - writeStatus.getStat().setFileId(fileId); - writeStatus.getStat().setPath(relativePath); - } - keyToNewRecords.put(record.getRecordKey(), record); - // update the new location of the record, so we know where to find it next - record.setNewLocation(new HoodieRecordLocation(commitTime, fileId)); - } - // Create the writer for writing the new version file - storageWriter = HoodieStorageWriterFactory - .getStorageWriter(commitTime, newFilePath, hoodieTable, config, schema); + // handle cases of partial failures, for update task + if (fs.exists(newFilePath)) { + fs.delete(newFilePath, false); + } - } catch (Exception e) { - logger.error("Error in update task at commit " + commitTime, e); - writeStatus.setGlobalError(e); - throw new HoodieUpsertException( - "Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " - + commitTime + " on path " + hoodieTable.getMetaClient().getBasePath(), e); + logger.info(String.format("Merging new data into oldPath %s, as newPath %s", + oldFilePath.toString(), newFilePath.toString())); + // file name is same for all records, in this bunch + writeStatus.setFileId(fileId); + writeStatus.setPartitionPath(record.getPartitionPath()); + writeStatus.getStat().setFileId(fileId); + writeStatus.getStat().setPath(relativePath); } + keyToNewRecords.put(record.getRecordKey(), record); + // update the new location of the record, so we know where to find it next + record.setNewLocation(new HoodieRecordLocation(commitTime, fileId)); + } + // Create the writer for writing the new version file + storageWriter = HoodieStorageWriterFactory + .getStorageWriter(commitTime, newFilePath, hoodieTable, config, 
schema); + + } catch (Exception e) { + logger.error("Error in update task at commit " + commitTime, e); + writeStatus.setGlobalError(e); + throw new HoodieUpsertException( + "Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit " + + commitTime + " on path " + hoodieTable.getMetaClient().getBasePath(), e); } + } - private boolean writeUpdateRecord(HoodieRecord hoodieRecord, Optional indexedRecord) { - Optional recordMetadata = hoodieRecord.getData().getMetadata(); - try { - if(indexedRecord.isPresent()) { - storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord); - recordsWritten++; - updatedRecordsWritten++; - } else { - recordsDeleted++; - } + private boolean writeUpdateRecord(HoodieRecord hoodieRecord, + Optional indexedRecord) { + Optional recordMetadata = hoodieRecord.getData().getMetadata(); + try { + if (indexedRecord.isPresent()) { + storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord); + recordsWritten++; + updatedRecordsWritten++; + } else { + recordsDeleted++; + } - hoodieRecord.deflate(); - writeStatus.markSuccess(hoodieRecord, recordMetadata); - return true; - } catch (Exception e) { - logger.error("Error writing record "+ hoodieRecord, e); - writeStatus.markFailure(hoodieRecord, e, recordMetadata); - } - return false; + hoodieRecord.deflate(); + writeStatus.markSuccess(hoodieRecord, recordMetadata); + return true; + } catch (Exception e) { + logger.error("Error writing record " + hoodieRecord, e); + writeStatus.markFailure(hoodieRecord, e, recordMetadata); } + return false; + } - /** - * Go through an old record. Here if we detect a newer version shows up, we write the new one to the file. 
- */ - public void write(GenericRecord oldRecord) { - String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - HoodieRecord hoodieRecord = keyToNewRecords.get(key); - boolean copyOldRecord = true; - if (keyToNewRecords.containsKey(key)) { - try { - Optional combinedAvroRecord = hoodieRecord.getData().combineAndGetUpdateValue(oldRecord, schema); - if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) { + /** + * Go through an old record. Here if we detect a newer version shows up, we write the new one to + * the file. + */ + public void write(GenericRecord oldRecord) { + String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + HoodieRecord hoodieRecord = keyToNewRecords.get(key); + boolean copyOldRecord = true; + if (keyToNewRecords.containsKey(key)) { + try { + Optional combinedAvroRecord = hoodieRecord.getData() + .combineAndGetUpdateValue(oldRecord, schema); + if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) { /* ONLY WHEN * 1) we have an update for this key AND * 2) We are able to successfully write the the combined new value * * We no longer need to copy the old record over. 
*/ - copyOldRecord = false; - } - keyToNewRecords.remove(key); - } catch (Exception e) { - throw new HoodieUpsertException("Failed to combine/merge new record with old value in storage, for new record {" - + keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e); - } - } - - if (copyOldRecord) { - // this should work as it is, since this is an existing record - String errMsg = "Failed to merge old record into new file for key " + key + " from old file " - + getOldFilePath() + " to new file " + newFilePath; - try { - storageWriter.writeAvro(key, oldRecord); - } catch (ClassCastException e) { - logger.error( - "Schema mismatch when rewriting old record " + oldRecord + " from file " - + getOldFilePath() + " to file " + newFilePath + " with schema " + schema - .toString(true)); - throw new HoodieUpsertException(errMsg, e); - } catch (IOException e) { - logger.error("Failed to merge old record into new file for key " + key + " from old file " - + getOldFilePath() + " to new file " + newFilePath, e); - throw new HoodieUpsertException(errMsg, e); - } - recordsWritten ++; + copyOldRecord = false; } + keyToNewRecords.remove(key); + } catch (Exception e) { + throw new HoodieUpsertException( + "Failed to combine/merge new record with old value in storage, for new record {" + + keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e); + } } - public void close() { - try { - // write out any pending records (this can happen when inserts are turned into updates) - Iterator pendingRecordsItr = keyToNewRecords.keySet().iterator(); - while (pendingRecordsItr.hasNext()) { - String key = pendingRecordsItr.next(); - HoodieRecord hoodieRecord = keyToNewRecords.get(key); - writeUpdateRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(schema)); - } - keyToNewRecords.clear(); - - if (storageWriter != null) { - storageWriter.close(); - } - - writeStatus.getStat().setTotalWriteBytes(FSUtils.getFileSize(fs, newFilePath)); - 
writeStatus.getStat().setNumWrites(recordsWritten); - writeStatus.getStat().setNumDeletes(recordsDeleted); - writeStatus.getStat().setNumUpdateWrites(updatedRecordsWritten); - writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size()); - } catch (IOException e) { - throw new HoodieUpsertException("Failed to close UpdateHandle", e); - } + if (copyOldRecord) { + // this should work as it is, since this is an existing record + String errMsg = "Failed to merge old record into new file for key " + key + " from old file " + + getOldFilePath() + " to new file " + newFilePath; + try { + storageWriter.writeAvro(key, oldRecord); + } catch (ClassCastException e) { + logger.error( + "Schema mismatch when rewriting old record " + oldRecord + " from file " + + getOldFilePath() + " to file " + newFilePath + " with schema " + schema + .toString(true)); + throw new HoodieUpsertException(errMsg, e); + } catch (IOException e) { + logger.error("Failed to merge old record into new file for key " + key + " from old file " + + getOldFilePath() + " to new file " + newFilePath, e); + throw new HoodieUpsertException(errMsg, e); + } + recordsWritten++; } + } - public Path getOldFilePath() { - return oldFilePath; - } + public void close() { + try { + // write out any pending records (this can happen when inserts are turned into updates) + Iterator pendingRecordsItr = keyToNewRecords.keySet().iterator(); + while (pendingRecordsItr.hasNext()) { + String key = pendingRecordsItr.next(); + HoodieRecord hoodieRecord = keyToNewRecords.get(key); + writeUpdateRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(schema)); + } + keyToNewRecords.clear(); - public WriteStatus getWriteStatus() { - return writeStatus; + if (storageWriter != null) { + storageWriter.close(); + } + + writeStatus.getStat().setTotalWriteBytes(FSUtils.getFileSize(fs, newFilePath)); + writeStatus.getStat().setNumWrites(recordsWritten); + writeStatus.getStat().setNumDeletes(recordsDeleted); + 
writeStatus.getStat().setNumUpdateWrites(updatedRecordsWritten); + writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size()); + } catch (IOException e) { + throw new HoodieUpsertException("Failed to close UpdateHandle", e); } + } + + public Path getOldFilePath() { + return oldFilePath; + } + + public WriteStatus getWriteStatus() { + return writeStatus; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/CompactionOperation.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/CompactionOperation.java index 55eb4c364..7304c11d8 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/CompactionOperation.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/CompactionOperation.java @@ -18,7 +18,6 @@ package com.uber.hoodie.io.compact; import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodieLogFile; - import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.io.compact.strategy.CompactionStrategy; import java.io.Serializable; @@ -27,8 +26,8 @@ import java.util.Map; import java.util.stream.Collectors; /** - * Encapsulates all the needed information about a compaction - * and make a decision whether this compaction is effective or not + * Encapsulates all the needed information about a compaction and make a decision whether this + * compaction is effective or not * * @see CompactionStrategy */ diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieCompactor.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieCompactor.java index 8032cec41..e192a2416 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieCompactor.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieCompactor.java @@ -22,29 +22,28 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.config.HoodieWriteConfig; 
import com.uber.hoodie.table.HoodieTable; -import org.apache.spark.api.java.JavaSparkContext; - import java.io.Serializable; import java.util.Date; +import org.apache.spark.api.java.JavaSparkContext; /** * A HoodieCompactor runs compaction on a hoodie table */ public interface HoodieCompactor extends Serializable { - /** - * Compact the delta files with the data files - * @throws Exception - */ - HoodieCompactionMetadata compact(JavaSparkContext jsc, final HoodieWriteConfig config, - HoodieTable hoodieTable) throws Exception; + + /** + * Compact the delta files with the data files + */ + HoodieCompactionMetadata compact(JavaSparkContext jsc, final HoodieWriteConfig config, + HoodieTable hoodieTable) throws Exception; - // Helper methods - default String startCompactionCommit(HoodieTable hoodieTable) { - String commitTime = HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date()); - HoodieActiveTimeline activeTimeline = hoodieTable.getActiveTimeline(); - activeTimeline - .createInflight(new HoodieInstant(true, HoodieTimeline.COMPACTION_ACTION, commitTime)); - return commitTime; - } + // Helper methods + default String startCompactionCommit(HoodieTable hoodieTable) { + String commitTime = HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date()); + HoodieActiveTimeline activeTimeline = hoodieTable.getActiveTimeline(); + activeTimeline + .createInflight(new HoodieInstant(true, HoodieTimeline.COMPACTION_ACTION, commitTime)); + return commitTime; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java index be039570d..7db2da2f7 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java @@ -16,14 +16,14 @@ package com.uber.hoodie.io.compact; +import static java.util.stream.Collectors.toList; + 
import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.model.CompactionWriteStat; -import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieCompactionMetadata; -import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.model.HoodieTableType; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; @@ -36,7 +36,12 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieCompactionException; import com.uber.hoodie.table.HoodieCopyOnWriteTable; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.StreamSupport; import org.apache.avro.Schema; @@ -46,18 +51,10 @@ import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; - -import static java.util.stream.Collectors.*; - /** - * HoodieRealtimeTableCompactor compacts a hoodie table with merge on read storage. - * Computes all possible compactions, passes it through a CompactionFilter and executes - * all the compactions and writes a new version of base files and make a normal commit + * HoodieRealtimeTableCompactor compacts a hoodie table with merge on read storage. 
Computes all + * possible compactions, passes it through a CompactionFilter and executes all the compactions and + * writes a new version of base files and make a normal commit * * @see HoodieCompactor */ @@ -80,7 +77,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor { String compactionCommit = startCompactionCommit(hoodieTable); log.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommit); List partitionPaths = - FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(), config.shouldAssumeDatePartitioning()); + FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(), + config.shouldAssumeDatePartitioning()); log.info("Compaction looking for files to compact in " + partitionPaths + " partitions"); List operations = @@ -89,7 +87,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor { .getRTFileSystemView() .getLatestFileSlices(partitionPath) .map(s -> new CompactionOperation(s.getDataFile().get(), - partitionPath, s.getLogFiles().collect(Collectors.toList()), config)) + partitionPath, s.getLogFiles().collect(Collectors.toList()), config)) .collect(toList()).iterator()).collect(); log.info("Total of " + operations.size() + " compactions are retrieved"); @@ -150,14 +148,15 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor { // Since a DeltaCommit is not defined yet, reading all the records. revisit this soon. 
String maxInstantTime = metaClient.getActiveTimeline() - .getTimelineOfActions( - Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, - HoodieTimeline.COMPACTION_ACTION, - HoodieTimeline.DELTA_COMMIT_ACTION)) - .filterCompletedInstants().lastInstant().get().getTimestamp(); + .getTimelineOfActions( + Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, + HoodieTimeline.COMPACTION_ACTION, + HoodieTimeline.DELTA_COMMIT_ACTION)) + .filterCompletedInstants().lastInstant().get().getTimestamp(); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, metaClient.getBasePath(), - operation.getDeltaFilePaths(), readerSchema, maxInstantTime); + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, + metaClient.getBasePath(), + operation.getDeltaFilePaths(), readerSchema, maxInstantTime); if (!scanner.iterator().hasNext()) { return Lists.newArrayList(); } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/BoundedIOCompactionStrategy.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/BoundedIOCompactionStrategy.java index 697062616..676b3e10e 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/BoundedIOCompactionStrategy.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/BoundedIOCompactionStrategy.java @@ -28,8 +28,8 @@ import java.util.Map; import java.util.Optional; /** - * CompactionStrategy which looks at total IO to be done for the compaction (read + write) - * and limits the list of compactions to be under a configured limit on the IO + * CompactionStrategy which looks at total IO to be done for the compaction (read + write) and + * limits the list of compactions to be under a configured limit on the IO * * @see CompactionStrategy */ @@ -46,7 +46,7 @@ public class BoundedIOCompactionStrategy implements CompactionStrategy { // Total size of all the log files Long totalLogFileSize = 
logFiles.stream().map(HoodieLogFile::getFileSize).filter( Optional::isPresent).map(Optional::get).reduce( - (size1, size2) -> size1 + size2).orElse(0L); + (size1, size2) -> size1 + size2).orElse(0L); // Total read will be the base file + all the log files Long totalIORead = FSUtils.getSizeInMB(dataFile.getFileSize() + totalLogFileSize); // Total write will be similar to the size of the base file @@ -62,7 +62,8 @@ public class BoundedIOCompactionStrategy implements CompactionStrategy { } @Override - public List orderAndFilter(HoodieWriteConfig writeConfig, List operations) { + public List orderAndFilter(HoodieWriteConfig writeConfig, + List operations) { // Iterate through the operations in order and accept operations as long as we are within the IO limit // Preserves the original ordering of compactions List finalOperations = Lists.newArrayList(); diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/CompactionStrategy.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/CompactionStrategy.java index bb452d326..8486774d7 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/CompactionStrategy.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/CompactionStrategy.java @@ -25,12 +25,12 @@ import java.util.List; import java.util.Map; /** - * Strategy for compaction. Pluggable implementation of define how compaction should be done. - * The implementations of this interface can capture the relevant metrics to order and filter - * the final list of compaction operation to run in a single compaction. + * Strategy for compaction. Pluggable implementation of define how compaction should be done. The + * implementations of this interface can capture the relevant metrics to order and filter the final + * list of compaction operation to run in a single compaction. * - * Implementation of CompactionStrategy cannot hold any state. 
- * Difference instantiations can be passed in every time + * Implementation of CompactionStrategy cannot hold any state. Difference instantiations can be + * passed in every time * * @see com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor * @see CompactionOperation @@ -38,8 +38,8 @@ import java.util.Map; public interface CompactionStrategy extends Serializable { /** - * Callback hook when a CompactionOperation is created. Individual strategies can - * capture the metrics they need to decide on the priority. + * Callback hook when a CompactionOperation is created. Individual strategies can capture the + * metrics they need to decide on the priority. * * @param dataFile - Base file to compact * @param partitionPath - Partition path @@ -50,8 +50,8 @@ public interface CompactionStrategy extends Serializable { List logFiles); /** - * Order and Filter the list of compactions. Use the metrics captured with the - * captureMetrics to order and filter out compactions + * Order and Filter the list of compactions. 
Use the metrics captured with the captureMetrics to + * order and filter out compactions * * @param writeConfig - HoodieWriteConfig - config for this compaction is passed in * @param operations - list of compactions collected diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/LogFileSizeBasedCompactionStrategy.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/LogFileSizeBasedCompactionStrategy.java index 60fcf2f7f..2fabc6596 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/LogFileSizeBasedCompactionStrategy.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/LogFileSizeBasedCompactionStrategy.java @@ -27,8 +27,8 @@ import java.util.Optional; import java.util.stream.Collectors; /** - * LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size - * and limits the compactions within a configured IO bound + * LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and + * limits the compactions within a configured IO bound * * @see BoundedIOCompactionStrategy * @see CompactionStrategy diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/UnBoundedCompactionStrategy.java b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/UnBoundedCompactionStrategy.java index c3b145e11..08f46019f 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/UnBoundedCompactionStrategy.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/strategy/UnBoundedCompactionStrategy.java @@ -25,9 +25,9 @@ import java.util.List; import java.util.Map; /** - * UnBoundedCompactionStrategy will not change ordering or filter any compaction. - * It is a pass-through and will compact all the base files which has a log file. - * This usually means no-intelligence on compaction. + * UnBoundedCompactionStrategy will not change ordering or filter any compaction. 
It is a + * pass-through and will compact all the base files which has a log file. This usually means + * no-intelligence on compaction. * * @see CompactionStrategy */ diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java index 363bbb78f..d2af03047 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetConfig.java @@ -17,50 +17,50 @@ package com.uber.hoodie.io.storage; import com.uber.hoodie.avro.HoodieAvroWriteSupport; -import org.apache.avro.Schema; import org.apache.hadoop.conf.Configuration; import org.apache.parquet.hadoop.metadata.CompressionCodecName; public class HoodieParquetConfig { - private HoodieAvroWriteSupport writeSupport; - private CompressionCodecName compressionCodecName; - private int blockSize; - private int pageSize; - private int maxFileSize; - private Configuration hadoopConf; - public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, - CompressionCodecName compressionCodecName, int blockSize, int pageSize, int maxFileSize, - Configuration hadoopConf) { - this.writeSupport = writeSupport; - this.compressionCodecName = compressionCodecName; - this.blockSize = blockSize; - this.pageSize = pageSize; - this.maxFileSize = maxFileSize; - this.hadoopConf = hadoopConf; - } + private HoodieAvroWriteSupport writeSupport; + private CompressionCodecName compressionCodecName; + private int blockSize; + private int pageSize; + private int maxFileSize; + private Configuration hadoopConf; - public HoodieAvroWriteSupport getWriteSupport() { - return writeSupport; - } + public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport, + CompressionCodecName compressionCodecName, int blockSize, int pageSize, int maxFileSize, + Configuration hadoopConf) { + this.writeSupport = writeSupport; + this.compressionCodecName = 
compressionCodecName; + this.blockSize = blockSize; + this.pageSize = pageSize; + this.maxFileSize = maxFileSize; + this.hadoopConf = hadoopConf; + } - public CompressionCodecName getCompressionCodecName() { - return compressionCodecName; - } + public HoodieAvroWriteSupport getWriteSupport() { + return writeSupport; + } - public int getBlockSize() { - return blockSize; - } + public CompressionCodecName getCompressionCodecName() { + return compressionCodecName; + } - public int getPageSize() { - return pageSize; - } + public int getBlockSize() { + return blockSize; + } - public int getMaxFileSize() { - return maxFileSize; - } + public int getPageSize() { + return pageSize; + } - public Configuration getHadoopConf() { - return hadoopConf; - } + public int getMaxFileSize() { + return maxFileSize; + } + + public Configuration getHadoopConf() { + return hadoopConf; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetWriter.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetWriter.java index ca3ed5709..2e207fd7f 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetWriter.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieParquetWriter.java @@ -20,6 +20,8 @@ import com.uber.hoodie.avro.HoodieAvroWriteSupport; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.util.HoodieAvroUtils; +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; @@ -30,79 +32,76 @@ import org.apache.parquet.hadoop.ParquetFileWriter; import org.apache.parquet.hadoop.ParquetWriter; import org.apache.spark.TaskContext; -import java.io.IOException; -import java.util.concurrent.atomic.AtomicLong; - /** - * HoodieParquetWriter extends the ParquetWriter to help limit the 
size of underlying file. - * Provides a way to check if the current file can take more records with the canWrite() - * - * @param + * HoodieParquetWriter extends the ParquetWriter to help limit the size of underlying file. Provides + * a way to check if the current file can take more records with the canWrite() */ public class HoodieParquetWriter extends ParquetWriter implements HoodieStorageWriter { - private static double STREAM_COMPRESSION_RATIO = 0.1; - private static AtomicLong recordIndex = new AtomicLong(1); + + private static double STREAM_COMPRESSION_RATIO = 0.1; + private static AtomicLong recordIndex = new AtomicLong(1); - private final Path file; - private final HoodieWrapperFileSystem fs; - private final long maxFileSize; - private final HoodieAvroWriteSupport writeSupport; - private final String commitTime; - private final Schema schema; + private final Path file; + private final HoodieWrapperFileSystem fs; + private final long maxFileSize; + private final HoodieAvroWriteSupport writeSupport; + private final String commitTime; + private final Schema schema; - private static Configuration registerFileSystem(Configuration conf) { - Configuration returnConf = new Configuration(conf); - String scheme = FileSystem.getDefaultUri(conf).getScheme(); - returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl", - HoodieWrapperFileSystem.class.getName()); - return returnConf; - } + private static Configuration registerFileSystem(Configuration conf) { + Configuration returnConf = new Configuration(conf); + String scheme = FileSystem.getDefaultUri(conf).getScheme(); + returnConf.set("fs." 
+ HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl", + HoodieWrapperFileSystem.class.getName()); + return returnConf; + } - public HoodieParquetWriter(String commitTime, Path file, - HoodieParquetConfig parquetConfig, Schema schema) throws IOException { - super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()), - ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), - parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(), - parquetConfig.getPageSize(), parquetConfig.getPageSize(), - ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, - ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION, - registerFileSystem(parquetConfig.getHadoopConf())); - this.file = - HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()); - this.fs = (HoodieWrapperFileSystem) this.file - .getFileSystem(registerFileSystem(parquetConfig.getHadoopConf())); - // We cannot accurately measure the snappy compressed output file size. 
We are choosing a conservative 10% - // TODO - compute this compression ratio dynamically by looking at the bytes written to the stream and the actual file size reported by HDFS - this.maxFileSize = parquetConfig.getMaxFileSize() + Math - .round(parquetConfig.getMaxFileSize() * STREAM_COMPRESSION_RATIO); - this.writeSupport = parquetConfig.getWriteSupport(); - this.commitTime = commitTime; - this.schema = schema; - } + public HoodieParquetWriter(String commitTime, Path file, + HoodieParquetConfig parquetConfig, Schema schema) throws IOException { + super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()), + ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(), + parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(), + parquetConfig.getPageSize(), parquetConfig.getPageSize(), + ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED, + ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION, + registerFileSystem(parquetConfig.getHadoopConf())); + this.file = + HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()); + this.fs = (HoodieWrapperFileSystem) this.file + .getFileSystem(registerFileSystem(parquetConfig.getHadoopConf())); + // We cannot accurately measure the snappy compressed output file size. 
We are choosing a conservative 10% + // TODO - compute this compression ratio dynamically by looking at the bytes written to the stream and the actual file size reported by HDFS + this.maxFileSize = parquetConfig.getMaxFileSize() + Math + .round(parquetConfig.getMaxFileSize() * STREAM_COMPRESSION_RATIO); + this.writeSupport = parquetConfig.getWriteSupport(); + this.commitTime = commitTime; + this.schema = schema; + } - @Override - public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException { - String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), - recordIndex.getAndIncrement()); - HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, - record.getRecordKey(), - record.getPartitionPath(), - file.getName()); - HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId); - super.write(avroRecord); - writeSupport.add(record.getRecordKey()); - } + @Override + public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException { + String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(), + recordIndex.getAndIncrement()); + HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord, + record.getRecordKey(), + record.getPartitionPath(), + file.getName()); + HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId); + super.write(avroRecord); + writeSupport.add(record.getRecordKey()); + } - public boolean canWrite() { - return fs.getBytesWritten(file) < maxFileSize; - } + public boolean canWrite() { + return fs.getBytesWritten(file) < maxFileSize; + } - @Override public void writeAvro(String key, IndexedRecord object) throws IOException { - super.write(object); - writeSupport.add(key); - } + @Override + public void writeAvro(String key, IndexedRecord object) throws IOException { + super.write(object); + writeSupport.add(key); + } } diff --git 
a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriter.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriter.java index e4fcdc335..f3a39ffb5 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriter.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriter.java @@ -17,13 +17,16 @@ package com.uber.hoodie.io.storage; import com.uber.hoodie.common.model.HoodieRecord; +import java.io.IOException; import org.apache.avro.generic.IndexedRecord; -import java.io.IOException; - public interface HoodieStorageWriter { - void writeAvroWithMetadata(R newRecord, HoodieRecord record) throws IOException; - boolean canWrite(); - void close() throws IOException; - void writeAvro(String key, R oldRecord) throws IOException; + + void writeAvroWithMetadata(R newRecord, HoodieRecord record) throws IOException; + + boolean canWrite(); + + void close() throws IOException; + + void writeAvro(String key, R oldRecord) throws IOException; } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java index b9084dc61..2803fc8f9 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java @@ -16,42 +16,42 @@ package com.uber.hoodie.io.storage; -import com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.avro.HoodieAvroWriteSupport; import com.uber.hoodie.common.BloomFilter; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.util.FSUtils; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.table.HoodieTable; +import java.io.IOException; import org.apache.avro.Schema; import 
org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import java.io.IOException; - public class HoodieStorageWriterFactory { - public static HoodieStorageWriter getStorageWriter( - String commitTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, Schema schema) - throws IOException { - //TODO - based on the metadata choose the implementation of HoodieStorageWriter - // Currently only parquet is supported - return newParquetStorageWriter(commitTime, path, config, schema); - } - private static HoodieStorageWriter newParquetStorageWriter( - String commitTime, Path path, HoodieWriteConfig config, Schema schema) throws IOException { - BloomFilter filter = - new BloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP()); - HoodieAvroWriteSupport writeSupport = - new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter); + public static HoodieStorageWriter getStorageWriter( + String commitTime, Path path, HoodieTable hoodieTable, HoodieWriteConfig config, + Schema schema) + throws IOException { + //TODO - based on the metadata choose the implementation of HoodieStorageWriter + // Currently only parquet is supported + return newParquetStorageWriter(commitTime, path, config, schema); + } - HoodieParquetConfig parquetConfig = - new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, - config.getParquetBlockSize(), config.getParquetPageSize(), - config.getParquetMaxFileSize(), FSUtils.getFs().getConf()); + private static HoodieStorageWriter newParquetStorageWriter( + String commitTime, Path path, HoodieWriteConfig config, Schema schema) throws IOException { + BloomFilter filter = + new BloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP()); + HoodieAvroWriteSupport writeSupport = + new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), 
schema, filter); - return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema); - } + HoodieParquetConfig parquetConfig = + new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, + config.getParquetBlockSize(), config.getParquetPageSize(), + config.getParquetMaxFileSize(), FSUtils.getFs().getConf()); + + return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java index 7cd2b8377..87a628447 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieWrapperFileSystem.java @@ -16,17 +16,6 @@ package com.uber.hoodie.io.storage; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.fs.permission.AclEntry; -import org.apache.hadoop.fs.permission.AclStatus; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.util.Progressable; - import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; @@ -38,646 +27,784 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileChecksum; +import 
org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.ParentNotDirectoryException; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Progressable; /** - * HoodieWrapperFileSystem wraps the default file system. - * It holds state about the open streams in the file system to support getting the - * written size to each of the open streams. + * HoodieWrapperFileSystem wraps the default file system. It holds state about the open streams in + * the file system to support getting the written size to each of the open streams. */ public class HoodieWrapperFileSystem extends FileSystem { - private static final Set SUPPORT_SCHEMES; - public static final String HOODIE_SCHEME_PREFIX = "hoodie-"; - - static { - SUPPORT_SCHEMES = new HashSet<>(); - SUPPORT_SCHEMES.add("file"); - SUPPORT_SCHEMES.add("hdfs"); - SUPPORT_SCHEMES.add("s3"); - - // Hoodie currently relies on underlying object store being fully - // consistent so only regional buckets should be used. 
- SUPPORT_SCHEMES.add("gs"); - SUPPORT_SCHEMES.add("viewfs"); - } - - private ConcurrentMap openStreams = - new ConcurrentHashMap<>(); - private FileSystem fileSystem; - private URI uri; - - @Override public void initialize(URI uri, Configuration conf) throws IOException { - // Get the default filesystem to decorate - fileSystem = FileSystem.get(conf); - // Do not need to explicitly initialize the default filesystem, its done already in the above FileSystem.get - // fileSystem.initialize(FileSystem.getDefaultUri(conf), conf); - // fileSystem.setConf(conf); - this.uri = uri; - } - - @Override public URI getUri() { - return uri; - } - - @Override public FSDataInputStream open(Path f, int bufferSize) throws IOException { - return fileSystem.open(convertToDefaultPath(f), bufferSize); - } - - @Override public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, - int bufferSize, short replication, long blockSize, Progressable progress) - throws IOException { - final Path translatedPath = convertToDefaultPath(f); - return wrapOutputStream(f, fileSystem - .create(translatedPath, permission, overwrite, bufferSize, replication, blockSize, - progress)); - } - - private FSDataOutputStream wrapOutputStream(final Path path, - FSDataOutputStream fsDataOutputStream) throws IOException { - if (fsDataOutputStream instanceof SizeAwareFSDataOutputStream) { - return fsDataOutputStream; - } - - SizeAwareFSDataOutputStream os = - new SizeAwareFSDataOutputStream(fsDataOutputStream, new Runnable() { - @Override public void run() { - openStreams.remove(path.getName()); - } - }); - openStreams.put(path.getName(), os); - return os; - } - - @Override public FSDataOutputStream create(Path f, boolean overwrite) throws IOException { - return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), overwrite)); - } - - @Override public FSDataOutputStream create(Path f) throws IOException { - return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f))); - } 
- - @Override public FSDataOutputStream create(Path f, Progressable progress) throws IOException { - return fileSystem.create(convertToDefaultPath(f), progress); - } - - @Override public FSDataOutputStream create(Path f, short replication) throws IOException { - return fileSystem.create(convertToDefaultPath(f), replication); - } - - @Override public FSDataOutputStream create(Path f, short replication, Progressable progress) - throws IOException { - return fileSystem.create(convertToDefaultPath(f), replication, progress); - } - - @Override public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize) - throws IOException { - return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize); - } - - @Override public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, - Progressable progress) throws IOException { - return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, progress); - } - - @Override - public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, - long blockSize, Progressable progress) throws IOException { - return fileSystem - .create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, - progress); - } - - @Override - public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, - int bufferSize, short replication, long blockSize, Progressable progress) - throws IOException { - return fileSystem - .create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, - progress); - } - - @Override - public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, - int bufferSize, short replication, long blockSize, Progressable progress, - Options.ChecksumOpt checksumOpt) throws IOException { - return fileSystem - .create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, - progress, checksumOpt); - } - - - @Override - public FSDataOutputStream create(Path f, 
boolean overwrite, int bufferSize, short replication, - long blockSize) throws IOException { - return fileSystem - .create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize); - } - - - @Override public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) - throws IOException { - return fileSystem.append(convertToDefaultPath(f), bufferSize, progress); - } - - @Override public boolean rename(Path src, Path dst) throws IOException { - return fileSystem.rename(convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override public boolean delete(Path f, boolean recursive) throws IOException { - return fileSystem.delete(convertToDefaultPath(f), recursive); - } - - @Override public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { - return fileSystem.listStatus(convertToDefaultPath(f)); - } - - @Override public void setWorkingDirectory(Path new_dir) { - fileSystem.setWorkingDirectory(convertToDefaultPath(new_dir)); - } - - @Override public Path getWorkingDirectory() { - return convertToHoodiePath(fileSystem.getWorkingDirectory()); - } - - @Override public boolean mkdirs(Path f, FsPermission permission) throws IOException { - return fileSystem.mkdirs(convertToDefaultPath(f), permission); - } - - @Override public FileStatus getFileStatus(Path f) throws IOException { - return fileSystem.getFileStatus(convertToDefaultPath(f)); - } - - @Override public String getScheme() { - return uri.getScheme(); - } - - @Override public String getCanonicalServiceName() { - return fileSystem.getCanonicalServiceName(); - } - - @Override public String getName() { - return fileSystem.getName(); - } - - @Override public Path makeQualified(Path path) { - return convertToHoodiePath(fileSystem.makeQualified(convertToDefaultPath(path))); - } - - @Override public Token getDelegationToken(String renewer) throws IOException { - return fileSystem.getDelegationToken(renewer); - } - - @Override public Token[] 
addDelegationTokens(String renewer, Credentials credentials) - throws IOException { - return fileSystem.addDelegationTokens(renewer, credentials); - } - - @Override public FileSystem[] getChildFileSystems() { - return fileSystem.getChildFileSystems(); - } - - @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) - throws IOException { - return fileSystem.getFileBlockLocations(file, start, len); - } - - @Override public BlockLocation[] getFileBlockLocations(Path p, long start, long len) - throws IOException { - return fileSystem.getFileBlockLocations(convertToDefaultPath(p), start, len); - } - - @Override public FsServerDefaults getServerDefaults() throws IOException { - return fileSystem.getServerDefaults(); - } - - @Override public FsServerDefaults getServerDefaults(Path p) throws IOException { - return fileSystem.getServerDefaults(convertToDefaultPath(p)); - } - - @Override public Path resolvePath(Path p) throws IOException { - return convertToHoodiePath(fileSystem.resolvePath(convertToDefaultPath(p))); - } - - @Override public FSDataInputStream open(Path f) throws IOException { - return fileSystem.open(convertToDefaultPath(f)); - } - - @Override - public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, - short replication, long blockSize, Progressable progress) throws IOException { - return fileSystem - .createNonRecursive(convertToDefaultPath(f), overwrite, bufferSize, replication, - blockSize, progress); - } - - @Override - public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, - int bufferSize, short replication, long blockSize, Progressable progress) - throws IOException { - return fileSystem - .createNonRecursive(convertToDefaultPath(f), permission, overwrite, bufferSize, - replication, blockSize, progress); - } - - @Override public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, - EnumSet flags, int bufferSize, short 
replication, long blockSize, - Progressable progress) throws IOException { - return fileSystem - .createNonRecursive(convertToDefaultPath(f), permission, flags, bufferSize, replication, - blockSize, progress); - } - - @Override public boolean createNewFile(Path f) throws IOException { - return fileSystem.createNewFile(convertToDefaultPath(f)); - } - - @Override public FSDataOutputStream append(Path f) throws IOException { - return fileSystem.append(convertToDefaultPath(f)); - } - - @Override public FSDataOutputStream append(Path f, int bufferSize) throws IOException { - return fileSystem.append(convertToDefaultPath(f), bufferSize); - } - - @Override public void concat(Path trg, Path[] psrcs) throws IOException { - Path[] psrcsNew = convertDefaults(psrcs); - fileSystem.concat(convertToDefaultPath(trg), psrcsNew); - } - - @Override public short getReplication(Path src) throws IOException { - return fileSystem.getReplication(convertToDefaultPath(src)); - } - - @Override public boolean setReplication(Path src, short replication) throws IOException { - return fileSystem.setReplication(convertToDefaultPath(src), replication); - } - - @Override public boolean delete(Path f) throws IOException { - return fileSystem.delete(convertToDefaultPath(f)); - } - - @Override public boolean deleteOnExit(Path f) throws IOException { - return fileSystem.deleteOnExit(convertToDefaultPath(f)); - } - - @Override public boolean cancelDeleteOnExit(Path f) { - return fileSystem.cancelDeleteOnExit(convertToDefaultPath(f)); - } - - @Override public boolean exists(Path f) throws IOException { - return fileSystem.exists(convertToDefaultPath(f)); - } - - @Override public boolean isDirectory(Path f) throws IOException { - return fileSystem.isDirectory(convertToDefaultPath(f)); - } - - @Override public boolean isFile(Path f) throws IOException { - return fileSystem.isFile(convertToDefaultPath(f)); - } - - @Override public long getLength(Path f) throws IOException { - return 
fileSystem.getLength(convertToDefaultPath(f)); - } - - @Override public ContentSummary getContentSummary(Path f) throws IOException { - return fileSystem.getContentSummary(convertToDefaultPath(f)); - } - - @Override public RemoteIterator listCorruptFileBlocks(Path path) throws IOException { - return fileSystem.listCorruptFileBlocks(convertToDefaultPath(path)); - } - - @Override public FileStatus[] listStatus(Path f, PathFilter filter) - throws FileNotFoundException, IOException { - return fileSystem.listStatus(convertToDefaultPath(f), filter); - } - - @Override public FileStatus[] listStatus(Path[] files) - throws FileNotFoundException, IOException { - return fileSystem.listStatus(convertDefaults(files)); - } - - @Override public FileStatus[] listStatus(Path[] files, PathFilter filter) - throws FileNotFoundException, IOException { - return fileSystem.listStatus(convertDefaults(files), filter); - } - - @Override public FileStatus[] globStatus(Path pathPattern) throws IOException { - return fileSystem.globStatus(convertToDefaultPath(pathPattern)); - } - - @Override public FileStatus[] globStatus(Path pathPattern, PathFilter filter) - throws IOException { - return fileSystem.globStatus(convertToDefaultPath(pathPattern), filter); - } - - @Override public RemoteIterator listLocatedStatus(Path f) - throws FileNotFoundException, IOException { - return fileSystem.listLocatedStatus(convertToDefaultPath(f)); - } - - @Override public RemoteIterator listFiles(Path f, boolean recursive) - throws FileNotFoundException, IOException { - return fileSystem.listFiles(convertToDefaultPath(f), recursive); - } - - @Override public Path getHomeDirectory() { - return convertToHoodiePath(fileSystem.getHomeDirectory()); - } - - @Override public boolean mkdirs(Path f) throws IOException { - return fileSystem.mkdirs(convertToDefaultPath(f)); - } - - @Override public void copyFromLocalFile(Path src, Path dst) throws IOException { - fileSystem.copyFromLocalFile(convertToDefaultPath(src), 
convertToDefaultPath(dst)); - } - - @Override public void moveFromLocalFile(Path[] srcs, Path dst) throws IOException { - fileSystem.moveFromLocalFile(convertDefaults(srcs), convertToDefaultPath(dst)); - } - - @Override public void moveFromLocalFile(Path src, Path dst) throws IOException { - fileSystem.moveFromLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException { - fileSystem.copyFromLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override - public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst) - throws IOException { - fileSystem - .copyFromLocalFile(delSrc, overwrite, convertDefaults(srcs), convertToDefaultPath(dst)); - } - - @Override public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) - throws IOException { - fileSystem.copyFromLocalFile(delSrc, overwrite, convertToDefaultPath(src), - convertToDefaultPath(dst)); - } - - @Override public void copyToLocalFile(Path src, Path dst) throws IOException { - fileSystem.copyToLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override public void moveToLocalFile(Path src, Path dst) throws IOException { - fileSystem.moveToLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException { - fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst)); - } - - @Override - public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem) - throws IOException { - fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst), - useRawLocalFileSystem); - } - - @Override public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) - throws IOException { - return 
convertToHoodiePath(fileSystem.startLocalOutput(convertToDefaultPath(fsOutputFile), - convertToDefaultPath(tmpLocalFile))); - } - - @Override public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) - throws IOException { - fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile), - convertToDefaultPath(tmpLocalFile)); - } - - @Override public void close() throws IOException { - fileSystem.close(); - } - - @Override public long getUsed() throws IOException { - return fileSystem.getUsed(); - } - - @Override public long getBlockSize(Path f) throws IOException { - return fileSystem.getBlockSize(convertToDefaultPath(f)); - } - - @Override public long getDefaultBlockSize() { - return fileSystem.getDefaultBlockSize(); - } - - @Override public long getDefaultBlockSize(Path f) { - return fileSystem.getDefaultBlockSize(convertToDefaultPath(f)); - } - - @Override public short getDefaultReplication() { - return fileSystem.getDefaultReplication(); - } - - @Override public short getDefaultReplication(Path path) { - return fileSystem.getDefaultReplication(convertToDefaultPath(path)); - } - - @Override public void access(Path path, FsAction mode) - throws AccessControlException, FileNotFoundException, IOException { - fileSystem.access(convertToDefaultPath(path), mode); - } - - @Override public void createSymlink(Path target, Path link, boolean createParent) - throws AccessControlException, FileAlreadyExistsException, FileNotFoundException, - ParentNotDirectoryException, UnsupportedFileSystemException, IOException { - fileSystem - .createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent); - } - - @Override public FileStatus getFileLinkStatus(Path f) - throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, - IOException { - return fileSystem.getFileLinkStatus(convertToDefaultPath(f)); - } - - @Override public boolean supportsSymlinks() { - return fileSystem.supportsSymlinks(); - } - - @Override public 
Path getLinkTarget(Path f) throws IOException { - return convertToHoodiePath(fileSystem.getLinkTarget(convertToDefaultPath(f))); - } - - @Override public FileChecksum getFileChecksum(Path f) throws IOException { - return fileSystem.getFileChecksum(convertToDefaultPath(f)); - } - - @Override public FileChecksum getFileChecksum(Path f, long length) throws IOException { - return fileSystem.getFileChecksum(convertToDefaultPath(f), length); - } - - @Override public void setVerifyChecksum(boolean verifyChecksum) { - fileSystem.setVerifyChecksum(verifyChecksum); - } - - @Override public void setWriteChecksum(boolean writeChecksum) { - fileSystem.setWriteChecksum(writeChecksum); - } - - @Override public FsStatus getStatus() throws IOException { - return fileSystem.getStatus(); - } - @Override public FsStatus getStatus(Path p) throws IOException { - return fileSystem.getStatus(convertToDefaultPath(p)); - } - - @Override public void setPermission(Path p, FsPermission permission) throws IOException { - fileSystem.setPermission(convertToDefaultPath(p), permission); - } - - @Override public void setOwner(Path p, String username, String groupname) throws IOException { - fileSystem.setOwner(convertToDefaultPath(p), username, groupname); - } - - @Override public void setTimes(Path p, long mtime, long atime) throws IOException { - fileSystem.setTimes(convertToDefaultPath(p), mtime, atime); - } - - @Override public Path createSnapshot(Path path, String snapshotName) throws IOException { - return convertToHoodiePath( - fileSystem.createSnapshot(convertToDefaultPath(path), snapshotName)); - } - - @Override public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName) - throws IOException { - fileSystem.renameSnapshot(convertToDefaultPath(path), snapshotOldName, snapshotNewName); - } - - @Override public void deleteSnapshot(Path path, String snapshotName) throws IOException { - fileSystem.deleteSnapshot(convertToDefaultPath(path), snapshotName); - } - - 
@Override public void modifyAclEntries(Path path, List aclSpec) throws IOException { - fileSystem.modifyAclEntries(convertToDefaultPath(path), aclSpec); - } - - @Override public void removeAclEntries(Path path, List aclSpec) throws IOException { - fileSystem.removeAclEntries(convertToDefaultPath(path), aclSpec); - } - - @Override public void removeDefaultAcl(Path path) throws IOException { - fileSystem.removeDefaultAcl(convertToDefaultPath(path)); - } - - @Override public void removeAcl(Path path) throws IOException { - fileSystem.removeAcl(convertToDefaultPath(path)); - } - - @Override public void setAcl(Path path, List aclSpec) throws IOException { - fileSystem.setAcl(convertToDefaultPath(path), aclSpec); - } - - @Override public AclStatus getAclStatus(Path path) throws IOException { - return fileSystem.getAclStatus(convertToDefaultPath(path)); - } - - @Override public void setXAttr(Path path, String name, byte[] value) throws IOException { - fileSystem.setXAttr(convertToDefaultPath(path), name, value); - } - - @Override public void setXAttr(Path path, String name, byte[] value, EnumSet flag) - throws IOException { - fileSystem.setXAttr(convertToDefaultPath(path), name, value, flag); - } - - @Override public byte[] getXAttr(Path path, String name) throws IOException { - return fileSystem.getXAttr(convertToDefaultPath(path), name); - } - - @Override public Map getXAttrs(Path path) throws IOException { - return fileSystem.getXAttrs(convertToDefaultPath(path)); - } - - @Override public Map getXAttrs(Path path, List names) - throws IOException { - return fileSystem.getXAttrs(convertToDefaultPath(path), names); - } - - @Override public List listXAttrs(Path path) throws IOException { - return fileSystem.listXAttrs(convertToDefaultPath(path)); - } - - @Override public void removeXAttr(Path path, String name) throws IOException { - fileSystem.removeXAttr(convertToDefaultPath(path), name); - } - - @Override public void setConf(Configuration conf) { - // ignore this. 
we will set conf on init - } - - @Override public Configuration getConf() { - return fileSystem.getConf(); - } - - @Override public int hashCode() { - return fileSystem.hashCode(); - } - - @Override public boolean equals(Object obj) { - return fileSystem.equals(obj); - } - - @Override public String toString() { - return fileSystem.toString(); - } - - public Path convertToHoodiePath(Path oldPath) { - return convertPathWithScheme(oldPath, getHoodieScheme(fileSystem.getScheme())); - } - - public static Path convertToHoodiePath(Path file, Configuration conf) { - String scheme = FileSystem.getDefaultUri(conf).getScheme(); - return convertPathWithScheme(file, getHoodieScheme(scheme)); - } - - private Path convertToDefaultPath(Path oldPath) { - return convertPathWithScheme(oldPath, fileSystem.getScheme()); - } - - private Path[] convertDefaults(Path[] psrcs) { - Path[] psrcsNew = new Path[psrcs.length]; - for (int i = 0; i < psrcs.length; i++) { - psrcsNew[i] = convertToDefaultPath(psrcs[i]); - } - return psrcsNew; - } - - private static Path convertPathWithScheme(Path oldPath, String newScheme) { - URI oldURI = oldPath.toUri(); - URI newURI; - try { - newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(), - oldURI.getPath(), oldURI.getQuery(), oldURI.getFragment()); - return new Path(newURI); - } catch (URISyntaxException e) { - // TODO - Better Exception handling - throw new RuntimeException(e); - } - } - - public static String getHoodieScheme(String scheme) { - String newScheme; - if (SUPPORT_SCHEMES.contains(scheme)) { - newScheme = HOODIE_SCHEME_PREFIX + scheme; - } else { - throw new IllegalArgumentException( - "BlockAlignedAvroParquetWriter does not support scheme " + scheme); - } - return newScheme; - } - - public long getBytesWritten(Path file) { - if (openStreams.containsKey(file.getName())) { - return openStreams.get(file.getName()).getBytesWritten(); - } - // When the file is first written, we do not have a track of it - throw 
new IllegalArgumentException(file.toString() - + " does not have a open stream. Cannot get the bytes written on the stream"); - } + private static final Set SUPPORT_SCHEMES; + public static final String HOODIE_SCHEME_PREFIX = "hoodie-"; + + static { + SUPPORT_SCHEMES = new HashSet<>(); + SUPPORT_SCHEMES.add("file"); + SUPPORT_SCHEMES.add("hdfs"); + SUPPORT_SCHEMES.add("s3"); + + // Hoodie currently relies on underlying object store being fully + // consistent so only regional buckets should be used. + SUPPORT_SCHEMES.add("gs"); + SUPPORT_SCHEMES.add("viewfs"); + } + + private ConcurrentMap openStreams = + new ConcurrentHashMap<>(); + private FileSystem fileSystem; + private URI uri; + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + // Get the default filesystem to decorate + fileSystem = FileSystem.get(conf); + // Do not need to explicitly initialize the default filesystem, its done already in the above FileSystem.get + // fileSystem.initialize(FileSystem.getDefaultUri(conf), conf); + // fileSystem.setConf(conf); + this.uri = uri; + } + + @Override + public URI getUri() { + return uri; + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return fileSystem.open(convertToDefaultPath(f), bufferSize); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, + int bufferSize, short replication, long blockSize, Progressable progress) + throws IOException { + final Path translatedPath = convertToDefaultPath(f); + return wrapOutputStream(f, fileSystem + .create(translatedPath, permission, overwrite, bufferSize, replication, blockSize, + progress)); + } + + private FSDataOutputStream wrapOutputStream(final Path path, + FSDataOutputStream fsDataOutputStream) throws IOException { + if (fsDataOutputStream instanceof SizeAwareFSDataOutputStream) { + return fsDataOutputStream; + } + + SizeAwareFSDataOutputStream os = + new 
SizeAwareFSDataOutputStream(fsDataOutputStream, new Runnable() { + @Override + public void run() { + openStreams.remove(path.getName()); + } + }); + openStreams.put(path.getName(), os); + return os; + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite) throws IOException { + return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f), overwrite)); + } + + @Override + public FSDataOutputStream create(Path f) throws IOException { + return wrapOutputStream(f, fileSystem.create(convertToDefaultPath(f))); + } + + @Override + public FSDataOutputStream create(Path f, Progressable progress) throws IOException { + return fileSystem.create(convertToDefaultPath(f), progress); + } + + @Override + public FSDataOutputStream create(Path f, short replication) throws IOException { + return fileSystem.create(convertToDefaultPath(f), replication); + } + + @Override + public FSDataOutputStream create(Path f, short replication, Progressable progress) + throws IOException { + return fileSystem.create(convertToDefaultPath(f), replication, progress); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize) + throws IOException { + return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, + Progressable progress) throws IOException { + return fileSystem.create(convertToDefaultPath(f), overwrite, bufferSize, progress); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + return fileSystem + .create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize, + progress); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, + int bufferSize, short replication, long blockSize, Progressable progress) + throws IOException { + 
return fileSystem + .create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, + progress); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, + int bufferSize, short replication, long blockSize, Progressable progress, + Options.ChecksumOpt checksumOpt) throws IOException { + return fileSystem + .create(convertToDefaultPath(f), permission, flags, bufferSize, replication, blockSize, + progress, checksumOpt); + } + + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, + long blockSize) throws IOException { + return fileSystem + .create(convertToDefaultPath(f), overwrite, bufferSize, replication, blockSize); + } + + + @Override + public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) + throws IOException { + return fileSystem.append(convertToDefaultPath(f), bufferSize, progress); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + return fileSystem.rename(convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + return fileSystem.delete(convertToDefaultPath(f), recursive); + } + + @Override + public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { + return fileSystem.listStatus(convertToDefaultPath(f)); + } + + @Override + public void setWorkingDirectory(Path new_dir) { + fileSystem.setWorkingDirectory(convertToDefaultPath(new_dir)); + } + + @Override + public Path getWorkingDirectory() { + return convertToHoodiePath(fileSystem.getWorkingDirectory()); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + return fileSystem.mkdirs(convertToDefaultPath(f), permission); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return fileSystem.getFileStatus(convertToDefaultPath(f)); + } + + 
@Override + public String getScheme() { + return uri.getScheme(); + } + + @Override + public String getCanonicalServiceName() { + return fileSystem.getCanonicalServiceName(); + } + + @Override + public String getName() { + return fileSystem.getName(); + } + + @Override + public Path makeQualified(Path path) { + return convertToHoodiePath(fileSystem.makeQualified(convertToDefaultPath(path))); + } + + @Override + public Token getDelegationToken(String renewer) throws IOException { + return fileSystem.getDelegationToken(renewer); + } + + @Override + public Token[] addDelegationTokens(String renewer, Credentials credentials) + throws IOException { + return fileSystem.addDelegationTokens(renewer, credentials); + } + + @Override + public FileSystem[] getChildFileSystems() { + return fileSystem.getChildFileSystems(); + } + + @Override + public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) + throws IOException { + return fileSystem.getFileBlockLocations(file, start, len); + } + + @Override + public BlockLocation[] getFileBlockLocations(Path p, long start, long len) + throws IOException { + return fileSystem.getFileBlockLocations(convertToDefaultPath(p), start, len); + } + + @Override + public FsServerDefaults getServerDefaults() throws IOException { + return fileSystem.getServerDefaults(); + } + + @Override + public FsServerDefaults getServerDefaults(Path p) throws IOException { + return fileSystem.getServerDefaults(convertToDefaultPath(p)); + } + + @Override + public Path resolvePath(Path p) throws IOException { + return convertToHoodiePath(fileSystem.resolvePath(convertToDefaultPath(p))); + } + + @Override + public FSDataInputStream open(Path f) throws IOException { + return fileSystem.open(convertToDefaultPath(f)); + } + + @Override + public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, + short replication, long blockSize, Progressable progress) throws IOException { + return fileSystem + 
.createNonRecursive(convertToDefaultPath(f), overwrite, bufferSize, replication, + blockSize, progress); + } + + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, + int bufferSize, short replication, long blockSize, Progressable progress) + throws IOException { + return fileSystem + .createNonRecursive(convertToDefaultPath(f), permission, overwrite, bufferSize, + replication, blockSize, progress); + } + + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + return fileSystem + .createNonRecursive(convertToDefaultPath(f), permission, flags, bufferSize, replication, + blockSize, progress); + } + + @Override + public boolean createNewFile(Path f) throws IOException { + return fileSystem.createNewFile(convertToDefaultPath(f)); + } + + @Override + public FSDataOutputStream append(Path f) throws IOException { + return fileSystem.append(convertToDefaultPath(f)); + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize) throws IOException { + return fileSystem.append(convertToDefaultPath(f), bufferSize); + } + + @Override + public void concat(Path trg, Path[] psrcs) throws IOException { + Path[] psrcsNew = convertDefaults(psrcs); + fileSystem.concat(convertToDefaultPath(trg), psrcsNew); + } + + @Override + public short getReplication(Path src) throws IOException { + return fileSystem.getReplication(convertToDefaultPath(src)); + } + + @Override + public boolean setReplication(Path src, short replication) throws IOException { + return fileSystem.setReplication(convertToDefaultPath(src), replication); + } + + @Override + public boolean delete(Path f) throws IOException { + return fileSystem.delete(convertToDefaultPath(f)); + } + + @Override + public boolean deleteOnExit(Path f) throws IOException { + return 
fileSystem.deleteOnExit(convertToDefaultPath(f)); + } + + @Override + public boolean cancelDeleteOnExit(Path f) { + return fileSystem.cancelDeleteOnExit(convertToDefaultPath(f)); + } + + @Override + public boolean exists(Path f) throws IOException { + return fileSystem.exists(convertToDefaultPath(f)); + } + + @Override + public boolean isDirectory(Path f) throws IOException { + return fileSystem.isDirectory(convertToDefaultPath(f)); + } + + @Override + public boolean isFile(Path f) throws IOException { + return fileSystem.isFile(convertToDefaultPath(f)); + } + + @Override + public long getLength(Path f) throws IOException { + return fileSystem.getLength(convertToDefaultPath(f)); + } + + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + return fileSystem.getContentSummary(convertToDefaultPath(f)); + } + + @Override + public RemoteIterator listCorruptFileBlocks(Path path) throws IOException { + return fileSystem.listCorruptFileBlocks(convertToDefaultPath(path)); + } + + @Override + public FileStatus[] listStatus(Path f, PathFilter filter) + throws FileNotFoundException, IOException { + return fileSystem.listStatus(convertToDefaultPath(f), filter); + } + + @Override + public FileStatus[] listStatus(Path[] files) + throws FileNotFoundException, IOException { + return fileSystem.listStatus(convertDefaults(files)); + } + + @Override + public FileStatus[] listStatus(Path[] files, PathFilter filter) + throws FileNotFoundException, IOException { + return fileSystem.listStatus(convertDefaults(files), filter); + } + + @Override + public FileStatus[] globStatus(Path pathPattern) throws IOException { + return fileSystem.globStatus(convertToDefaultPath(pathPattern)); + } + + @Override + public FileStatus[] globStatus(Path pathPattern, PathFilter filter) + throws IOException { + return fileSystem.globStatus(convertToDefaultPath(pathPattern), filter); + } + + @Override + public RemoteIterator listLocatedStatus(Path f) + throws 
FileNotFoundException, IOException { + return fileSystem.listLocatedStatus(convertToDefaultPath(f)); + } + + @Override + public RemoteIterator listFiles(Path f, boolean recursive) + throws FileNotFoundException, IOException { + return fileSystem.listFiles(convertToDefaultPath(f), recursive); + } + + @Override + public Path getHomeDirectory() { + return convertToHoodiePath(fileSystem.getHomeDirectory()); + } + + @Override + public boolean mkdirs(Path f) throws IOException { + return fileSystem.mkdirs(convertToDefaultPath(f)); + } + + @Override + public void copyFromLocalFile(Path src, Path dst) throws IOException { + fileSystem.copyFromLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void moveFromLocalFile(Path[] srcs, Path dst) throws IOException { + fileSystem.moveFromLocalFile(convertDefaults(srcs), convertToDefaultPath(dst)); + } + + @Override + public void moveFromLocalFile(Path src, Path dst) throws IOException { + fileSystem.moveFromLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + fileSystem.copyFromLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path[] srcs, Path dst) + throws IOException { + fileSystem + .copyFromLocalFile(delSrc, overwrite, convertDefaults(srcs), convertToDefaultPath(dst)); + } + + @Override + public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) + throws IOException { + fileSystem.copyFromLocalFile(delSrc, overwrite, convertToDefaultPath(src), + convertToDefaultPath(dst)); + } + + @Override + public void copyToLocalFile(Path src, Path dst) throws IOException { + fileSystem.copyToLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void moveToLocalFile(Path src, Path dst) throws IOException { + 
fileSystem.moveToLocalFile(convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException { + fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst)); + } + + @Override + public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem) + throws IOException { + fileSystem.copyToLocalFile(delSrc, convertToDefaultPath(src), convertToDefaultPath(dst), + useRawLocalFileSystem); + } + + @Override + public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + return convertToHoodiePath(fileSystem.startLocalOutput(convertToDefaultPath(fsOutputFile), + convertToDefaultPath(tmpLocalFile))); + } + + @Override + public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) + throws IOException { + fileSystem.completeLocalOutput(convertToDefaultPath(fsOutputFile), + convertToDefaultPath(tmpLocalFile)); + } + + @Override + public void close() throws IOException { + fileSystem.close(); + } + + @Override + public long getUsed() throws IOException { + return fileSystem.getUsed(); + } + + @Override + public long getBlockSize(Path f) throws IOException { + return fileSystem.getBlockSize(convertToDefaultPath(f)); + } + + @Override + public long getDefaultBlockSize() { + return fileSystem.getDefaultBlockSize(); + } + + @Override + public long getDefaultBlockSize(Path f) { + return fileSystem.getDefaultBlockSize(convertToDefaultPath(f)); + } + + @Override + public short getDefaultReplication() { + return fileSystem.getDefaultReplication(); + } + + @Override + public short getDefaultReplication(Path path) { + return fileSystem.getDefaultReplication(convertToDefaultPath(path)); + } + + @Override + public void access(Path path, FsAction mode) + throws AccessControlException, FileNotFoundException, IOException { + fileSystem.access(convertToDefaultPath(path), mode); + } + + @Override + public void 
createSymlink(Path target, Path link, boolean createParent) + throws AccessControlException, FileAlreadyExistsException, FileNotFoundException, + ParentNotDirectoryException, UnsupportedFileSystemException, IOException { + fileSystem + .createSymlink(convertToDefaultPath(target), convertToDefaultPath(link), createParent); + } + + @Override + public FileStatus getFileLinkStatus(Path f) + throws AccessControlException, FileNotFoundException, UnsupportedFileSystemException, + IOException { + return fileSystem.getFileLinkStatus(convertToDefaultPath(f)); + } + + @Override + public boolean supportsSymlinks() { + return fileSystem.supportsSymlinks(); + } + + @Override + public Path getLinkTarget(Path f) throws IOException { + return convertToHoodiePath(fileSystem.getLinkTarget(convertToDefaultPath(f))); + } + + @Override + public FileChecksum getFileChecksum(Path f) throws IOException { + return fileSystem.getFileChecksum(convertToDefaultPath(f)); + } + + @Override + public FileChecksum getFileChecksum(Path f, long length) throws IOException { + return fileSystem.getFileChecksum(convertToDefaultPath(f), length); + } + + @Override + public void setVerifyChecksum(boolean verifyChecksum) { + fileSystem.setVerifyChecksum(verifyChecksum); + } + + @Override + public void setWriteChecksum(boolean writeChecksum) { + fileSystem.setWriteChecksum(writeChecksum); + } + + @Override + public FsStatus getStatus() throws IOException { + return fileSystem.getStatus(); + } + + @Override + public FsStatus getStatus(Path p) throws IOException { + return fileSystem.getStatus(convertToDefaultPath(p)); + } + + @Override + public void setPermission(Path p, FsPermission permission) throws IOException { + fileSystem.setPermission(convertToDefaultPath(p), permission); + } + + @Override + public void setOwner(Path p, String username, String groupname) throws IOException { + fileSystem.setOwner(convertToDefaultPath(p), username, groupname); + } + + @Override + public void setTimes(Path p, long mtime, 
long atime) throws IOException { + fileSystem.setTimes(convertToDefaultPath(p), mtime, atime); + } + + @Override + public Path createSnapshot(Path path, String snapshotName) throws IOException { + return convertToHoodiePath( + fileSystem.createSnapshot(convertToDefaultPath(path), snapshotName)); + } + + @Override + public void renameSnapshot(Path path, String snapshotOldName, String snapshotNewName) + throws IOException { + fileSystem.renameSnapshot(convertToDefaultPath(path), snapshotOldName, snapshotNewName); + } + + @Override + public void deleteSnapshot(Path path, String snapshotName) throws IOException { + fileSystem.deleteSnapshot(convertToDefaultPath(path), snapshotName); + } + + @Override + public void modifyAclEntries(Path path, List aclSpec) throws IOException { + fileSystem.modifyAclEntries(convertToDefaultPath(path), aclSpec); + } + + @Override + public void removeAclEntries(Path path, List aclSpec) throws IOException { + fileSystem.removeAclEntries(convertToDefaultPath(path), aclSpec); + } + + @Override + public void removeDefaultAcl(Path path) throws IOException { + fileSystem.removeDefaultAcl(convertToDefaultPath(path)); + } + + @Override + public void removeAcl(Path path) throws IOException { + fileSystem.removeAcl(convertToDefaultPath(path)); + } + + @Override + public void setAcl(Path path, List aclSpec) throws IOException { + fileSystem.setAcl(convertToDefaultPath(path), aclSpec); + } + + @Override + public AclStatus getAclStatus(Path path) throws IOException { + return fileSystem.getAclStatus(convertToDefaultPath(path)); + } + + @Override + public void setXAttr(Path path, String name, byte[] value) throws IOException { + fileSystem.setXAttr(convertToDefaultPath(path), name, value); + } + + @Override + public void setXAttr(Path path, String name, byte[] value, EnumSet flag) + throws IOException { + fileSystem.setXAttr(convertToDefaultPath(path), name, value, flag); + } + + @Override + public byte[] getXAttr(Path path, String name) throws 
IOException { + return fileSystem.getXAttr(convertToDefaultPath(path), name); + } + + @Override + public Map getXAttrs(Path path) throws IOException { + return fileSystem.getXAttrs(convertToDefaultPath(path)); + } + + @Override + public Map getXAttrs(Path path, List names) + throws IOException { + return fileSystem.getXAttrs(convertToDefaultPath(path), names); + } + + @Override + public List listXAttrs(Path path) throws IOException { + return fileSystem.listXAttrs(convertToDefaultPath(path)); + } + + @Override + public void removeXAttr(Path path, String name) throws IOException { + fileSystem.removeXAttr(convertToDefaultPath(path), name); + } + + @Override + public void setConf(Configuration conf) { + // ignore this. we will set conf on init + } + + @Override + public Configuration getConf() { + return fileSystem.getConf(); + } + + @Override + public int hashCode() { + return fileSystem.hashCode(); + } + + @Override + public boolean equals(Object obj) { + return fileSystem.equals(obj); + } + + @Override + public String toString() { + return fileSystem.toString(); + } + + public Path convertToHoodiePath(Path oldPath) { + return convertPathWithScheme(oldPath, getHoodieScheme(fileSystem.getScheme())); + } + + public static Path convertToHoodiePath(Path file, Configuration conf) { + String scheme = FileSystem.getDefaultUri(conf).getScheme(); + return convertPathWithScheme(file, getHoodieScheme(scheme)); + } + + private Path convertToDefaultPath(Path oldPath) { + return convertPathWithScheme(oldPath, fileSystem.getScheme()); + } + + private Path[] convertDefaults(Path[] psrcs) { + Path[] psrcsNew = new Path[psrcs.length]; + for (int i = 0; i < psrcs.length; i++) { + psrcsNew[i] = convertToDefaultPath(psrcs[i]); + } + return psrcsNew; + } + + private static Path convertPathWithScheme(Path oldPath, String newScheme) { + URI oldURI = oldPath.toUri(); + URI newURI; + try { + newURI = new URI(newScheme, oldURI.getUserInfo(), oldURI.getHost(), oldURI.getPort(), + 
oldURI.getPath(), oldURI.getQuery(), oldURI.getFragment()); + return new Path(newURI); + } catch (URISyntaxException e) { + // TODO - Better Exception handling + throw new RuntimeException(e); + } + } + + public static String getHoodieScheme(String scheme) { + String newScheme; + if (SUPPORT_SCHEMES.contains(scheme)) { + newScheme = HOODIE_SCHEME_PREFIX + scheme; + } else { + throw new IllegalArgumentException( + "BlockAlignedAvroParquetWriter does not support scheme " + scheme); + } + return newScheme; + } + + public long getBytesWritten(Path file) { + if (openStreams.containsKey(file.getName())) { + return openStreams.get(file.getName()).getBytesWritten(); + } + // When the file is first written, we do not have a track of it + throw new IllegalArgumentException(file.toString() + + " does not have a open stream. Cannot get the bytes written on the stream"); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/SizeAwareFSDataOutputStream.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/SizeAwareFSDataOutputStream.java index 1c4dd9ae5..3f966d6ac 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/SizeAwareFSDataOutputStream.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/SizeAwareFSDataOutputStream.java @@ -16,44 +16,47 @@ package com.uber.hoodie.io.storage; -import org.apache.hadoop.fs.FSDataOutputStream; - import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; +import org.apache.hadoop.fs.FSDataOutputStream; /** - * Wrapper over FSDataOutputStream to keep track of the size of the written bytes. - * This gives a cheap way to check on the underlying file size. + * Wrapper over FSDataOutputStream to keep track of the size of the written bytes. This + * gives a cheap way to check on the underlying file size. */ public class SizeAwareFSDataOutputStream extends FSDataOutputStream { - // A callback to call when the output stream is closed. 
- private final Runnable closeCallback; - // Keep track of the bytes written - private final AtomicLong bytesWritten = new AtomicLong(0L); - public SizeAwareFSDataOutputStream(FSDataOutputStream out, Runnable closeCallback) - throws IOException { - super(out); - this.closeCallback = closeCallback; - } + // A callback to call when the output stream is closed. + private final Runnable closeCallback; + // Keep track of the bytes written + private final AtomicLong bytesWritten = new AtomicLong(0L); - @Override public synchronized void write(byte[] b, int off, int len) throws IOException { - bytesWritten.addAndGet(len); - super.write(b, off, len); - } + public SizeAwareFSDataOutputStream(FSDataOutputStream out, Runnable closeCallback) + throws IOException { + super(out); + this.closeCallback = closeCallback; + } - @Override public void write(byte[] b) throws IOException { - bytesWritten.addAndGet(b.length); - super.write(b); - } + @Override + public synchronized void write(byte[] b, int off, int len) throws IOException { + bytesWritten.addAndGet(len); + super.write(b, off, len); + } - @Override public void close() throws IOException { - super.close(); - closeCallback.run(); - } + @Override + public void write(byte[] b) throws IOException { + bytesWritten.addAndGet(b.length); + super.write(b); + } - public long getBytesWritten() { - return bytesWritten.get(); - } + @Override + public void close() throws IOException { + super.close(); + closeCallback.run(); + } + + public long getBytesWritten() { + return bytesWritten.get(); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/HoodieMetrics.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/HoodieMetrics.java index c0dee102d..f6c79bb10 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/HoodieMetrics.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/HoodieMetrics.java @@ -22,7 +22,6 @@ import com.codahale.metrics.Timer; import 
com.google.common.annotations.VisibleForTesting; import com.uber.hoodie.common.model.HoodieCommitMetadata; import com.uber.hoodie.config.HoodieWriteConfig; - import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -30,119 +29,122 @@ import org.apache.log4j.Logger; * Wrapper for metrics-related operations. */ public class HoodieMetrics { - private HoodieWriteConfig config = null; - private String tableName = null; - private static Logger logger = LogManager.getLogger(HoodieMetrics.class); - // Some timers - public String rollbackTimerName = null; - public String cleanTimerName = null; - public String commitTimerName = null; - private Timer rollbackTimer = null; - private Timer cleanTimer = null; - private Timer commitTimer = null; - public HoodieMetrics(HoodieWriteConfig config, String tableName) { - this.config = config; - this.tableName = tableName; - if (config.isMetricsOn()) { - Metrics.init(config); - this.rollbackTimerName = getMetricsName("timer", "rollback"); - this.cleanTimerName = getMetricsName("timer", "clean"); - this.commitTimerName = getMetricsName("timer", "commit"); + private HoodieWriteConfig config = null; + private String tableName = null; + private static Logger logger = LogManager.getLogger(HoodieMetrics.class); + // Some timers + public String rollbackTimerName = null; + public String cleanTimerName = null; + public String commitTimerName = null; + private Timer rollbackTimer = null; + private Timer cleanTimer = null; + private Timer commitTimer = null; + + public HoodieMetrics(HoodieWriteConfig config, String tableName) { + this.config = config; + this.tableName = tableName; + if (config.isMetricsOn()) { + Metrics.init(config); + this.rollbackTimerName = getMetricsName("timer", "rollback"); + this.cleanTimerName = getMetricsName("timer", "clean"); + this.commitTimerName = getMetricsName("timer", "commit"); + } + } + + private Timer createTimer(String name) { + return config.isMetricsOn() ? 
Metrics.getInstance().getRegistry().timer(name) : null; + } + + public Timer.Context getRollbackCtx() { + if (config.isMetricsOn() && rollbackTimer == null) { + rollbackTimer = createTimer(rollbackTimerName); + } + return rollbackTimer == null ? null : rollbackTimer.time(); + } + + public Timer.Context getCleanCtx() { + if (config.isMetricsOn() && cleanTimer == null) { + cleanTimer = createTimer(cleanTimerName); + } + return cleanTimer == null ? null : cleanTimer.time(); + } + + public Timer.Context getCommitCtx() { + if (config.isMetricsOn() && commitTimer == null) { + commitTimer = createTimer(commitTimerName); + } + return commitTimer == null ? null : commitTimer.time(); + } + + public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, + HoodieCommitMetadata metadata) { + if (config.isMetricsOn()) { + long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten(); + long totalFilesInsert = metadata.fetchTotalFilesInsert(); + long totalFilesUpdate = metadata.fetchTotalFilesUpdated(); + long totalRecordsWritten = metadata.fetchTotalRecordsWritten(); + long totalUpdateRecordsWritten = metadata.fetchTotalUpdateRecordsWritten(); + long totalInsertRecordsWritten = metadata.fetchTotalInsertRecordsWritten(); + long totalBytesWritten = metadata.fetchTotalBytesWritten(); + registerGauge(getMetricsName("commit", "duration"), durationInMs); + registerGauge(getMetricsName("commit", "totalPartitionsWritten"), totalPartitionsWritten); + registerGauge(getMetricsName("commit", "totalFilesInsert"), totalFilesInsert); + registerGauge(getMetricsName("commit", "totalFilesUpdate"), totalFilesUpdate); + registerGauge(getMetricsName("commit", "totalRecordsWritten"), totalRecordsWritten); + registerGauge(getMetricsName("commit", "totalUpdateRecordsWritten"), + totalUpdateRecordsWritten); + registerGauge(getMetricsName("commit", "totalInsertRecordsWritten"), + totalInsertRecordsWritten); + registerGauge(getMetricsName("commit", "totalBytesWritten"), 
totalBytesWritten); + registerGauge(getMetricsName("commit", "commitTime"), commitEpochTimeInMs); + } + } + + public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) { + if (config.isMetricsOn()) { + logger.info(String.format("Sending rollback metrics (duration=%d, numFilesDeleted=%d)", + durationInMs, numFilesDeleted)); + registerGauge(getMetricsName("rollback", "duration"), durationInMs); + registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted); + } + } + + public void updateCleanMetrics(long durationInMs, int numFilesDeleted) { + if (config.isMetricsOn()) { + logger.info(String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)", + durationInMs, numFilesDeleted)); + registerGauge(getMetricsName("clean", "duration"), durationInMs); + registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted); + } + } + + @VisibleForTesting + String getMetricsName(String action, String metric) { + return config == null ? null : + String.format("%s.%s.%s", tableName, action, metric); + } + + void registerGauge(String metricName, final long value) { + try { + MetricRegistry registry = Metrics.getInstance().getRegistry(); + registry.register(metricName, new Gauge() { + @Override + public Long getValue() { + return value; } + }); + } catch (Exception e) { + // Here we catch all exception, so the major upsert pipeline will not be affected if the metrics system + // has some issues. + logger.error("Failed to send metrics: ", e); } + } - private Timer createTimer(String name) { - return config.isMetricsOn() ? Metrics.getInstance().getRegistry().timer(name) : null; - } - - public Timer.Context getRollbackCtx() { - if (config.isMetricsOn() && rollbackTimer == null) { - rollbackTimer = createTimer(rollbackTimerName); - } - return rollbackTimer == null ?
null : rollbackTimer.time(); - } - - public Timer.Context getCleanCtx() { - if (config.isMetricsOn() && cleanTimer == null) { - cleanTimer = createTimer(cleanTimerName); - } - return cleanTimer == null ? null : cleanTimer.time(); - } - - public Timer.Context getCommitCtx() { - if (config.isMetricsOn() && commitTimer == null) { - commitTimer = createTimer(commitTimerName); - } - return commitTimer == null ? null : commitTimer.time(); - } - - public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, HoodieCommitMetadata metadata) { - if (config.isMetricsOn()) { - long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten(); - long totalFilesInsert = metadata.fetchTotalFilesInsert(); - long totalFilesUpdate = metadata.fetchTotalFilesUpdated(); - long totalRecordsWritten = metadata.fetchTotalRecordsWritten(); - long totalUpdateRecordsWritten = metadata.fetchTotalUpdateRecordsWritten(); - long totalInsertRecordsWritten = metadata.fetchTotalInsertRecordsWritten(); - long totalBytesWritten = metadata.fetchTotalBytesWritten(); - registerGauge(getMetricsName("commit", "duration"), durationInMs); - registerGauge(getMetricsName("commit", "totalPartitionsWritten"), totalPartitionsWritten); - registerGauge(getMetricsName("commit", "totalFilesInsert"), totalFilesInsert); - registerGauge(getMetricsName("commit", "totalFilesUpdate"), totalFilesUpdate); - registerGauge(getMetricsName("commit", "totalRecordsWritten"), totalRecordsWritten); - registerGauge(getMetricsName("commit", "totalUpdateRecordsWritten"), totalUpdateRecordsWritten); - registerGauge(getMetricsName("commit", "totalInsertRecordsWritten"), totalInsertRecordsWritten); - registerGauge(getMetricsName("commit", "totalBytesWritten"), totalBytesWritten); - registerGauge(getMetricsName("commit", "commitTime"), commitEpochTimeInMs); - } - } - - public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) { - if (config.isMetricsOn()) { - logger.info(String.format("Sending rollback 
metrics (duration=%d, numFilesDeleted=$d)", - durationInMs, numFilesDeleted)); - registerGauge(getMetricsName("rollback", "duration"), durationInMs); - registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted); - } - } - - public void updateCleanMetrics(long durationInMs, int numFilesDeleted) { - if (config.isMetricsOn()) { - logger.info(String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)", - durationInMs, numFilesDeleted)); - registerGauge(getMetricsName("clean", "duration"), durationInMs); - registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted); - } - } - - @VisibleForTesting - String getMetricsName(String action, String metric) { - return config == null ? null : - String.format("%s.%s.%s", tableName, action, metric); - } - - void registerGauge(String metricName, final long value) { - try { - MetricRegistry registry = Metrics.getInstance().getRegistry(); - registry.register(metricName, new Gauge() { - @Override - public Long getValue() { - return value; - } - }); - } catch (Exception e) { - // Here we catch all exception, so the major upsert pipeline will not be affected if the metrics system - // has some issues. - logger.error("Failed to send metrics: ", e); - } - } - - /** - * By default, the timer context returns duration with nano seconds. - * Convert it to millisecond. - */ - public long getDurationInMs(long ctxDuration) { - return ctxDuration / 1000000; - } + /** + * By default, the timer context returns duration with nano seconds. Convert it to millisecond. 
+ */ + public long getDurationInMs(long ctxDuration) { + return ctxDuration / 1000000; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/InMemoryMetricsReporter.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/InMemoryMetricsReporter.java index e3511b523..282c592cc 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/InMemoryMetricsReporter.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/InMemoryMetricsReporter.java @@ -22,16 +22,17 @@ import java.io.Closeable; * Used for testing. */ public class InMemoryMetricsReporter extends MetricsReporter { - @Override - public void start() { - } - @Override - public void report() { - } + @Override + public void start() { + } - @Override - public Closeable getReporter() { - return null; - } + @Override + public void report() { + } + + @Override + public Closeable getReporter() { + return null; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/Metrics.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/Metrics.java index 337d21c16..c5a357317 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/Metrics.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/Metrics.java @@ -19,65 +19,64 @@ package com.uber.hoodie.metrics; import com.codahale.metrics.MetricRegistry; import com.google.common.io.Closeables; import com.uber.hoodie.config.HoodieWriteConfig; -import com.uber.hoodie.config.HoodieMetricsConfig; import com.uber.hoodie.exception.HoodieException; -import org.apache.commons.configuration.ConfigurationException; - import java.io.Closeable; +import org.apache.commons.configuration.ConfigurationException; /** * This is the main class of the metrics system. 
*/ public class Metrics { - private static volatile boolean initialized = false; - private static Metrics metrics = null; - private final MetricRegistry registry; - private MetricsReporter reporter = null; - private Metrics(HoodieWriteConfig metricConfig) throws ConfigurationException { - registry = new MetricRegistry(); + private static volatile boolean initialized = false; + private static Metrics metrics = null; + private final MetricRegistry registry; + private MetricsReporter reporter = null; - reporter = MetricsReporterFactory.createReporter(metricConfig, registry); - if (reporter == null) { - throw new RuntimeException("Cannot initialize Reporter."); - } + private Metrics(HoodieWriteConfig metricConfig) throws ConfigurationException { + registry = new MetricRegistry(); + + reporter = MetricsReporterFactory.createReporter(metricConfig, registry); + if (reporter == null) { + throw new RuntimeException("Cannot initialize Reporter."); + } // reporter.start(); - Runtime.getRuntime().addShutdownHook(new Thread() { - @Override - public void run() { - try { - reporter.report(); - Closeables.close(reporter.getReporter(), true); - } catch (Exception e) { - e.printStackTrace(); - } - } - }); - } - - public static Metrics getInstance() { - assert initialized; - return metrics; - } - - public static synchronized void init(HoodieWriteConfig metricConfig) { - if (initialized) { - return; - } + Runtime.getRuntime().addShutdownHook(new Thread() { + @Override + public void run() { try { - metrics = new Metrics(metricConfig); - } catch (ConfigurationException e) { - throw new HoodieException(e); + reporter.report(); + Closeables.close(reporter.getReporter(), true); + } catch (Exception e) { + e.printStackTrace(); } - initialized = true; - } + } + }); + } - public MetricRegistry getRegistry() { - return registry; - } + public static Metrics getInstance() { + assert initialized; + return metrics; + } - public Closeable getReporter() { - return reporter.getReporter(); + public 
static synchronized void init(HoodieWriteConfig metricConfig) { + if (initialized) { + return; } + try { + metrics = new Metrics(metricConfig); + } catch (ConfigurationException e) { + throw new HoodieException(e); + } + initialized = true; + } + + public MetricRegistry getRegistry() { + return registry; + } + + public Closeable getReporter() { + return reporter.getReporter(); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsGraphiteReporter.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsGraphiteReporter.java index aeb5464d4..bc7d024a5 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsGraphiteReporter.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsGraphiteReporter.java @@ -21,75 +21,74 @@ import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.graphite.Graphite; import com.codahale.metrics.graphite.GraphiteReporter; import com.uber.hoodie.config.HoodieWriteConfig; - -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.io.Closeable; import java.net.InetSocketAddress; import java.util.concurrent.TimeUnit; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** - * Implementation of Graphite reporter, which connects to the Graphite server, - * and send metrics to that server. + * Implementation of Graphite reporter, which connects to the Graphite server, and send metrics to + * that server. 
*/ public class MetricsGraphiteReporter extends MetricsReporter { - private final MetricRegistry registry; - private final GraphiteReporter graphiteReporter; - private final HoodieWriteConfig config; - private String serverHost; - private int serverPort; - private static Logger logger = LogManager.getLogger(MetricsGraphiteReporter.class); + private final MetricRegistry registry; + private final GraphiteReporter graphiteReporter; + private final HoodieWriteConfig config; + private String serverHost; + private int serverPort; - public MetricsGraphiteReporter(HoodieWriteConfig config, MetricRegistry registry) { - this.registry = registry; - this.config = config; + private static Logger logger = LogManager.getLogger(MetricsGraphiteReporter.class); - // Check the serverHost and serverPort here - this.serverHost = config.getGraphiteServerHost(); - this.serverPort = config.getGraphiteServerPort(); - if (serverHost == null || serverPort == 0) { - throw new RuntimeException( - String.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].", - serverHost, serverPort)); - } + public MetricsGraphiteReporter(HoodieWriteConfig config, MetricRegistry registry) { + this.registry = registry; + this.config = config; - this.graphiteReporter = createGraphiteReport(); + // Check the serverHost and serverPort here + this.serverHost = config.getGraphiteServerHost(); + this.serverPort = config.getGraphiteServerPort(); + if (serverHost == null || serverPort == 0) { + throw new RuntimeException( + String.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].", + serverHost, serverPort)); } - @Override - public void start() { - if (graphiteReporter != null) { - graphiteReporter.start(30, TimeUnit.SECONDS); - } else { - logger.error("Cannot start as the graphiteReporter is null."); - } - } + this.graphiteReporter = createGraphiteReport(); + } - @Override - public void report() { - if (graphiteReporter != null) { - graphiteReporter.report(); - } 
else { - logger.error("Cannot report metrics as the graphiteReporter is null."); - } + @Override + public void start() { + if (graphiteReporter != null) { + graphiteReporter.start(30, TimeUnit.SECONDS); + } else { + logger.error("Cannot start as the graphiteReporter is null."); } + } - @Override - public Closeable getReporter() { - return graphiteReporter; + @Override + public void report() { + if (graphiteReporter != null) { + graphiteReporter.report(); + } else { + logger.error("Cannot report metrics as the graphiteReporter is null."); } + } - private GraphiteReporter createGraphiteReport() { - Graphite graphite = new Graphite( - new InetSocketAddress(serverHost, serverPort)); - String reporterPrefix = config.getGraphiteMetricPrefix(); - return GraphiteReporter.forRegistry(registry) - .prefixedWith(reporterPrefix) - .convertRatesTo(TimeUnit.SECONDS) - .convertDurationsTo(TimeUnit.MILLISECONDS) - .filter(MetricFilter.ALL) - .build(graphite); - } + @Override + public Closeable getReporter() { + return graphiteReporter; + } + + private GraphiteReporter createGraphiteReport() { + Graphite graphite = new Graphite( + new InetSocketAddress(serverHost, serverPort)); + String reporterPrefix = config.getGraphiteMetricPrefix(); + return GraphiteReporter.forRegistry(registry) + .prefixedWith(reporterPrefix) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .filter(MetricFilter.ALL) + .build(graphite); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporter.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporter.java index 719e7c6a3..22c0a076a 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporter.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporter.java @@ -22,15 +22,16 @@ import java.io.Closeable; * Interface for implementing a Reporter. 
*/ public abstract class MetricsReporter { - /** - * Push out metrics at scheduled intervals - */ - public abstract void start(); - /** - * Deterministically push out metrics - */ - public abstract void report(); + /** + * Push out metrics at scheduled intervals + */ + public abstract void start(); - public abstract Closeable getReporter(); + /** + * Deterministically push out metrics + */ + public abstract void report(); + + public abstract Closeable getReporter(); } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterFactory.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterFactory.java index 3c0d9e667..31bc555c7 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterFactory.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterFactory.java @@ -18,7 +18,6 @@ package com.uber.hoodie.metrics; import com.codahale.metrics.MetricRegistry; import com.uber.hoodie.config.HoodieWriteConfig; - import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -26,23 +25,24 @@ import org.apache.log4j.Logger; * Factory class for creating MetricsReporter. 
*/ public class MetricsReporterFactory { - private static Logger logger = LogManager.getLogger(MetricsReporterFactory.class); - public static MetricsReporter createReporter(HoodieWriteConfig config, - MetricRegistry registry) { - MetricsReporterType type = config.getMetricsReporterType(); - MetricsReporter reporter = null; - switch (type) { - case GRAPHITE: - reporter = new MetricsGraphiteReporter(config, registry); - break; - case INMEMORY: - reporter = new InMemoryMetricsReporter(); - break; - default: - logger.error("Reporter type[" + type + "] is not supported."); - break; - } - return reporter; + private static Logger logger = LogManager.getLogger(MetricsReporterFactory.class); + + public static MetricsReporter createReporter(HoodieWriteConfig config, + MetricRegistry registry) { + MetricsReporterType type = config.getMetricsReporterType(); + MetricsReporter reporter = null; + switch (type) { + case GRAPHITE: + reporter = new MetricsGraphiteReporter(config, registry); + break; + case INMEMORY: + reporter = new InMemoryMetricsReporter(); + break; + default: + logger.error("Reporter type[" + type + "] is not supported."); + break; } + return reporter; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterType.java b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterType.java index cac162cec..46e128ddb 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterType.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/metrics/MetricsReporterType.java @@ -17,10 +17,10 @@ package com.uber.hoodie.metrics; /** - * Types of the reporter. Right now we only support Graphite. - * We can include JMX and CSV in the future. + * Types of the reporter. Right now we only support Graphite. We can include JMX and CSV in the + * future. 
*/ public enum MetricsReporterType { - GRAPHITE, - INMEMORY + GRAPHITE, + INMEMORY } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java index 9c2a80e9b..3bcce373b 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java @@ -70,626 +70,626 @@ import org.apache.spark.api.java.function.PairFlatMapFunction; import scala.Option; import scala.Tuple2; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; - /** * Implementation of a very heavily read-optimized Hoodie Table where * - * INSERTS - Produce new files, block aligned to desired size (or) - * Merge with the smallest existing file, to expand it + * INSERTS - Produce new files, block aligned to desired size (or) Merge with the smallest existing + * file, to expand it * * UPDATES - Produce a new version of the file, just replacing the updated records with new values - * */ public class HoodieCopyOnWriteTable extends HoodieTable { - public HoodieCopyOnWriteTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) { - super(config, metaClient); - } - private static Logger logger = LogManager.getLogger(HoodieCopyOnWriteTable.class); + public HoodieCopyOnWriteTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) { + super(config, metaClient); + } - enum BucketType { - UPDATE, - INSERT + private static Logger logger = LogManager.getLogger(HoodieCopyOnWriteTable.class); + + enum BucketType { + UPDATE, + INSERT + } + + /** + * Helper class for a small file's location and its actual size on disk + */ + class SmallFile 
implements Serializable { + + HoodieRecordLocation location; + long sizeBytes; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("SmallFile {"); + sb.append("location=").append(location).append(", "); + sb.append("sizeBytes=").append(sizeBytes); + sb.append('}'); + return sb.toString(); } + } + + /** + * Helper class for an insert bucket along with the weight [0.0, 0.1] that defines the amount of + * incoming inserts that should be allocated to the bucket + */ + class InsertBucket implements Serializable { + + int bucketNumber; + // fraction of total inserts, that should go into this bucket + double weight; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("WorkloadStat {"); + sb.append("bucketNumber=").append(bucketNumber).append(", "); + sb.append("weight=").append(weight); + sb.append('}'); + return sb.toString(); + } + } + + /** + * Helper class for a bucket's type (INSERT and UPDATE) and its file location + */ + class BucketInfo implements Serializable { + + BucketType bucketType; + String fileLoc; + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("BucketInfo {"); + sb.append("bucketType=").append(bucketType).append(", "); + sb.append("fileLoc=").append(fileLoc); + sb.append('}'); + return sb.toString(); + } + } + + + /** + * Packs incoming records to be upserted, into buckets (1 bucket = 1 RDD partition) + */ + class UpsertPartitioner extends Partitioner { /** - * Helper class for a small file's location and its actual size on disk + * Total number of RDD partitions, is determined by total buckets we want to pack the incoming + * workload into */ - class SmallFile implements Serializable { - HoodieRecordLocation location; - long sizeBytes; - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("SmallFile {"); - sb.append("location=").append(location).append(", "); - sb.append("sizeBytes=").append(sizeBytes); - 
sb.append('}'); - return sb.toString(); - } - } + private int totalBuckets = 0; /** - * Helper class for an insert bucket along with the weight [0.0, 0.1] - * that defines the amount of incoming inserts that should be allocated to - * the bucket + * Stat for the current workload. Helps in determining total inserts, upserts etc. */ - class InsertBucket implements Serializable { - int bucketNumber; - // fraction of total inserts, that should go into this bucket - double weight; - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("WorkloadStat {"); - sb.append("bucketNumber=").append(bucketNumber).append(", "); - sb.append("weight=").append(weight); - sb.append('}'); - return sb.toString(); - } - } + private WorkloadStat globalStat; /** - * Helper class for a bucket's type (INSERT and UPDATE) and its file location + * Helps decide which bucket an incoming update should go to. */ - class BucketInfo implements Serializable { - BucketType bucketType; - String fileLoc; + private HashMap updateLocationToBucket; - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("BucketInfo {"); - sb.append("bucketType=").append(bucketType).append(", "); - sb.append("fileLoc=").append(fileLoc); - sb.append('}'); - return sb.toString(); + + /** + * Helps us pack inserts into 1 or more buckets depending on number of incoming records. + */ + private HashMap> partitionPathToInsertBuckets; + + + /** + * Remembers what type each bucket is for later. 
+ */ + private HashMap bucketInfoMap; + + UpsertPartitioner(WorkloadProfile profile) { + updateLocationToBucket = new HashMap<>(); + partitionPathToInsertBuckets = new HashMap<>(); + bucketInfoMap = new HashMap<>(); + globalStat = profile.getGlobalStat(); + + assignUpdates(profile); + assignInserts(profile); + + logger.info("Total Buckets :" + totalBuckets + ", " + + "buckets info => " + bucketInfoMap + ", \n" + + "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n" + + "UpdateLocations mapped to buckets =>" + updateLocationToBucket); + } + + private void assignUpdates(WorkloadProfile profile) { + // each update location gets a partition + WorkloadStat gStat = profile.getGlobalStat(); + for (Map.Entry updateLocEntry : gStat.getUpdateLocationToCount().entrySet()) { + addUpdateBucket(updateLocEntry.getKey()); + } + } + + private int addUpdateBucket(String fileLoc) { + int bucket = totalBuckets; + updateLocationToBucket.put(fileLoc, bucket); + BucketInfo bucketInfo = new BucketInfo(); + bucketInfo.bucketType = BucketType.UPDATE; + bucketInfo.fileLoc = fileLoc; + bucketInfoMap.put(totalBuckets, bucketInfo); + totalBuckets++; + return bucket; + } + + private void assignInserts(WorkloadProfile profile) { + // for new inserts, compute buckets depending on how many records we have for each partition + Set partitionPaths = profile.getPartitionPaths(); + long averageRecordSize = averageBytesPerRecord(); + logger.info("AvgRecordSize => " + averageRecordSize); + for (String partitionPath : partitionPaths) { + WorkloadStat pStat = profile.getWorkloadStat(partitionPath); + if (pStat.getNumInserts() > 0) { + + List smallFiles = getSmallFiles(partitionPath); + logger.info("For partitionPath : " + partitionPath + " Small Files => " + smallFiles); + + long totalUnassignedInserts = pStat.getNumInserts(); + List bucketNumbers = new ArrayList<>(); + List recordsPerBucket = new ArrayList<>(); + + // first try packing this into one of the smallFiles + for (SmallFile 
smallFile : smallFiles) { + long recordsToAppend = Math + .min((config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize, + totalUnassignedInserts); + if (recordsToAppend > 0 && totalUnassignedInserts > 0) { + // create a new bucket or re-use an existing bucket + int bucket; + if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) { + bucket = updateLocationToBucket.get(smallFile.location.getFileId()); + logger.info("Assigning " + recordsToAppend + " inserts to existing update bucket " + + bucket); + } else { + bucket = addUpdateBucket(smallFile.location.getFileId()); + logger.info( + "Assigning " + recordsToAppend + " inserts to new update bucket " + bucket); + } + bucketNumbers.add(bucket); + recordsPerBucket.add(recordsToAppend); + totalUnassignedInserts -= recordsToAppend; + } + } + + // if we have anything more, create new insert buckets, like normal + if (totalUnassignedInserts > 0) { + long insertRecordsPerBucket = config.getCopyOnWriteInsertSplitSize(); + if (config.shouldAutoTuneInsertSplits()) { + insertRecordsPerBucket = config.getParquetMaxFileSize() / averageRecordSize; + } + + int insertBuckets = (int) Math.max(totalUnassignedInserts / insertRecordsPerBucket, 1L); + logger + .info("After small file assignment: unassignedInserts => " + totalUnassignedInserts + + ", totalInsertBuckets => " + insertBuckets + + ", recordsPerBucket => " + insertRecordsPerBucket); + for (int b = 0; b < insertBuckets; b++) { + bucketNumbers.add(totalBuckets); + recordsPerBucket.add(totalUnassignedInserts / insertBuckets); + BucketInfo bucketInfo = new BucketInfo(); + bucketInfo.bucketType = BucketType.INSERT; + bucketInfoMap.put(totalBuckets, bucketInfo); + totalBuckets++; + } + } + + // Go over all such buckets, and assign weights as per amount of incoming inserts. 
+ List insertBuckets = new ArrayList<>(); + for (int i = 0; i < bucketNumbers.size(); i++) { + InsertBucket bkt = new InsertBucket(); + bkt.bucketNumber = bucketNumbers.get(i); + bkt.weight = (1.0 * recordsPerBucket.get(i)) / pStat.getNumInserts(); + insertBuckets.add(bkt); + } + logger.info( + "Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); + partitionPathToInsertBuckets.put(partitionPath, insertBuckets); } + } } /** - * Packs incoming records to be upserted, into buckets (1 bucket = 1 RDD partition) + * Returns a list of small files in the given partition path */ - class UpsertPartitioner extends Partitioner { + private List getSmallFiles(String partitionPath) { + List smallFileLocations = new ArrayList<>(); - /** - * Total number of RDD partitions, is determined by total buckets we want to - * pack the incoming workload into - */ - private int totalBuckets = 0; + HoodieTimeline commitTimeline = getCompletedCommitTimeline(); - /** - * Stat for the current workload. Helps in determining total inserts, upserts etc. - */ - private WorkloadStat globalStat; + if (!commitTimeline.empty()) { // if we have some commits + HoodieInstant latestCommitTime = commitTimeline.lastInstant().get(); + List allFiles = getROFileSystemView() + .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()) + .collect(Collectors.toList()); - /** - * Helps decide which bucket an incoming update should go to. - */ - private HashMap updateLocationToBucket; - - - /** - * Helps us pack inserts into 1 or more buckets depending on number of - * incoming records. - */ - private HashMap> partitionPathToInsertBuckets; - - - /** - * Remembers what type each bucket is for later. 
- */ - private HashMap bucketInfoMap; - - UpsertPartitioner(WorkloadProfile profile) { - updateLocationToBucket = new HashMap<>(); - partitionPathToInsertBuckets = new HashMap<>(); - bucketInfoMap = new HashMap<>(); - globalStat = profile.getGlobalStat(); - - assignUpdates(profile); - assignInserts(profile); - - logger.info("Total Buckets :" + totalBuckets + ", " + - "buckets info => " + bucketInfoMap + ", \n" + - "Partition to insert buckets => " + partitionPathToInsertBuckets + ", \n" + - "UpdateLocations mapped to buckets =>" + updateLocationToBucket); + for (HoodieDataFile file : allFiles) { + if (file.getFileSize() < config.getParquetSmallFileLimit()) { + String filename = file.getFileName(); + SmallFile sf = new SmallFile(); + sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), + FSUtils.getFileId(filename)); + sf.sizeBytes = file.getFileSize(); + smallFileLocations.add(sf); + } } + } - private void assignUpdates(WorkloadProfile profile) { - // each update location gets a partition - WorkloadStat gStat = profile.getGlobalStat(); - for (Map.Entry updateLocEntry : gStat.getUpdateLocationToCount().entrySet()) { - addUpdateBucket(updateLocEntry.getKey()); - } - } - - private int addUpdateBucket(String fileLoc) { - int bucket = totalBuckets; - updateLocationToBucket.put(fileLoc, bucket); - BucketInfo bucketInfo = new BucketInfo(); - bucketInfo.bucketType = BucketType.UPDATE; - bucketInfo.fileLoc = fileLoc; - bucketInfoMap.put(totalBuckets, bucketInfo); - totalBuckets++; - return bucket; - } - - private void assignInserts(WorkloadProfile profile) { - // for new inserts, compute buckets depending on how many records we have for each partition - Set partitionPaths = profile.getPartitionPaths(); - long averageRecordSize = averageBytesPerRecord(); - logger.info("AvgRecordSize => " + averageRecordSize); - for (String partitionPath : partitionPaths) { - WorkloadStat pStat = profile.getWorkloadStat(partitionPath); - if (pStat.getNumInserts() > 0) { - - 
List smallFiles = getSmallFiles(partitionPath); - logger.info("For partitionPath : "+ partitionPath + " Small Files => " + smallFiles); - - long totalUnassignedInserts = pStat.getNumInserts(); - List bucketNumbers = new ArrayList<>(); - List recordsPerBucket = new ArrayList<>(); - - // first try packing this into one of the smallFiles - for (SmallFile smallFile: smallFiles) { - long recordsToAppend = Math.min((config.getParquetMaxFileSize() - smallFile.sizeBytes)/ averageRecordSize, totalUnassignedInserts); - if (recordsToAppend > 0 && totalUnassignedInserts > 0){ - // create a new bucket or re-use an existing bucket - int bucket; - if (updateLocationToBucket.containsKey(smallFile.location.getFileId())) { - bucket = updateLocationToBucket.get(smallFile.location.getFileId()); - logger.info("Assigning " + recordsToAppend + " inserts to existing update bucket "+ bucket); - } else { - bucket = addUpdateBucket(smallFile.location.getFileId()); - logger.info("Assigning " + recordsToAppend + " inserts to new update bucket "+ bucket); - } - bucketNumbers.add(bucket); - recordsPerBucket.add(recordsToAppend); - totalUnassignedInserts -= recordsToAppend; - } - } - - // if we have anything more, create new insert buckets, like normal - if (totalUnassignedInserts > 0) { - long insertRecordsPerBucket = config.getCopyOnWriteInsertSplitSize(); - if (config.shouldAutoTuneInsertSplits()) { - insertRecordsPerBucket = config.getParquetMaxFileSize()/averageRecordSize; - } - - int insertBuckets = (int) Math.max(totalUnassignedInserts / insertRecordsPerBucket, 1L); - logger.info("After small file assignment: unassignedInserts => " + totalUnassignedInserts - + ", totalInsertBuckets => " + insertBuckets - + ", recordsPerBucket => " + insertRecordsPerBucket); - for (int b = 0; b < insertBuckets; b++) { - bucketNumbers.add(totalBuckets); - recordsPerBucket.add(totalUnassignedInserts/insertBuckets); - BucketInfo bucketInfo = new BucketInfo(); - bucketInfo.bucketType = BucketType.INSERT; - 
bucketInfoMap.put(totalBuckets, bucketInfo); - totalBuckets++; - } - } - - // Go over all such buckets, and assign weights as per amount of incoming inserts. - List insertBuckets = new ArrayList<>(); - for (int i = 0; i < bucketNumbers.size(); i++) { - InsertBucket bkt = new InsertBucket(); - bkt.bucketNumber = bucketNumbers.get(i); - bkt.weight = (1.0 * recordsPerBucket.get(i))/pStat.getNumInserts(); - insertBuckets.add(bkt); - } - logger.info("Total insert buckets for partition path "+ partitionPath + " => " + insertBuckets); - partitionPathToInsertBuckets.put(partitionPath, insertBuckets); - } - } - } - - - /** - * Returns a list of small files in the given partition path - * - * @param partitionPath - * @return - */ - private List getSmallFiles(String partitionPath) { - List smallFileLocations = new ArrayList<>(); - - HoodieTimeline commitTimeline = getCompletedCommitTimeline(); - - if (!commitTimeline.empty()) { // if we have some commits - HoodieInstant latestCommitTime = commitTimeline.lastInstant().get(); - List allFiles = getROFileSystemView() - .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp()) - .collect(Collectors.toList()); - - for (HoodieDataFile file : allFiles) { - if (file.getFileSize() < config.getParquetSmallFileLimit()) { - String filename = file.getFileName(); - SmallFile sf = new SmallFile(); - sf.location = new HoodieRecordLocation(FSUtils.getCommitTime(filename), - FSUtils.getFileId(filename)); - sf.sizeBytes = file.getFileSize(); - smallFileLocations.add(sf); - } - } - } - - return smallFileLocations; - } - - /** - * Obtains the average record size based on records written during last commit. - * Used for estimating how many records pack into one file. 
- * - * @return - */ - private long averageBytesPerRecord() { - long avgSize = 0L; - HoodieTimeline commitTimeline = - metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(); - try { - if (!commitTimeline.empty()) { - HoodieInstant latestCommitTime = commitTimeline.lastInstant().get(); - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(commitTimeline.getInstantDetails(latestCommitTime).get()); - avgSize = (long) Math.ceil( - (1.0 * commitMetadata.fetchTotalBytesWritten()) / commitMetadata - .fetchTotalRecordsWritten()); - } - } catch (Throwable t) { - // make this fail safe. - logger.error("Error trying to compute average bytes/record ", t); - } - return avgSize <= 0L ? config.getCopyOnWriteRecordSizeEstimate() : avgSize; - } - - public BucketInfo getBucketInfo(int bucketNumber) { - return bucketInfoMap.get(bucketNumber); - } - - public List getInsertBuckets(String partitionPath) { - return partitionPathToInsertBuckets.get(partitionPath); - } - - @Override - public int numPartitions() { - return totalBuckets; - } - - @Override - public int getPartition(Object key) { - Tuple2> keyLocation = (Tuple2>) key; - if (keyLocation._2().isDefined()) { - HoodieRecordLocation location = keyLocation._2().get(); - return updateLocationToBucket.get(location.getFileId()); - } else { - List targetBuckets = partitionPathToInsertBuckets.get(keyLocation._1().getPartitionPath()); - // pick the target bucket to use based on the weights. 
- double totalWeight = 0.0; - final long totalInserts = Math.max(1, globalStat.getNumInserts()); - final long hashOfKey = Hashing.md5().hashString(keyLocation._1().getRecordKey(), StandardCharsets.UTF_8).asLong(); - final double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts; - for (InsertBucket insertBucket: targetBuckets) { - totalWeight += insertBucket.weight; - if (r <= totalWeight) { - return insertBucket.bucketNumber; - } - } - // return first one, by default - return targetBuckets.get(0).bucketNumber; - } - } - } - - - @Override - public Partitioner getUpsertPartitioner(WorkloadProfile profile) { - if (profile == null) { - throw new HoodieUpsertException("Need workload profile to construct the upsert partitioner."); - } - return new UpsertPartitioner(profile); - } - - @Override - public Partitioner getInsertPartitioner(WorkloadProfile profile) { - return getUpsertPartitioner(profile); - } - - @Override - public boolean isWorkloadProfileNeeded() { - return true; - } - - - - public Iterator> handleUpdate(String commitTime, String fileLoc, Iterator> recordItr) - throws IOException { - // these are updates - HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileLoc, recordItr); - if (upsertHandle.getOldFilePath() == null) { - throw new HoodieUpsertException("Error in finding the old file path at commit " + - commitTime +" at fileLoc: " + fileLoc); - } else { - Configuration conf = FSUtils.getFs().getConf(); - AvroReadSupport.setAvroReadSchema(conf, upsertHandle.getSchema()); - ParquetReader reader = - AvroParquetReader.builder(upsertHandle.getOldFilePath()).withConf(conf).build(); - try { - IndexedRecord record; - while ((record = reader.read()) != null) { - // Two types of writes here (new record, and old record). - // We have already catch the exception during writing new records. - // But for old records, we should fail if any exception happens. 
- upsertHandle.write((GenericRecord) record); - } - } catch (IOException e) { - throw new HoodieUpsertException( - "Failed to read record from " + upsertHandle.getOldFilePath() - + " with new Schema " + upsertHandle.getSchema(), e); - } finally { - reader.close(); - upsertHandle.close(); - } - } - //TODO(vc): This needs to be revisited - if (upsertHandle.getWriteStatus().getPartitionPath() == null) { - logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() - + ", " + upsertHandle.getWriteStatus()); - } - return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())).iterator(); - } - - protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileLoc, Iterator> recordItr) { - return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileLoc); - } - - public Iterator> handleInsert(String commitTime, Iterator> recordItr) throws Exception { - return new LazyInsertIterable<>(recordItr, config, commitTime, this); - } - - - @SuppressWarnings("unchecked") - @Override - public Iterator> handleUpsertPartition(String commitTime, Integer partition, - Iterator recordItr, Partitioner partitioner) { - UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner; - BucketInfo binfo = upsertPartitioner.getBucketInfo(partition); - BucketType btype = binfo.bucketType; - try { - if (btype.equals(BucketType.INSERT)) { - return handleInsert(commitTime, recordItr); - } else if (btype.equals(BucketType.UPDATE)) { - return handleUpdate(commitTime, binfo.fileLoc, recordItr); - } else { - throw new HoodieUpsertException("Unknown bucketType " + btype + " for partition :" + partition); - } - } catch (Throwable t) { - String msg = "Error upserting bucketType " + btype + " for partition :" + partition; - logger.error(msg, t); - throw new HoodieUpsertException(msg, t); - } - } - - @Override - public Iterator> handleInsertPartition(String commitTime, Integer partition, - Iterator recordItr, - Partitioner 
partitioner) { - return handleUpsertPartition(commitTime, partition, recordItr, partitioner); - } - - @Override - public Optional compact(JavaSparkContext jsc) { - logger.info("Nothing to compact in COW storage format"); - return Optional.empty(); + return smallFileLocations; } /** - * Performs cleaning of partition paths according to cleaning policy and returns the number - * of files cleaned. Handles skews in partitions to clean by making files to clean as the - * unit of task distribution. - * - * @throws IllegalArgumentException if unknown cleaning policy is provided + * Obtains the average record size based on records written during last commit. Used for + * estimating how many records pack into one file. */ - @Override - public List clean(JavaSparkContext jsc) { - try { - List partitionsToClean = - FSUtils.getAllPartitionPaths(getFs(), getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()); - logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config - .getCleanerPolicy()); - if (partitionsToClean.isEmpty()) { - logger.info("Nothing to clean here mom. It is already clean"); - return Collections.emptyList(); - } - return cleanPartitionPaths(partitionsToClean, jsc); - } catch (IOException e) { - throw new HoodieIOException("Failed to clean up after commit", e); + private long averageBytesPerRecord() { + long avgSize = 0L; + HoodieTimeline commitTimeline = + metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(); + try { + if (!commitTimeline.empty()) { + HoodieInstant latestCommitTime = commitTimeline.lastInstant().get(); + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(commitTimeline.getInstantDetails(latestCommitTime).get()); + avgSize = (long) Math.ceil( + (1.0 * commitMetadata.fetchTotalBytesWritten()) / commitMetadata + .fetchTotalRecordsWritten()); } + } catch (Throwable t) { + // make this fail safe. 
+ logger.error("Error trying to compute average bytes/record ", t); + } + return avgSize <= 0L ? config.getCopyOnWriteRecordSizeEstimate() : avgSize; } - /** - * - * Common method used for cleaning out parquet files under a partition path during rollback of a set of commits - * @param partitionPath - * @param commits - * @return - * @throws IOException - */ - protected Map deleteCleanedFiles(String partitionPath, List commits) throws IOException { - logger.info("Cleaning path " + partitionPath); - FileSystem fs = FSUtils.getFs(); - FileStatus[] toBeDeleted = - fs.listStatus(new Path(config.getBasePath(), partitionPath), path -> { - if(!path.toString().contains(".parquet")) { - return false; - } - String fileCommitTime = FSUtils.getCommitTime(path.getName()); - return commits.contains(fileCommitTime); - }); - Map results = Maps.newHashMap(); - for (FileStatus file : toBeDeleted) { - boolean success = fs.delete(file.getPath(), false); - results.put(file, success); - logger.info("Delete file " + file.getPath() + "\t" + success); - } - return results; + public BucketInfo getBucketInfo(int bucketNumber) { + return bucketInfoMap.get(bucketNumber); + } + + public List getInsertBuckets(String partitionPath) { + return partitionPathToInsertBuckets.get(partitionPath); } @Override - public List rollback(JavaSparkContext jsc, List commits) throws IOException { - String actionType = this.getCompactedCommitActionType(); - HoodieActiveTimeline activeTimeline = this.getActiveTimeline(); - List inflights = this.getInflightCommitTimeline().getInstants().map(HoodieInstant::getTimestamp) - .collect(Collectors.toList()); - - // Atomically unpublish all the commits - commits.stream().filter(s -> !inflights.contains(s)) - .map(s -> new HoodieInstant(false, actionType, s)) - .forEach(activeTimeline::revertToInflight); - logger.info("Unpublished " + commits); - - // delete all the data files for all these commits - logger.info("Clean out all parquet files generated for commits: " + 
commits); - List stats = jsc.parallelize( - FSUtils.getAllPartitionPaths(FSUtils.getFs(), this.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning())) - .map((Function) partitionPath -> { - // Scan all partitions files with this commit time - Map results = deleteCleanedFiles(partitionPath, commits); - return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) - .withDeletedFileResults(results).build(); - }).collect(); - - // Remove the rolled back inflight commits - commits.stream().map(s -> new HoodieInstant(true, actionType, s)) - .forEach(activeTimeline::deleteInflight); - logger.info("Deleted inflight commits " + commits); - return stats; + public int numPartitions() { + return totalBuckets; } - private static class PartitionCleanStat implements Serializable { - private final String partitionPath; - private final List deletePathPatterns = new ArrayList<>(); - private final List successDeleteFiles = new ArrayList<>(); - private final List failedDeleteFiles = new ArrayList<>(); - - private PartitionCleanStat(String partitionPath) { - this.partitionPath = partitionPath; + @Override + public int getPartition(Object key) { + Tuple2> keyLocation = (Tuple2>) key; + if (keyLocation._2().isDefined()) { + HoodieRecordLocation location = keyLocation._2().get(); + return updateLocationToBucket.get(location.getFileId()); + } else { + List targetBuckets = partitionPathToInsertBuckets + .get(keyLocation._1().getPartitionPath()); + // pick the target bucket to use based on the weights. 
+ double totalWeight = 0.0; + final long totalInserts = Math.max(1, globalStat.getNumInserts()); + final long hashOfKey = Hashing.md5() + .hashString(keyLocation._1().getRecordKey(), StandardCharsets.UTF_8).asLong(); + final double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts; + for (InsertBucket insertBucket : targetBuckets) { + totalWeight += insertBucket.weight; + if (r <= totalWeight) { + return insertBucket.bucketNumber; + } } + // return first one, by default + return targetBuckets.get(0).bucketNumber; + } + } + } - private void addDeletedFileResult(String deletePathStr, Boolean deletedFileResult) { - if (deletedFileResult) { - successDeleteFiles.add(deletePathStr); - } else { - failedDeleteFiles.add(deletePathStr); - } + + @Override + public Partitioner getUpsertPartitioner(WorkloadProfile profile) { + if (profile == null) { + throw new HoodieUpsertException("Need workload profile to construct the upsert partitioner."); + } + return new UpsertPartitioner(profile); + } + + @Override + public Partitioner getInsertPartitioner(WorkloadProfile profile) { + return getUpsertPartitioner(profile); + } + + @Override + public boolean isWorkloadProfileNeeded() { + return true; + } + + + public Iterator> handleUpdate(String commitTime, String fileLoc, + Iterator> recordItr) + throws IOException { + // these are updates + HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileLoc, recordItr); + if (upsertHandle.getOldFilePath() == null) { + throw new HoodieUpsertException("Error in finding the old file path at commit " + + commitTime + " at fileLoc: " + fileLoc); + } else { + Configuration conf = FSUtils.getFs().getConf(); + AvroReadSupport.setAvroReadSchema(conf, upsertHandle.getSchema()); + ParquetReader reader = + AvroParquetReader.builder(upsertHandle.getOldFilePath()).withConf(conf).build(); + try { + IndexedRecord record; + while ((record = reader.read()) != null) { + // Two types of writes here (new record, and old record). 
+ // We have already catch the exception during writing new records. + // But for old records, we should fail if any exception happens. + upsertHandle.write((GenericRecord) record); } + } catch (IOException e) { + throw new HoodieUpsertException( + "Failed to read record from " + upsertHandle.getOldFilePath() + + " with new Schema " + upsertHandle.getSchema(), e); + } finally { + reader.close(); + upsertHandle.close(); + } + } + //TODO(vc): This needs to be revisited + if (upsertHandle.getWriteStatus().getPartitionPath() == null) { + logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + + ", " + upsertHandle.getWriteStatus()); + } + return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus())) + .iterator(); + } - private void addDeleteFilePatterns(String deletePathStr) { - deletePathPatterns.add(deletePathStr); + protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileLoc, + Iterator> recordItr) { + return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileLoc); + } + + public Iterator> handleInsert(String commitTime, + Iterator> recordItr) throws Exception { + return new LazyInsertIterable<>(recordItr, config, commitTime, this); + } + + + @SuppressWarnings("unchecked") + @Override + public Iterator> handleUpsertPartition(String commitTime, Integer partition, + Iterator recordItr, Partitioner partitioner) { + UpsertPartitioner upsertPartitioner = (UpsertPartitioner) partitioner; + BucketInfo binfo = upsertPartitioner.getBucketInfo(partition); + BucketType btype = binfo.bucketType; + try { + if (btype.equals(BucketType.INSERT)) { + return handleInsert(commitTime, recordItr); + } else if (btype.equals(BucketType.UPDATE)) { + return handleUpdate(commitTime, binfo.fileLoc, recordItr); + } else { + throw new HoodieUpsertException( + "Unknown bucketType " + btype + " for partition :" + partition); + } + } catch (Throwable t) { + String msg = "Error upserting bucketType " + 
btype + " for partition :" + partition; + logger.error(msg, t); + throw new HoodieUpsertException(msg, t); + } + } + + @Override + public Iterator> handleInsertPartition(String commitTime, Integer partition, + Iterator recordItr, + Partitioner partitioner) { + return handleUpsertPartition(commitTime, partition, recordItr, partitioner); + } + + @Override + public Optional compact(JavaSparkContext jsc) { + logger.info("Nothing to compact in COW storage format"); + return Optional.empty(); + } + + /** + * Performs cleaning of partition paths according to cleaning policy and returns the number of + * files cleaned. Handles skews in partitions to clean by making files to clean as the unit of + * task distribution. + * + * @throws IllegalArgumentException if unknown cleaning policy is provided + */ + @Override + public List clean(JavaSparkContext jsc) { + try { + List partitionsToClean = + FSUtils.getAllPartitionPaths(getFs(), getMetaClient().getBasePath(), + config.shouldAssumeDatePartitioning()); + logger.info("Partitions to clean up : " + partitionsToClean + ", with policy " + config + .getCleanerPolicy()); + if (partitionsToClean.isEmpty()) { + logger.info("Nothing to clean here mom. 
It is already clean"); + return Collections.emptyList(); + } + return cleanPartitionPaths(partitionsToClean, jsc); + } catch (IOException e) { + throw new HoodieIOException("Failed to clean up after commit", e); + } + } + + /** + * Common method used for cleaning out parquet files under a partition path during rollback of a + * set of commits + */ + protected Map deleteCleanedFiles(String partitionPath, List commits) + throws IOException { + logger.info("Cleaning path " + partitionPath); + FileSystem fs = FSUtils.getFs(); + FileStatus[] toBeDeleted = + fs.listStatus(new Path(config.getBasePath(), partitionPath), path -> { + if (!path.toString().contains(".parquet")) { + return false; + } + String fileCommitTime = FSUtils.getCommitTime(path.getName()); + return commits.contains(fileCommitTime); + }); + Map results = Maps.newHashMap(); + for (FileStatus file : toBeDeleted) { + boolean success = fs.delete(file.getPath(), false); + results.put(file, success); + logger.info("Delete file " + file.getPath() + "\t" + success); + } + return results; + } + + @Override + public List rollback(JavaSparkContext jsc, List commits) + throws IOException { + String actionType = this.getCompactedCommitActionType(); + HoodieActiveTimeline activeTimeline = this.getActiveTimeline(); + List inflights = this.getInflightCommitTimeline().getInstants() + .map(HoodieInstant::getTimestamp) + .collect(Collectors.toList()); + + // Atomically unpublish all the commits + commits.stream().filter(s -> !inflights.contains(s)) + .map(s -> new HoodieInstant(false, actionType, s)) + .forEach(activeTimeline::revertToInflight); + logger.info("Unpublished " + commits); + + // delete all the data files for all these commits + logger.info("Clean out all parquet files generated for commits: " + commits); + List stats = jsc.parallelize( + FSUtils.getAllPartitionPaths(FSUtils.getFs(), this.getMetaClient().getBasePath(), + config.shouldAssumeDatePartitioning())) + .map((Function) partitionPath -> { + // Scan all 
partitions files with this commit time + Map results = deleteCleanedFiles(partitionPath, commits); + return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) + .withDeletedFileResults(results).build(); + }).collect(); + + // Remove the rolled back inflight commits + commits.stream().map(s -> new HoodieInstant(true, actionType, s)) + .forEach(activeTimeline::deleteInflight); + logger.info("Deleted inflight commits " + commits); + return stats; + } + + private static class PartitionCleanStat implements Serializable { + + private final String partitionPath; + private final List deletePathPatterns = new ArrayList<>(); + private final List successDeleteFiles = new ArrayList<>(); + private final List failedDeleteFiles = new ArrayList<>(); + + private PartitionCleanStat(String partitionPath) { + this.partitionPath = partitionPath; + } + + private void addDeletedFileResult(String deletePathStr, Boolean deletedFileResult) { + if (deletedFileResult) { + successDeleteFiles.add(deletePathStr); + } else { + failedDeleteFiles.add(deletePathStr); + } + } + + private void addDeleteFilePatterns(String deletePathStr) { + deletePathPatterns.add(deletePathStr); + } + + private PartitionCleanStat merge(PartitionCleanStat other) { + if (!this.partitionPath.equals(other.partitionPath)) { + throw new RuntimeException(String.format( + "partitionPath is not a match: (%s, %s)", + partitionPath, other.partitionPath)); + } + successDeleteFiles.addAll(other.successDeleteFiles); + deletePathPatterns.addAll(other.deletePathPatterns); + failedDeleteFiles.addAll(other.failedDeleteFiles); + return this; + } + } + + private List cleanPartitionPaths(List partitionsToClean, + JavaSparkContext jsc) { + int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism()); + logger.info("Using cleanerParallelism: " + cleanerParallelism); + List> partitionCleanStats = jsc + .parallelize(partitionsToClean, cleanerParallelism) + .flatMapToPair(getFilesToDeleteFunc(this, 
config)) + .repartition(cleanerParallelism) // repartition to remove skews + .mapPartitionsToPair(deleteFilesFunc(this, config)) + .reduceByKey( + // merge partition level clean stats below + (Function2) (e1, e2) -> e1 + .merge(e2)) + .collect(); + + Map partitionCleanStatsMap = partitionCleanStats + .stream().collect(Collectors.toMap(e -> e._1(), e -> e._2())); + + HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config); + // Return PartitionCleanStat for each partition passed. + return partitionsToClean.stream().map(partitionPath -> { + PartitionCleanStat partitionCleanStat = + (partitionCleanStatsMap.containsKey(partitionPath)) ? + partitionCleanStatsMap.get(partitionPath) + : new PartitionCleanStat(partitionPath); + return HoodieCleanStat.newBuilder() + .withPolicy(config.getCleanerPolicy()) + .withPartitionPath(partitionPath) + .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain()) + .withDeletePathPattern(partitionCleanStat.deletePathPatterns) + .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles) + .withFailedDeletes(partitionCleanStat.failedDeleteFiles) + .build(); + }).collect(Collectors.toList()); + } + + private PairFlatMapFunction>, String, PartitionCleanStat> deleteFilesFunc( + HoodieTable table, HoodieWriteConfig config) { + return (PairFlatMapFunction>, String, PartitionCleanStat>) iter -> { + HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config); + Map partitionCleanStatMap = new HashMap<>(); + + while (iter.hasNext()) { + Tuple2 partitionDelFileTuple = iter.next(); + String partitionPath = partitionDelFileTuple._1(); + String deletePathStr = partitionDelFileTuple._2(); + Boolean deletedFileResult = deleteFileAndGetResult(deletePathStr); + if (!partitionCleanStatMap.containsKey(partitionPath)) { + partitionCleanStatMap.put(partitionPath, + new PartitionCleanStat(partitionPath)); } + PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath); + 
partitionCleanStat.addDeleteFilePatterns(deletePathStr); + partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult); + } - private PartitionCleanStat merge(PartitionCleanStat other) { - if (!this.partitionPath.equals(other.partitionPath)) { - throw new RuntimeException(String.format( - "partitionPath is not a match: (%s, %s)", - partitionPath, other.partitionPath)); - } - successDeleteFiles.addAll(other.successDeleteFiles); - deletePathPatterns.addAll(other.deletePathPatterns); - failedDeleteFiles.addAll(other.failedDeleteFiles); - return this; - } - } - - private List cleanPartitionPaths(List partitionsToClean, JavaSparkContext jsc) { - int cleanerParallelism = Math.min(partitionsToClean.size(), config.getCleanerParallelism()); - logger.info("Using cleanerParallelism: " + cleanerParallelism); - List> partitionCleanStats = jsc - .parallelize(partitionsToClean, cleanerParallelism) - .flatMapToPair(getFilesToDeleteFunc(this, config)) - .repartition(cleanerParallelism) // repartition to remove skews - .mapPartitionsToPair(deleteFilesFunc(this, config)) - .reduceByKey( // merge partition level clean stats below - (Function2) (e1, e2) -> e1 - .merge(e2)) - .collect(); - - Map partitionCleanStatsMap = partitionCleanStats - .stream().collect(Collectors.toMap(e -> e._1(), e -> e._2())); - - HoodieCleanHelper cleaner = new HoodieCleanHelper(this, config); - // Return PartitionCleanStat for each partition passed. - return partitionsToClean.stream().map(partitionPath -> { - PartitionCleanStat partitionCleanStat = - (partitionCleanStatsMap.containsKey(partitionPath)) ? 
- partitionCleanStatsMap.get(partitionPath) - : new PartitionCleanStat(partitionPath); - return HoodieCleanStat.newBuilder() - .withPolicy(config.getCleanerPolicy()) - .withPartitionPath(partitionPath) - .withEarliestCommitRetained(cleaner.getEarliestCommitToRetain()) - .withDeletePathPattern(partitionCleanStat.deletePathPatterns) - .withSuccessfulDeletes(partitionCleanStat.successDeleteFiles) - .withFailedDeletes(partitionCleanStat.failedDeleteFiles) - .build(); - }).collect(Collectors.toList()); - } - - private PairFlatMapFunction>, String, PartitionCleanStat> deleteFilesFunc( - HoodieTable table, HoodieWriteConfig config) { - return (PairFlatMapFunction>, String, PartitionCleanStat>) iter -> { - HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config); - Map partitionCleanStatMap = new HashMap<>(); - - while (iter.hasNext()) { - Tuple2 partitionDelFileTuple = iter.next(); - String partitionPath = partitionDelFileTuple._1(); - String deletePathStr = partitionDelFileTuple._2(); - Boolean deletedFileResult = deleteFileAndGetResult(deletePathStr); - if (!partitionCleanStatMap.containsKey(partitionPath)) { - partitionCleanStatMap.put(partitionPath, - new PartitionCleanStat(partitionPath)); - } - PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath); - partitionCleanStat.addDeleteFilePatterns(deletePathStr); - partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult); - } - - return partitionCleanStatMap.entrySet().stream() - .map(e -> new Tuple2<>(e.getKey(), e.getValue())) - .collect(Collectors.toList()).iterator(); - }; - } - - private static PairFlatMapFunction getFilesToDeleteFunc( - HoodieTable table, HoodieWriteConfig config) { - return (PairFlatMapFunction) partitionPathToClean -> { - HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config); - return cleaner.getDeletePaths(partitionPathToClean).stream() - .map(deleteFile -> new Tuple2<>(partitionPathToClean, deleteFile.toString())) - .iterator(); - }; 
- } - - private Boolean deleteFileAndGetResult(String deletePathStr) throws IOException { - Path deletePath = new Path(deletePathStr); - logger.debug("Working on delete path :" + deletePath); - boolean deleteResult = getFs().delete(deletePath, false); - if (deleteResult) { - logger.debug("Cleaned file at path :" + deletePath); - } - return deleteResult; + return partitionCleanStatMap.entrySet().stream() + .map(e -> new Tuple2<>(e.getKey(), e.getValue())) + .collect(Collectors.toList()).iterator(); + }; + } + + private static PairFlatMapFunction getFilesToDeleteFunc( + HoodieTable table, HoodieWriteConfig config) { + return (PairFlatMapFunction) partitionPathToClean -> { + HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config); + return cleaner.getDeletePaths(partitionPathToClean).stream() + .map(deleteFile -> new Tuple2<>(partitionPathToClean, deleteFile.toString())) + .iterator(); + }; + } + + private Boolean deleteFileAndGetResult(String deletePathStr) throws IOException { + Path deletePath = new Path(deletePathStr); + logger.debug("Working on delete path :" + deletePath); + boolean deleteResult = getFs().delete(deletePath, false); + if (deleteResult) { + logger.debug("Cleaned file at path :" + deletePath); } + return deleteResult; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java index fe84238e3..1a2cfa1c0 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieMergeOnReadTable.java @@ -39,13 +39,6 @@ import com.uber.hoodie.exception.HoodieCompactionException; import com.uber.hoodie.exception.HoodieRollbackException; import com.uber.hoodie.io.HoodieAppendHandle; import com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import 
org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.api.java.function.Function; - import java.io.IOException; import java.io.UncheckedIOException; import java.util.Arrays; @@ -56,179 +49,209 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; /** * Implementation of a more real-time read-optimized Hoodie Table where * * INSERTS - Same as HoodieCopyOnWriteTable - Produce new files, block aligned to desired size (or) - * Merge with the smallest existing file, to expand it + * Merge with the smallest existing file, to expand it * - * UPDATES - Appends the changes to a rolling log file maintained per file Id. - * Compaction merges the log file into the base file. + * UPDATES - Appends the changes to a rolling log file maintained per file Id. Compaction merges the + * log file into the base file. 
* - * WARNING - MOR table type does not support nested rollbacks, every rollback - * must be followed by an attempted commit action + * WARNING - MOR table type does not support nested rollbacks, every rollback must be followed by an + * attempted commit action */ -public class HoodieMergeOnReadTable extends HoodieCopyOnWriteTable { - private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class); +public class HoodieMergeOnReadTable extends + HoodieCopyOnWriteTable { - public HoodieMergeOnReadTable(HoodieWriteConfig config, - HoodieTableMetaClient metaClient) { - super(config, metaClient); + private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class); + + public HoodieMergeOnReadTable(HoodieWriteConfig config, + HoodieTableMetaClient metaClient) { + super(config, metaClient); + } + + @Override + public Iterator> handleUpdate(String commitTime, String fileId, + Iterator> recordItr) throws IOException { + logger.info("Merging updates for commit " + commitTime + " for file " + fileId); + HoodieAppendHandle appendHandle = + new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr); + appendHandle.doAppend(); + appendHandle.close(); + return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus())) + .iterator(); + } + + @Override + public Optional compact(JavaSparkContext jsc) { + logger.info("Checking if compaction needs to be run on " + config.getBasePath()); + Optional lastCompaction = getActiveTimeline().getCompactionTimeline() + .filterCompletedInstants().lastInstant(); + String deltaCommitsSinceTs = "0"; + if (lastCompaction.isPresent()) { + deltaCommitsSinceTs = lastCompaction.get().getTimestamp(); } - @Override - public Iterator> handleUpdate(String commitTime, String fileId, - Iterator> recordItr) throws IOException { - logger.info("Merging updates for commit " + commitTime + " for file " + fileId); - HoodieAppendHandle appendHandle = - new HoodieAppendHandle<>(config, commitTime, 
this, fileId, recordItr); - appendHandle.doAppend(); - appendHandle.close(); - return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus())) - .iterator(); + int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline() + .findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants(); + if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) { + logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction + + " delta commits was found since last compaction " + deltaCommitsSinceTs + + ". Waiting for " + config.getInlineCompactDeltaCommitMax()); + return Optional.empty(); } - @Override - public Optional compact(JavaSparkContext jsc) { - logger.info("Checking if compaction needs to be run on " + config.getBasePath()); - Optional lastCompaction = getActiveTimeline().getCompactionTimeline() - .filterCompletedInstants().lastInstant(); - String deltaCommitsSinceTs = "0"; - if (lastCompaction.isPresent()) { - deltaCommitsSinceTs = lastCompaction.get().getTimestamp(); - } - - int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline() - .findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants(); - if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) { - logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction - + " delta commits was found since last compaction " + deltaCommitsSinceTs - + ". 
Waiting for " + config.getInlineCompactDeltaCommitMax()); - return Optional.empty(); - } - - logger.info("Compacting merge on read table " + config.getBasePath()); - HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor(); - try { - return Optional.of(compactor.compact(jsc, config, this)); - } catch (IOException e) { - throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e); - } + logger.info("Compacting merge on read table " + config.getBasePath()); + HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor(); + try { + return Optional.of(compactor.compact(jsc, config, this)); + } catch (IOException e) { + throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e); } + } - @Override - public List rollback(JavaSparkContext jsc, List commits) throws IOException { + @Override + public List rollback(JavaSparkContext jsc, List commits) + throws IOException { - //At the moment, MOR table type does not support nested rollbacks - if(commits.size() > 1) { - throw new UnsupportedOperationException("Nested Rollbacks are not supported"); - } - Map commitsAndCompactions = - this.getActiveTimeline() - .getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION)) - .getInstants() - .filter(i -> commits.contains(i.getTimestamp())) - .collect(Collectors.toMap(i -> i.getTimestamp(), i -> i)); - - // Atomically un-publish all non-inflight commits - commitsAndCompactions.entrySet().stream().map(entry -> entry.getValue()) - .filter(i -> !i.isInflight()).forEach(this.getActiveTimeline()::revertToInflight); - - logger.info("Unpublished " + commits); - - Long startTime = System.currentTimeMillis(); - - List allRollbackStats = commits.stream().map(commit -> { - HoodieInstant instant = commitsAndCompactions.get(commit); - List stats = null; - switch (instant.getAction()) { - case 
HoodieTimeline.COMMIT_ACTION: - case HoodieTimeline.COMPACTION_ACTION: - try { - logger.info("Starting to rollback Commit/Compaction " + instant); - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(this.getCommitTimeline().getInstantDetails(new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get()); - - stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream().collect(Collectors.toList())) - .map((Function) partitionPath -> { - Map results = super.deleteCleanedFiles(partitionPath, Arrays.asList(commit)); - return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) - .withDeletedFileResults(results).build(); - }).collect(); - logger.info("Finished rollback of Commit/Compaction " + instant); - break; - } catch (IOException io) { - throw new UncheckedIOException("Failed to rollback for commit " + commit, io); - } - case HoodieTimeline.DELTA_COMMIT_ACTION: - try { - logger.info("Starting to rollback delta commit " + instant); - - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata - .fromBytes(this.getCommitTimeline().getInstantDetails(new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get()); - - stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream().collect(Collectors.toList())) - .map((Function) partitionPath -> { - // read commit file and (either append delete blocks or delete file) - Map filesToDeletedStatus = new HashMap<>(); - Map filesToNumBlocksRollback = new HashMap<>(); - - // we do not know fileIds for inserts (first inserts are parquet files), delete all parquet files for the corresponding failed commit, if present (same as COW) - filesToDeletedStatus = super.deleteCleanedFiles(partitionPath, Arrays.asList(commit)); - - // append rollback blocks for updates - commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT).forEach(wStat -> { - 
HoodieLogFormat.Writer writer = null; - try { - writer = HoodieLogFormat.newWriterBuilder() - .onParentPath(new Path(this.getMetaClient().getBasePath(), partitionPath)) - .withFileId(wStat.getFileId()).overBaseCommit(wStat.getPrevCommit()) - .withFs(FSUtils.getFs()).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build(); - Long numRollbackBlocks = 0L; - // generate metadata - Map metadata = Maps.newHashMap(); - metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp()); - metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, commit); - // if update belongs to an existing log file - writer.appendBlock(new HoodieCommandBlock(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata)); - numRollbackBlocks++; - if(wStat.getNumDeletes() > 0) { - writer.appendBlock(new HoodieCommandBlock(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata)); - numRollbackBlocks++; - } - filesToNumBlocksRollback.put(FSUtils.getFs().getFileStatus(writer.getLogFile().getPath()), numRollbackBlocks); - } catch (IOException | InterruptedException io) { - throw new HoodieRollbackException("Failed to rollback for commit " + commit, io); - } finally { - try { - writer.close(); - } catch (IOException io) { - throw new UncheckedIOException(io); - } - } - }); - return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) - .withDeletedFileResults(filesToDeletedStatus) - .withRollbackBlockAppendResults(filesToNumBlocksRollback).build(); - }).collect(); - logger.info("Fnished rollback of delta commit " + instant); - break; - } catch (IOException io) { - throw new UncheckedIOException("Failed to rollback for commit " + commit, io); - } - } - return stats; - }).flatMap(x -> x.stream()).collect(Collectors.toList()); - - commitsAndCompactions.entrySet().stream() - .map(entry -> new HoodieInstant(true, entry.getValue().getAction(), entry.getValue().getTimestamp())) - 
.forEach(this.getActiveTimeline()::deleteInflight); - - logger.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime)); - - return allRollbackStats; + //At the moment, MOR table type does not support nested rollbacks + if (commits.size() > 1) { + throw new UnsupportedOperationException("Nested Rollbacks are not supported"); } + Map commitsAndCompactions = + this.getActiveTimeline() + .getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, + HoodieActiveTimeline.COMPACTION_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION)) + .getInstants() + .filter(i -> commits.contains(i.getTimestamp())) + .collect(Collectors.toMap(i -> i.getTimestamp(), i -> i)); + + // Atomically un-publish all non-inflight commits + commitsAndCompactions.entrySet().stream().map(entry -> entry.getValue()) + .filter(i -> !i.isInflight()).forEach(this.getActiveTimeline()::revertToInflight); + + logger.info("Unpublished " + commits); + + Long startTime = System.currentTimeMillis(); + + List allRollbackStats = commits.stream().map(commit -> { + HoodieInstant instant = commitsAndCompactions.get(commit); + List stats = null; + switch (instant.getAction()) { + case HoodieTimeline.COMMIT_ACTION: + case HoodieTimeline.COMPACTION_ACTION: + try { + logger.info("Starting to rollback Commit/Compaction " + instant); + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(this.getCommitTimeline().getInstantDetails( + new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get()); + + stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream() + .collect(Collectors.toList())) + .map((Function) partitionPath -> { + Map results = super + .deleteCleanedFiles(partitionPath, Arrays.asList(commit)); + return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) + .withDeletedFileResults(results).build(); + }).collect(); + logger.info("Finished rollback of Commit/Compaction " + instant); + break; + } 
catch (IOException io) { + throw new UncheckedIOException("Failed to rollback for commit " + commit, io); + } + case HoodieTimeline.DELTA_COMMIT_ACTION: + try { + logger.info("Starting to rollback delta commit " + instant); + + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(this.getCommitTimeline().getInstantDetails( + new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get()); + + stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream() + .collect(Collectors.toList())) + .map((Function) partitionPath -> { + // read commit file and (either append delete blocks or delete file) + Map filesToDeletedStatus = new HashMap<>(); + Map filesToNumBlocksRollback = new HashMap<>(); + + // we do not know fileIds for inserts (first inserts are parquet files), delete all parquet files for the corresponding failed commit, if present (same as COW) + filesToDeletedStatus = super + .deleteCleanedFiles(partitionPath, Arrays.asList(commit)); + + // append rollback blocks for updates + commitMetadata.getPartitionToWriteStats().get(partitionPath).stream() + .filter(wStat -> wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT) + .forEach(wStat -> { + HoodieLogFormat.Writer writer = null; + try { + writer = HoodieLogFormat.newWriterBuilder() + .onParentPath( + new Path(this.getMetaClient().getBasePath(), partitionPath)) + .withFileId(wStat.getFileId()).overBaseCommit(wStat.getPrevCommit()) + .withFs(FSUtils.getFs()) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build(); + Long numRollbackBlocks = 0L; + // generate metadata + Map metadata = Maps.newHashMap(); + metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, + metaClient.getActiveTimeline().lastInstant().get().getTimestamp()); + metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, commit); + // if update belongs to an existing log file + writer.appendBlock(new HoodieCommandBlock( + 
HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, + metadata)); + numRollbackBlocks++; + if (wStat.getNumDeletes() > 0) { + writer.appendBlock(new HoodieCommandBlock( + HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, + metadata)); + numRollbackBlocks++; + } + filesToNumBlocksRollback + .put(FSUtils.getFs().getFileStatus(writer.getLogFile().getPath()), + numRollbackBlocks); + } catch (IOException | InterruptedException io) { + throw new HoodieRollbackException( + "Failed to rollback for commit " + commit, io); + } finally { + try { + writer.close(); + } catch (IOException io) { + throw new UncheckedIOException(io); + } + } + }); + return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath) + .withDeletedFileResults(filesToDeletedStatus) + .withRollbackBlockAppendResults(filesToNumBlocksRollback).build(); + }).collect(); + logger.info("Finished rollback of delta commit " + instant); + break; + } catch (IOException io) { + throw new UncheckedIOException("Failed to rollback for commit " + commit, io); + } + } + return stats; + }).flatMap(x -> x.stream()).collect(Collectors.toList()); + + commitsAndCompactions.entrySet().stream() + .map(entry -> new HoodieInstant(true, entry.getValue().getAction(), + entry.getValue().getTimestamp())) + .forEach(this.getActiveTimeline()::deleteInflight); + + logger + .debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime)); + + return allRollbackStats; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java index 8ed494f79..88f7f9b4b 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieTable.java @@ -34,7 +34,6 @@ import com.uber.hoodie.common.util.AvroUtils; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieCommitException; 
import com.uber.hoodie.exception.HoodieException; -import com.uber.hoodie.exception.HoodieRollbackException; import com.uber.hoodie.exception.HoodieSavepointException; import java.io.IOException; import java.io.Serializable; @@ -43,8 +42,6 @@ import java.util.List; import java.util.Optional; import java.util.stream.Collectors; import java.util.stream.Stream; - -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -55,291 +52,245 @@ import org.apache.spark.api.java.JavaSparkContext; * Abstract implementation of a HoodieTable */ public abstract class HoodieTable implements Serializable { - protected final HoodieWriteConfig config; - protected final HoodieTableMetaClient metaClient; - private static Logger logger = LogManager.getLogger(HoodieTable.class); - protected HoodieTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) { - this.config = config; - this.metaClient = metaClient; + protected final HoodieWriteConfig config; + protected final HoodieTableMetaClient metaClient; + private static Logger logger = LogManager.getLogger(HoodieTable.class); + + protected HoodieTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) { + this.config = config; + this.metaClient = metaClient; + } + + /** + * Provides a partitioner to perform the upsert operation, based on the workload profile + */ + public abstract Partitioner getUpsertPartitioner(WorkloadProfile profile); + + + /** + * Provides a partitioner to perform the insert operation, based on the workload profile + */ + public abstract Partitioner getInsertPartitioner(WorkloadProfile profile); + + + /** + * Return whether this HoodieTable implementation can benefit from workload profiling + */ + public abstract boolean isWorkloadProfileNeeded(); + + public HoodieWriteConfig getConfig() { + return config; + } + + public HoodieTableMetaClient getMetaClient() { + return metaClient; + } + + public FileSystem 
getFs() { + return metaClient.getFs(); + } + + /** + * Get the view of the file system for this table + */ + public TableFileSystemView getFileSystemView() { + return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + } + + /** + * Get the read optimized view of the file system for this table + */ + public TableFileSystemView.ReadOptimizedView getROFileSystemView() { + return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + } + + /** + * Get the real time view of the file system for this table + */ + public TableFileSystemView.RealtimeView getRTFileSystemView() { + return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + } + + /** + * Get the completed (commit + compaction) view of the file system for this table + */ + public TableFileSystemView getCompletedFileSystemView() { + return new HoodieTableFileSystemView(metaClient, getCommitTimeline()); + } + + /** + * Get only the completed (no-inflights) commit timeline + */ + public HoodieTimeline getCompletedCommitTimeline() { + return getCommitTimeline().filterCompletedInstants(); + } + + /** + * Get only the inflights (no-completed) commit timeline + */ + public HoodieTimeline getInflightCommitTimeline() { + return getCommitTimeline().filterInflights(); + } + + + /** + * Get only the completed (no-inflights) clean timeline + */ + public HoodieTimeline getCompletedCleanTimeline() { + return getActiveTimeline().getCleanerTimeline().filterCompletedInstants(); + } + + /** + * Get only the completed (no-inflights) savepoint timeline + */ + public HoodieTimeline getCompletedSavepointTimeline() { + return getActiveTimeline().getSavePointTimeline().filterCompletedInstants(); + } + + /** + * Get the list of savepoints in this table + */ + public List getSavepoints() { + return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp) + .collect(Collectors.toList()); + } + + /** + * Get the list of data file names savepointed + */ 
+ public Stream getSavepointedDataFiles(String savepointTime) { + if (!getSavepoints().contains(savepointTime)) { + throw new HoodieSavepointException( + "Could not get data files for savepoint " + savepointTime + ". No such savepoint."); } - - /** - * Provides a partitioner to perform the upsert operation, based on the - * workload profile - * - * @return - */ - public abstract Partitioner getUpsertPartitioner(WorkloadProfile profile); - - - /** - * Provides a partitioner to perform the insert operation, based on the workload profile - * - * @return - */ - public abstract Partitioner getInsertPartitioner(WorkloadProfile profile); - - - /** - * Return whether this HoodieTable implementation can benefit from workload - * profiling - * - * @return - */ - public abstract boolean isWorkloadProfileNeeded(); - - public HoodieWriteConfig getConfig() { - return config; + HoodieInstant instant = + new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime); + HoodieSavepointMetadata metadata = null; + try { + metadata = AvroUtils.deserializeHoodieSavepointMetadata( + getActiveTimeline().getInstantDetails(instant).get()); + } catch (IOException e) { + throw new HoodieSavepointException( + "Could not get savepointed data files for savepoint " + savepointTime, e); } + return metadata.getPartitionMetadata().values().stream() + .flatMap(s -> s.getSavepointDataFile().stream()); + } - public HoodieTableMetaClient getMetaClient() { - return metaClient; + public HoodieActiveTimeline getActiveTimeline() { + return metaClient.getActiveTimeline(); + } + + /** + * Get the commit timeline visible for this table + */ + public HoodieTimeline getCommitTimeline() { + switch (metaClient.getTableType()) { + case COPY_ON_WRITE: + return getActiveTimeline().getCommitTimeline(); + case MERGE_ON_READ: + // We need to include the parquet files written out in delta commits + // Include commit action to be able to start doing a MOR over a COW dataset - no migration required + return 
getActiveTimeline().getCommitsAndCompactionsTimeline(); + default: + throw new HoodieException("Unsupported table type :" + metaClient.getTableType()); } + } - public FileSystem getFs() { - return metaClient.getFs(); + /** + * Get only the completed (no-inflights) compaction commit timeline + */ + public HoodieTimeline getCompletedCompactionCommitTimeline() { + return getCompactionCommitTimeline().filterCompletedInstants(); + } + + + /** + * Get the compacted commit timeline visible for this table + */ + public HoodieTimeline getCompactionCommitTimeline() { + switch (metaClient.getTableType()) { + case COPY_ON_WRITE: + return getActiveTimeline().getCommitsAndCompactionsTimeline(); + case MERGE_ON_READ: + // We need to include the parquet files written out in delta commits in tagging + return getActiveTimeline().getTimelineOfActions( + Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION)); + default: + throw new HoodieException("Unsupported table type :" + metaClient.getTableType()); } + } - /** - * Get the view of the file system for this table - * - * @return - */ - public TableFileSystemView getFileSystemView() { - return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + /** + * Gets the commit action type + */ + public String getCommitActionType() { + switch (metaClient.getTableType()) { + case COPY_ON_WRITE: + return HoodieActiveTimeline.COMMIT_ACTION; + case MERGE_ON_READ: + return HoodieActiveTimeline.DELTA_COMMIT_ACTION; } + throw new HoodieCommitException( + "Could not commit on unknown storage type " + metaClient.getTableType()); + } - /** - * Get the read optimized view of the file system for this table - * - * @return - */ - public TableFileSystemView.ReadOptimizedView getROFileSystemView() { - return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + /** + * Gets the action type for a compaction commit + */ + public String getCompactedCommitActionType() { + switch (metaClient.getTableType()) { + case 
COPY_ON_WRITE: + return HoodieTimeline.COMMIT_ACTION; + case MERGE_ON_READ: + return HoodieTimeline.COMPACTION_ACTION; } + throw new HoodieException("Unsupported table type :" + metaClient.getTableType()); + } - /** - * Get the real time view of the file system for this table - * - * @return - */ - public TableFileSystemView.RealtimeView getRTFileSystemView() { - return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline()); + + /** + * Perform the ultimate IO for a given upserted (RDD) partition + */ + public abstract Iterator> handleUpsertPartition(String commitTime, + Integer partition, Iterator> recordIterator, Partitioner partitioner); + + /** + * Perform the ultimate IO for a given inserted (RDD) partition + */ + public abstract Iterator> handleInsertPartition(String commitTime, + Integer partition, Iterator> recordIterator, Partitioner partitioner); + + + public static HoodieTable getHoodieTable( + HoodieTableMetaClient metaClient, HoodieWriteConfig config) { + switch (metaClient.getTableType()) { + case COPY_ON_WRITE: + return new HoodieCopyOnWriteTable<>(config, metaClient); + case MERGE_ON_READ: + return new HoodieMergeOnReadTable<>(config, metaClient); + default: + throw new HoodieException("Unsupported table type :" + metaClient.getTableType()); } + } - /** - * Get the completed (commit + compaction) view of the file system for this table - * - * @return - */ - public TableFileSystemView getCompletedFileSystemView() { - return new HoodieTableFileSystemView(metaClient, getCommitTimeline()); - } + /** + * Run Compaction on the table. 
Compaction arranges the data so that it is optimized for data + * access + */ + public abstract Optional compact(JavaSparkContext jsc); - /** - * Get only the completed (no-inflights) commit timeline - * @return - */ - public HoodieTimeline getCompletedCommitTimeline() { - return getCommitTimeline().filterCompletedInstants(); - } + /** + * Clean partition paths according to cleaning policy and returns the number of files cleaned. + */ + public abstract List clean(JavaSparkContext jsc); - /** - * Get only the inflights (no-completed) commit timeline - * @return - */ - public HoodieTimeline getInflightCommitTimeline() { - return getCommitTimeline().filterInflights(); - } - - - /** - * Get only the completed (no-inflights) clean timeline - * @return - */ - public HoodieTimeline getCompletedCleanTimeline() { - return getActiveTimeline().getCleanerTimeline().filterCompletedInstants(); - } - - /** - * Get only the completed (no-inflights) savepoint timeline - * @return - */ - public HoodieTimeline getCompletedSavepointTimeline() { - return getActiveTimeline().getSavePointTimeline().filterCompletedInstants(); - } - - /** - * Get the list of savepoints in this table - * @return - */ - public List getSavepoints() { - return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp) - .collect(Collectors.toList()); - } - - /** - * Get the list of data file names savepointed - * - * @param savepointTime - * @return - * @throws IOException - */ - public Stream getSavepointedDataFiles(String savepointTime) { - if (!getSavepoints().contains(savepointTime)) { - throw new HoodieSavepointException( - "Could not get data files for savepoint " + savepointTime + ". 
No such savepoint."); - } - HoodieInstant instant = - new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime); - HoodieSavepointMetadata metadata = null; - try { - metadata = AvroUtils.deserializeHoodieSavepointMetadata( - getActiveTimeline().getInstantDetails(instant).get()); - } catch (IOException e) { - throw new HoodieSavepointException( - "Could not get savepointed data files for savepoint " + savepointTime, e); - } - return metadata.getPartitionMetadata().values().stream() - .flatMap(s -> s.getSavepointDataFile().stream()); - } - - public HoodieActiveTimeline getActiveTimeline() { - return metaClient.getActiveTimeline(); - } - - /** - * Get the commit timeline visible for this table - * - * @return - */ - public HoodieTimeline getCommitTimeline() { - switch (metaClient.getTableType()) { - case COPY_ON_WRITE: - return getActiveTimeline().getCommitTimeline(); - case MERGE_ON_READ: - // We need to include the parquet files written out in delta commits - // Include commit action to be able to start doing a MOR over a COW dataset - no migration required - return getActiveTimeline().getCommitsAndCompactionsTimeline(); - default: - throw new HoodieException("Unsupported table type :"+ metaClient.getTableType()); - } - } - - /** - * Get only the completed (no-inflights) compaction commit timeline - * @return - */ - public HoodieTimeline getCompletedCompactionCommitTimeline() { - return getCompactionCommitTimeline().filterCompletedInstants(); - } - - - /** - * Get the compacted commit timeline visible for this table - * - * @return - */ - public HoodieTimeline getCompactionCommitTimeline() { - switch (metaClient.getTableType()) { - case COPY_ON_WRITE: - return getActiveTimeline().getCommitsAndCompactionsTimeline(); - case MERGE_ON_READ: - // We need to include the parquet files written out in delta commits in tagging - return getActiveTimeline().getTimelineOfActions( - Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION)); - default: - throw new 
HoodieException("Unsupported table type :"+ metaClient.getTableType()); - } - } - - /** - * Gets the commit action type - * @return - */ - public String getCommitActionType() { - switch (metaClient.getTableType()) { - case COPY_ON_WRITE: - return HoodieActiveTimeline.COMMIT_ACTION; - case MERGE_ON_READ: - return HoodieActiveTimeline.DELTA_COMMIT_ACTION; - } - throw new HoodieCommitException( - "Could not commit on unknown storage type " + metaClient.getTableType()); - } - - /** - * Gets the action type for a compaction commit - * @return - */ - public String getCompactedCommitActionType() { - switch (metaClient.getTableType()) { - case COPY_ON_WRITE: - return HoodieTimeline.COMMIT_ACTION; - case MERGE_ON_READ: - return HoodieTimeline.COMPACTION_ACTION; - } - throw new HoodieException("Unsupported table type :"+ metaClient.getTableType()); - } - - - - /** - * Perform the ultimate IO for a given upserted (RDD) partition - * - * @param partition - * @param recordIterator - * @param partitioner - */ - public abstract Iterator> handleUpsertPartition(String commitTime, - Integer partition, Iterator> recordIterator, Partitioner partitioner); - - /** - * Perform the ultimate IO for a given inserted (RDD) partition - * - * @param partition - * @param recordIterator - * @param partitioner - */ - public abstract Iterator> handleInsertPartition(String commitTime, - Integer partition, Iterator> recordIterator, Partitioner partitioner); - - - public static HoodieTable getHoodieTable( - HoodieTableMetaClient metaClient, HoodieWriteConfig config) { - switch (metaClient.getTableType()) { - case COPY_ON_WRITE: - return new HoodieCopyOnWriteTable<>(config, metaClient); - case MERGE_ON_READ: - return new HoodieMergeOnReadTable<>(config, metaClient); - default: - throw new HoodieException("Unsupported table type :" + metaClient.getTableType()); - } - } - - /** - * Run Compaction on the table. 
- * Compaction arranges the data so that it is optimized for data access - */ - public abstract Optional compact(JavaSparkContext jsc); - - /** - * Clean partition paths according to cleaning policy and returns the number - * of files cleaned. - */ - public abstract List clean(JavaSparkContext jsc); - - /** - * Rollback the (inflight/committed) record changes with the given commit time. - * Four steps: - * (1) Atomically unpublish this commit - * (2) clean indexing data - * (3) clean new generated parquet files / log blocks - * (4) Finally, delete ..commit or ..inflight file - * @param commits - * @return - * @throws HoodieRollbackException - */ - public abstract List rollback(JavaSparkContext jsc, List commits) throws IOException; + /** + * Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1) + * Atomically unpublish this commit (2) clean indexing data (3) clean new generated parquet files + * / log blocks (4) Finally, delete ..commit or ..inflight file + */ + public abstract List rollback(JavaSparkContext jsc, List commits) + throws IOException; } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/UserDefinedBulkInsertPartitioner.java b/hoodie-client/src/main/java/com/uber/hoodie/table/UserDefinedBulkInsertPartitioner.java index 9a676f4e7..2ca51a31f 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/UserDefinedBulkInsertPartitioner.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/UserDefinedBulkInsertPartitioner.java @@ -20,13 +20,13 @@ import com.uber.hoodie.common.model.HoodieRecordPayload; import org.apache.spark.api.java.JavaRDD; /** - * Repartition input records into at least expected number of output spark partitions. It should give - * below guarantees - * - Output spark partition will have records from only one hoodie partition. - * - Average records per output spark partitions should be almost equal to (#inputRecords / #outputSparkPartitions) - * to avoid possible skews. 
+ * Repartition input records into at least expected number of output spark partitions. It should + * give below guarantees - Output spark partition will have records from only one hoodie partition. + * - Average records per output spark partitions should be almost equal to (#inputRecords / + * #outputSparkPartitions) to avoid possible skews. */ public interface UserDefinedBulkInsertPartitioner { - JavaRDD> repartitionRecords(JavaRDD> records, int outputSparkPartitions); + JavaRDD> repartitionRecords(JavaRDD> records, + int outputSparkPartitions); } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadProfile.java b/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadProfile.java index 1d1332ae8..07e863690 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadProfile.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadProfile.java @@ -20,15 +20,11 @@ package com.uber.hoodie.table; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieRecordPayload; - -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.function.PairFunction; - import java.io.Serializable; import java.util.HashMap; import java.util.Map; import java.util.Set; - +import org.apache.spark.api.java.JavaRDD; import scala.Option; import scala.Tuple2; @@ -40,73 +36,76 @@ import scala.Tuple2; */ public class WorkloadProfile implements Serializable { - /** - * Input workload - */ - private final JavaRDD> taggedRecords; + /** + * Input workload + */ + private final JavaRDD> taggedRecords; - /** - * Computed workload profile - */ - private final HashMap partitionPathStatMap; + /** + * Computed workload profile + */ + private final HashMap partitionPathStatMap; - private final WorkloadStat globalStat; + private final WorkloadStat globalStat; - public WorkloadProfile(JavaRDD> taggedRecords) { - this.taggedRecords = taggedRecords; - 
this.partitionPathStatMap = new HashMap<>(); - this.globalStat = new WorkloadStat(); - buildProfile(); + public WorkloadProfile(JavaRDD> taggedRecords) { + this.taggedRecords = taggedRecords; + this.partitionPathStatMap = new HashMap<>(); + this.globalStat = new WorkloadStat(); + buildProfile(); + } + + private void buildProfile() { + + Map>, Long> partitionLocationCounts = taggedRecords + .mapToPair(record -> + new Tuple2<>( + new Tuple2<>(record.getPartitionPath(), Option.apply(record.getCurrentLocation())), + record)) + .countByKey(); + + for (Map.Entry>, Long> e : partitionLocationCounts + .entrySet()) { + String partitionPath = e.getKey()._1(); + Long count = e.getValue(); + Option locOption = e.getKey()._2(); + + if (!partitionPathStatMap.containsKey(partitionPath)) { + partitionPathStatMap.put(partitionPath, new WorkloadStat()); + } + + if (locOption.isDefined()) { + // update + partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count); + globalStat.addUpdates(locOption.get(), count); + } else { + // insert + partitionPathStatMap.get(partitionPath).addInserts(count); + globalStat.addInserts(count); + } } + } - private void buildProfile() { + public WorkloadStat getGlobalStat() { + return globalStat; + } - Map>, Long> partitionLocationCounts = taggedRecords - .mapToPair(record -> - new Tuple2<>(new Tuple2<>(record.getPartitionPath(), Option.apply(record.getCurrentLocation())), record)) - .countByKey(); + public Set getPartitionPaths() { + return partitionPathStatMap.keySet(); + } - for (Map.Entry>, Long> e: partitionLocationCounts.entrySet()) { - String partitionPath = e.getKey()._1(); - Long count = e.getValue(); - Option locOption = e.getKey()._2(); + public WorkloadStat getWorkloadStat(String partitionPath) { + return partitionPathStatMap.get(partitionPath); + } - if (!partitionPathStatMap.containsKey(partitionPath)){ - partitionPathStatMap.put(partitionPath, new WorkloadStat()); - } - - if (locOption.isDefined()) { - // update - 
partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count); - globalStat.addUpdates(locOption.get(), count); - } else { - // insert - partitionPathStatMap.get(partitionPath).addInserts(count); - globalStat.addInserts(count); - } - } - } - - public WorkloadStat getGlobalStat() { - return globalStat; - } - - public Set getPartitionPaths() { - return partitionPathStatMap.keySet(); - } - - public WorkloadStat getWorkloadStat(String partitionPath){ - return partitionPathStatMap.get(partitionPath); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("WorkloadProfile {"); - sb.append("globalStat=").append(globalStat).append(", "); - sb.append("partitionStat=").append(partitionPathStatMap); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("WorkloadProfile {"); + sb.append("globalStat=").append(globalStat).append(", "); + sb.append("partitionStat=").append(partitionPathStatMap); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadStat.java b/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadStat.java index a0eea477a..10bf6735a 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadStat.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/table/WorkloadStat.java @@ -17,7 +17,6 @@ package com.uber.hoodie.table; import com.uber.hoodie.common.model.HoodieRecordLocation; - import java.io.Serializable; import java.util.HashMap; @@ -25,43 +24,44 @@ import java.util.HashMap; * Wraps stats about a single partition path. 
*/ public class WorkloadStat implements Serializable { - private long numInserts = 0L; - private long numUpdates = 0L; + private long numInserts = 0L; - private HashMap updateLocationToCount; + private long numUpdates = 0L; - public WorkloadStat() { - updateLocationToCount = new HashMap<>(); - } + private HashMap updateLocationToCount; - long addInserts(long numInserts) { - return this.numInserts += numInserts; - } + public WorkloadStat() { + updateLocationToCount = new HashMap<>(); + } - long addUpdates(HoodieRecordLocation location, long numUpdates) { - updateLocationToCount.put(location.getFileId(), numUpdates); - return this.numUpdates += numUpdates; - } + long addInserts(long numInserts) { + return this.numInserts += numInserts; + } - public long getNumUpdates() { - return numUpdates; - } + long addUpdates(HoodieRecordLocation location, long numUpdates) { + updateLocationToCount.put(location.getFileId(), numUpdates); + return this.numUpdates += numUpdates; + } - public long getNumInserts() { - return numInserts; - } + public long getNumUpdates() { + return numUpdates; + } - public HashMap getUpdateLocationToCount() { - return updateLocationToCount; - } + public long getNumInserts() { + return numInserts; + } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("WorkloadStat {"); - sb.append("numInserts=").append(numInserts).append(", "); - sb.append("numUpdates=").append(numUpdates); - sb.append('}'); - return sb.toString(); - } + public HashMap getUpdateLocationToCount() { + return updateLocationToCount; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("WorkloadStat {"); + sb.append("numInserts=").append(numInserts).append(", "); + sb.append("numUpdates=").append(numUpdates); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-client/src/main/resources/log4j.properties b/hoodie-client/src/main/resources/log4j.properties index 5a8b643fd..ab922d18a 100644 --- 
a/hoodie-client/src/main/resources/log4j.properties +++ b/hoodie-client/src/main/resources/log4j.properties @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - # Set root logger level to DEBUG and its only appender to A1. log4j.rootLogger=INFO, A1 # A1 is set to be a ConsoleAppender. diff --git a/hoodie-client/src/test/java/HoodieClientExample.java b/hoodie-client/src/test/java/HoodieClientExample.java index 26f097a93..ef31fea1c 100644 --- a/hoodie-client/src/test/java/HoodieClientExample.java +++ b/hoodie-client/src/test/java/HoodieClientExample.java @@ -22,13 +22,12 @@ import com.uber.hoodie.common.HoodieTestDataGenerator; import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieTableType; -import com.uber.hoodie.common.table.HoodieTableConfig; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.config.HoodieIndexConfig; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.index.HoodieIndex; - +import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; @@ -36,7 +35,6 @@ import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.util.List; /** * Driver program that uses the Hoodie client with synthetic workload, and performs basic @@ -44,75 +42,77 @@ import java.util.List; */ public class HoodieClientExample { - @Parameter(names={"--table-path", "-p"}, description = "path for Hoodie sample table") - private String tablePath = "file:///tmp/hoodie/sample-table"; + @Parameter(names = {"--table-path", "-p"}, description = "path for Hoodie sample table") + private String tablePath = "file:///tmp/hoodie/sample-table"; - 
@Parameter(names={"--table-name", "-n"}, description = "table name for Hoodie sample table") - private String tableName = "hoodie_rt"; + @Parameter(names = {"--table-name", "-n"}, description = "table name for Hoodie sample table") + private String tableName = "hoodie_rt"; - @Parameter(names={"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ") - private String tableType = HoodieTableType.COPY_ON_WRITE.name(); + @Parameter(names = {"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ") + private String tableType = HoodieTableType.COPY_ON_WRITE.name(); - @Parameter(names = {"--help", "-h"}, help = true) - public Boolean help = false; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; - private static Logger logger = LogManager.getLogger(HoodieClientExample.class); + private static Logger logger = LogManager.getLogger(HoodieClientExample.class); - public static void main(String[] args) throws Exception { - HoodieClientExample cli = new HoodieClientExample(); - JCommander cmd = new JCommander(cli, args); + public static void main(String[] args) throws Exception { + HoodieClientExample cli = new HoodieClientExample(); + JCommander cmd = new JCommander(cli, args); - if (cli.help) { - cmd.usage(); - System.exit(1); - } - cli.run(); + if (cli.help) { + cmd.usage(); + System.exit(1); + } + cli.run(); + } + + + public void run() throws Exception { + + SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example"); + sparkConf.setMaster("local[1]"); + sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + sparkConf.set("spark.kryoserializer.buffer.max", "512m"); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + + // Generator of some records to be loaded in. 
+ HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + + // initialize the table, if not done already + Path path = new Path(tablePath); + FileSystem fs = FSUtils.getFs(); + if (!fs.exists(path)) { + HoodieTableMetaClient + .initTableType(fs, tablePath, HoodieTableType.valueOf(tableType), tableName, + HoodieAvroPayload.class.getName()); } + // Create the write client to write some records in + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .forTable(tableName).withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - public void run() throws Exception { + /** + * Write 1 (only inserts) + */ + String newCommitTime = client.startCommit(); + logger.info("Starting commit " + newCommitTime); - SparkConf sparkConf = new SparkConf().setAppName("hoodie-client-example"); - sparkConf.setMaster("local[1]"); - sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - sparkConf.set("spark.kryoserializer.buffer.max", "512m"); - JavaSparkContext jsc = new JavaSparkContext(sparkConf); + List records = dataGen.generateInserts(newCommitTime, 100); + JavaRDD writeRecords = jsc.parallelize(records, 1); + client.upsert(writeRecords, newCommitTime); - // Generator of some records to be loaded in. 
- HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - - // initialize the table, if not done already - Path path = new Path(tablePath); - FileSystem fs = FSUtils.getFs(); - if (!fs.exists(path)) { - HoodieTableMetaClient.initTableType(fs, tablePath, HoodieTableType.valueOf(tableType), tableName, HoodieAvroPayload.class.getName()); - } - - // Create the write client to write some records in - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .forTable(tableName).withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - /** - * Write 1 (only inserts) - */ - String newCommitTime = client.startCommit(); - logger.info("Starting commit " + newCommitTime); - - List records = dataGen.generateInserts(newCommitTime, 100); - JavaRDD writeRecords = jsc.parallelize(records, 1); - client.upsert(writeRecords, newCommitTime); - - /** - * Write 2 (updates) - */ - newCommitTime = client.startCommit(); - logger.info("Starting commit " + newCommitTime); - records.addAll(dataGen.generateUpdates(newCommitTime, 100)); - writeRecords = jsc.parallelize(records, 1); - client.upsert(writeRecords, newCommitTime); - } + /** + * Write 2 (updates) + */ + newCommitTime = client.startCommit(); + logger.info("Starting commit " + newCommitTime); + records.addAll(dataGen.generateUpdates(newCommitTime, 100)); + writeRecords = jsc.parallelize(records, 1); + client.upsert(writeRecords, newCommitTime); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/TestHoodieClientOnCopyOnWriteStorage.java b/hoodie-client/src/test/java/com/uber/hoodie/TestHoodieClientOnCopyOnWriteStorage.java index be4022dc1..ccdd12839 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/TestHoodieClientOnCopyOnWriteStorage.java +++ 
b/hoodie-client/src/test/java/com/uber/hoodie/TestHoodieClientOnCopyOnWriteStorage.java @@ -16,8 +16,12 @@ package com.uber.hoodie; -import com.google.common.collect.Iterables; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import com.google.common.collect.Iterables; import com.uber.hoodie.common.HoodieCleanStat; import com.uber.hoodie.common.HoodieClientTestUtils; import com.uber.hoodie.common.HoodieTestDataGenerator; @@ -45,22 +49,6 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieRollbackException; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.table.HoodieTable; - -import org.apache.avro.generic.GenericRecord; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.scheduler.SparkListener; -import org.apache.spark.scheduler.SparkListenerTaskEnd; -import org.apache.spark.sql.SQLContext; -import org.apache.spark.util.AccumulatorV2; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -76,1399 +64,1488 @@ import java.util.Optional; import java.util.Set; import java.util.TreeSet; import java.util.stream.Collectors; - +import org.apache.avro.generic.GenericRecord; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.scheduler.SparkListener; +import org.apache.spark.scheduler.SparkListenerTaskEnd; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.util.AccumulatorV2; 
+import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; import scala.collection.Iterator; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - public class TestHoodieClientOnCopyOnWriteStorage implements Serializable { - private transient JavaSparkContext jsc = null; - private transient SQLContext sqlContext; - private String basePath = null; - private transient HoodieTestDataGenerator dataGen = null; - private String[] partitionPaths = {"2016/01/01", "2016/02/02", "2016/06/02"}; - @Before - public void init() throws IOException { - // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieClient")); + private transient JavaSparkContext jsc = null; + private transient SQLContext sqlContext; + private String basePath = null; + private transient HoodieTestDataGenerator dataGen = null; + private String[] partitionPaths = {"2016/01/01", "2016/02/02", "2016/06/02"}; - //SQLContext stuff - sqlContext = new SQLContext(jsc); + @Before + public void init() throws IOException { + // Initialize a local spark env + jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieClient")); - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.init(basePath); - dataGen = new HoodieTestDataGenerator(); + //SQLContext stuff + sqlContext = new SQLContext(jsc); + + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.init(basePath); + dataGen = new HoodieTestDataGenerator(); + } + + + private HoodieWriteConfig getConfig() { + return getConfigBuilder().build(); + } + + private 
HoodieWriteConfig.Builder getConfigBuilder() { + return HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .withCompactionConfig( + HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) + .forTable("test-trip-table").withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); + } + + private void assertNoWriteErrors(List statuses) { + // Verify there are no errors + for (WriteStatus status : statuses) { + assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); + } + } + + private void assertPartitionMetadata(String[] partitionPaths, FileSystem fs) throws IOException { + for (String partitionPath : partitionPaths) { + assertTrue( + HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath))); + HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, + new Path(basePath, partitionPath)); + pmeta.readFromFS(); + assertEquals(3, pmeta.getPartitionDepth()); + } + } + + private void checkTaggedRecords(List taggedRecords, String commitTime) { + for (HoodieRecord rec : taggedRecords) { + assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown()); + assertEquals( + "All records should have commit time " + commitTime + ", since updates were made", + rec.getCurrentLocation().getCommitTime(), commitTime); + } + } + + + @Test + public void testFilterExist() throws Exception { + HoodieWriteConfig config = getConfig(); + HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config); + String newCommitTime = writeClient.startCommit(); + List records = dataGen.generateInserts(newCommitTime, 100); + JavaRDD recordsRDD = jsc.parallelize(records, 1); + + HoodieReadClient readClient = new HoodieReadClient(jsc, config.getBasePath()); + JavaRDD filteredRDD = 
readClient.filterExists(recordsRDD); + + // Should not find any files + assertTrue(filteredRDD.collect().size() == 100); + + JavaRDD smallRecordsRDD = jsc.parallelize(records.subList(0, 75), 1); + // We create three parquet file, each having one record. (two different partitions) + List statuses = writeClient.bulkInsert(smallRecordsRDD, newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + readClient = new HoodieReadClient(jsc, config.getBasePath()); + filteredRDD = readClient.filterExists(recordsRDD); + List result = filteredRDD.collect(); + // Check results + assertTrue(result.size() == 25); + } + + @Test + public void testAutoCommit() throws Exception { + // Set autoCommit false + HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + JavaRDD result = client.bulkInsert(writeRecords, newCommitTime); + + assertFalse("If Autocommit is false, then commit should not be made automatically", + HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); + assertTrue("Commit should succeed", client.commit(newCommitTime, result)); + assertTrue("After explicit commit, commit file should be created", + HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); + + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, 100); + JavaRDD updateRecords = jsc.parallelize(records, 1); + result = client.upsert(updateRecords, newCommitTime); + assertFalse("If Autocommit is false, then commit should not be made automatically", + HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); + assertTrue("Commit should succeed", client.commit(newCommitTime, result)); + assertTrue("After explicit commit, 
commit file should be created", + HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); + } + + @Test + public void testUpserts() throws Exception { + HoodieWriteConfig cfg = getConfig(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); + FileSystem fs = FSUtils.getFs(); + + /** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + // check the partition metadata is written out + assertPartitionMetadata(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, fs); + + // verify that there is a commit + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()) + .getCommitTimeline(); + + assertEquals("Expecting a single commit.", 1, + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); + assertEquals("Latest commit should be 001", newCommitTime, + timeline.lastInstant().get().getTimestamp()); + assertEquals("Must contain 200 records", + records.size(), + HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); + // Should have 100 records in table (check using Index), all in locations marked at commit + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + + List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table) + .collect(); + checkTaggedRecords(taggedRecords, "001"); + + /** + * Write 2 (updates) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, 100); + LinkedHashMap recordsMap = new LinkedHashMap<>(); + for (HoodieRecord rec : records) { + if 
(!recordsMap.containsKey(rec.getKey())) { + recordsMap.put(rec.getKey(), rec); + } + } + List dedupedRecords = new ArrayList<>(recordsMap.values()); + + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + // verify there are now 2 commits + timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); + assertEquals("Expecting two commits.", + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), 2); + assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), + newCommitTime); + + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, getConfig()); + + // Index should be able to locate all updates in correct locations. + taggedRecords = index.tagLocation(jsc.parallelize(dedupedRecords, 1), table).collect(); + checkTaggedRecords(taggedRecords, "004"); + + // Check the entire dataset has 100 records still + String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; + for (int i = 0; i < fullPartitionPaths.length; i++) { + fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); + } + assertEquals("Must contain 200 records", + 200, + HoodieClientTestUtils.read(basePath, sqlContext, fs, fullPartitionPaths).count()); + + // Check that the incremental consumption from time 000 + assertEquals("Incremental consumption from time 002, should give all records in commit 004", + HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), + HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "002").count()); + assertEquals("Incremental consumption from time 001, should give all records in commit 004", + HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), + HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "001").count()); 
+ } + + @Test + public void testDeletes() throws Exception { + + HoodieWriteConfig cfg = getConfig(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); + FileSystem fs = FSUtils.getFs(); + + /** + * Write 1 (inserts and deletes) + * Write actual 200 insert records and ignore 100 delete records + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List fewRecordsForInsert = dataGen.generateInserts(newCommitTime, 200); + List fewRecordsForDelete = dataGen.generateDeletes(newCommitTime, 100); + + List records = new ArrayList(fewRecordsForInsert); + records.addAll(fewRecordsForDelete); + + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + // verify that there is a commit + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()) + .getCommitTimeline(); + assertEquals("Expecting a single commit.", 1, + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); + assertEquals("Latest commit should be 001", newCommitTime, + timeline.lastInstant().get().getTimestamp()); + assertEquals("Must contain 200 records", fewRecordsForInsert.size(), + HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); + // Should have 100 records in table (check using Index), all in locations marked at commit + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + + List taggedRecords = index + .tagLocation(jsc.parallelize(fewRecordsForInsert, 1), table).collect(); + checkTaggedRecords(taggedRecords, "001"); + + /** + * Write 2 (deletes+writes) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + fewRecordsForDelete = records.subList(0, 50); + List fewRecordsForUpdate = records.subList(50, 
100); + records = dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete); + + records.addAll(fewRecordsForUpdate); + + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + // verify there are now 2 commits + timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); + assertEquals("Expecting two commits.", + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), 2); + assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), + newCommitTime); + + // Check the entire dataset has 150 records(200-50) still + String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; + for (int i = 0; i < fullPartitionPaths.length; i++) { + fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); + } + assertEquals("Must contain 150 records", 150, + HoodieClientTestUtils.read(basePath, sqlContext, fs, fullPartitionPaths).count()); + + // Check that the incremental consumption from time 000 + assertEquals("Incremental consumption from latest commit, should give 50 updated records", + 50, + HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); + assertEquals("Incremental consumption from time 001, should give 50 updated records", + 50, + HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "001").count()); + assertEquals("Incremental consumption from time 000, should give 150", + 150, + HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); + } + + + @Test + public void testCreateSavepoint() throws Exception { + HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( + HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1) + .build()).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + FileSystem 
fs = FSUtils.getFs(); + HoodieTestDataGenerator + .writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); + + /** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 200); + List statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime) + .collect(); + assertNoWriteErrors(statuses); + + /** + * Write 2 (updates) + */ + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + client.savepoint("hoodie-unit-test", "test"); + try { + client.rollback(newCommitTime); + fail("Rollback of a savepoint was allowed " + newCommitTime); + } catch (HoodieRollbackException e) { + // this is good } + /** + * Write 3 (updates) + */ + newCommitTime = "003"; + client.startCommitWithTime(newCommitTime); - private HoodieWriteConfig getConfig() { - return getConfigBuilder().build(); + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + /** + * Write 4 (updates) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + List partitionPaths = FSUtils + .getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning()); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + final 
TableFileSystemView.ReadOptimizedView view = table.getROFileSystemView(); + List dataFiles = partitionPaths.stream().flatMap(s -> { + return view.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); + }).collect(Collectors.toList()); + + assertEquals("The data files for commit 002 should not be cleaned", 3, dataFiles.size()); + + // Delete savepoint + assertFalse(table.getCompletedSavepointTimeline().empty()); + client.deleteSavepoint( + table.getCompletedSavepointTimeline().getInstants().findFirst().get().getTimestamp()); + // rollback and reupsert 004 + client.rollback(newCommitTime); + + client.startCommitWithTime(newCommitTime); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, getConfig()); + final TableFileSystemView.ReadOptimizedView view1 = table.getROFileSystemView(); + dataFiles = partitionPaths.stream().flatMap(s -> { + return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); + }).collect(Collectors.toList()); + + assertEquals("The data files for commit 002 should be cleaned now", 0, dataFiles.size()); + } + + + @Test + public void testRollbackToSavepoint() throws Exception { + HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( + HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1) + .build()).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + FileSystem fs = FSUtils.getFs(); + HoodieTestDataGenerator + .writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); + + /** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + 
List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + /** + * Write 2 (updates) + */ + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + client.savepoint("hoodie-unit-test", "test"); + + /** + * Write 3 (updates) + */ + newCommitTime = "003"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + List partitionPaths = FSUtils + .getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning()); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + final TableFileSystemView.ReadOptimizedView view1 = table.getROFileSystemView(); + + List dataFiles = partitionPaths.stream().flatMap(s -> { + return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); + }).collect(Collectors.toList()); + assertEquals("The data files for commit 003 should be present", 3, dataFiles.size()); + + /** + * Write 4 (updates) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, getConfig()); + final TableFileSystemView.ReadOptimizedView view2 = table.getROFileSystemView(); + + dataFiles = partitionPaths.stream().flatMap(s -> { + return 
view2.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); + }).collect(Collectors.toList()); + assertEquals("The data files for commit 004 should be present", 3, dataFiles.size()); + + // rolling back to a non existent savepoint must not succeed + try { + client.rollbackToSavepoint("001"); + fail("Rolling back to non-existent savepoint should not be allowed"); + } catch (HoodieRollbackException e) { + // this is good } - private HoodieWriteConfig.Builder getConfigBuilder() { - return HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) - .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) - .forTable("test-trip-table").withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); - } + // rollback to savepoint 002 + HoodieInstant savepoint = + table.getCompletedSavepointTimeline().getInstants().findFirst().get(); + client.rollbackToSavepoint(savepoint.getTimestamp()); - private void assertNoWriteErrors(List statuses) { - // Verify there are no errors - for (WriteStatus status : statuses) { - assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); - } - } + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, getConfig()); + final TableFileSystemView.ReadOptimizedView view3 = table.getROFileSystemView(); + dataFiles = partitionPaths.stream().flatMap(s -> { + return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); + }).collect(Collectors.toList()); + assertEquals("The data files for commit 002 be available", 3, dataFiles.size()); - private void assertPartitionMetadata(String[] partitionPaths, FileSystem fs) throws IOException { - for (String partitionPath: partitionPaths) { - 
assertTrue(HoodiePartitionMetadata.hasPartitionMetadata(fs, new Path(basePath, partitionPath))); - HoodiePartitionMetadata pmeta = new HoodiePartitionMetadata(fs, new Path(basePath, partitionPath)); - pmeta.readFromFS(); - assertEquals(3, pmeta.getPartitionDepth()); - } - } + dataFiles = partitionPaths.stream().flatMap(s -> { + return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); + }).collect(Collectors.toList()); + assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size()); - private void checkTaggedRecords(List taggedRecords, String commitTime) { - for (HoodieRecord rec : taggedRecords) { - assertTrue("Record " + rec + " found with no location.", rec.isCurrentLocationKnown()); - assertEquals("All records should have commit time "+ commitTime+", since updates were made", - rec.getCurrentLocation().getCommitTime(), commitTime); - } - } + dataFiles = partitionPaths.stream().flatMap(s -> { + return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); + }).collect(Collectors.toList()); + assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size()); + } + @Test + public void testInsertAndCleanByVersions() throws Exception { + int maxVersions = 2; // keep upto 2 versions for each file + HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( + HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) + .retainFileVersions(maxVersions).build()).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); + FileSystem fs = FSUtils.getFs(); - @Test - public void testFilterExist() throws Exception { - HoodieWriteConfig config = getConfig(); - HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config); - String newCommitTime = writeClient.startCommit(); - List records = dataGen.generateInserts(newCommitTime, 100); - JavaRDD recordsRDD = 
jsc.parallelize(records, 1); + /** + * do a big insert + * (this is basically same as insert part of upsert, just adding it here so we can + * catch breakages in insert(), if the implementation diverges.) + */ + String newCommitTime = client.startCommit(); + List records = dataGen.generateInserts(newCommitTime, 500); + JavaRDD writeRecords = jsc.parallelize(records, 5); - HoodieReadClient readClient = new HoodieReadClient(jsc, config.getBasePath()); - JavaRDD filteredRDD = readClient.filterExists(recordsRDD); + List statuses = client.insert(writeRecords, newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); - // Should not find any files - assertTrue(filteredRDD.collect().size() == 100); + // verify that there is a commit + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()) + .getCommitTimeline(); + assertEquals("Expecting a single commit.", 1, + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); + // Should have 100 records in table (check using Index), all in locations marked at commit + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + assertFalse(table.getCompletedCommitTimeline().empty()); + String commitTime = + table.getCompletedCommitTimeline().getInstants().findFirst().get().getTimestamp(); + assertFalse(table.getCompletedCleanTimeline().empty()); + assertEquals("The clean instant should be the same as the commit instant", commitTime, + table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); - JavaRDD smallRecordsRDD = jsc.parallelize(records.subList(0, 75), 1); - // We create three parquet file, each having one record. 
(two different partitions) - List statuses = writeClient.bulkInsert(smallRecordsRDD, newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); + List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table) + .collect(); + checkTaggedRecords(taggedRecords, newCommitTime); - readClient = new HoodieReadClient(jsc, config.getBasePath()); - filteredRDD = readClient.filterExists(recordsRDD); - List result = filteredRDD.collect(); - // Check results - assertTrue(result.size() == 25); - } + // Keep doing some writes and clean inline. Make sure we have expected number of files remaining. + for (int writeCnt = 2; writeCnt < 10; writeCnt++) { - @Test - public void testAutoCommit() throws Exception { - // Set autoCommit false - HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + Thread.sleep(1100); // make sure commits are unique + newCommitTime = client.startCommit(); + records = dataGen.generateUpdates(newCommitTime, 100); - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metadata, getConfig()); + timeline = table.getCommitTimeline(); - JavaRDD result = client.bulkInsert(writeRecords, newCommitTime); + TableFileSystemView fsView = table.getFileSystemView(); + // Need to ensure the following + for (String partitionPath : dataGen.getPartitionPaths()) { + // compute all the versions of all files, from time 0 + HashMap> fileIdToVersions = new HashMap<>(); + for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) { + 
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata + .fromBytes(timeline.getInstantDetails(entry).get()); - assertFalse("If Autocommit is false, then commit should not be made automatically", - HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); - assertTrue("Commit should succeed", client.commit(newCommitTime, result)); - assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); - - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, 100); - JavaRDD updateRecords = jsc.parallelize(records, 1); - result = client.upsert(updateRecords, newCommitTime); - assertFalse("If Autocommit is false, then commit should not be made automatically", - HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); - assertTrue("Commit should succeed", client.commit(newCommitTime, result)); - assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, newCommitTime)); - } - - @Test - public void testUpserts() throws Exception { - HoodieWriteConfig cfg = getConfig(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); - FileSystem fs = FSUtils.getFs(); - - /** - * Write 1 (only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); - - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - // check the partition metadata is written out - assertPartitionMetadata(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, fs); - - // verify that there is a commit - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTimeline timeline = new HoodieActiveTimeline(fs, 
metaClient.getMetaPath()).getCommitTimeline(); - - assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); - assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp()); - assertEquals("Must contain 200 records", - records.size(), - HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); - // Should have 100 records in table (check using Index), all in locations marked at commit - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - - List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table).collect(); - checkTaggedRecords(taggedRecords, "001"); - - /** - * Write 2 (updates) - */ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, 100); - LinkedHashMap recordsMap = new LinkedHashMap<>(); - for (HoodieRecord rec : records) { - if (!recordsMap.containsKey(rec.getKey())) { - recordsMap.put(rec.getKey(), rec); + for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) { + if (!fileIdToVersions.containsKey(wstat.getFileId())) { + fileIdToVersions.put(wstat.getFileId(), new TreeSet<>()); } + fileIdToVersions.get(wstat.getFileId()) + .add(FSUtils.getCommitTime(new Path(wstat.getPath()).getName())); + } } - List dedupedRecords = new ArrayList<>(recordsMap.values()); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); + List fileGroups = fsView.getAllFileGroups(partitionPath) + .collect(Collectors.toList()); - // verify there are now 2 commits - timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); - assertEquals("Expecting two commits.", timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), 2); - assertEquals("Latest commit should be 004", 
timeline.lastInstant().get().getTimestamp(), newCommitTime); + for (HoodieFileGroup fileGroup : fileGroups) { + // No file has no more than max versions + String fileId = fileGroup.getId(); + List dataFiles = fileGroup.getAllDataFiles().collect(Collectors.toList()); - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, getConfig()); + assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions", + dataFiles.size() <= maxVersions); - // Index should be able to locate all updates in correct locations. - taggedRecords = index.tagLocation(jsc.parallelize(dedupedRecords, 1), table).collect(); - checkTaggedRecords(taggedRecords, "004"); - - // Check the entire dataset has 100 records still - String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; - for (int i=0; i < fullPartitionPaths.length; i++) { - fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); + // Each file, has the latest N versions (i.e cleaning gets rid of older versions) + List commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId)); + for (int i = 0; i < dataFiles.size(); i++) { + assertEquals( + "File " + fileId + " does not have latest versions on commits" + commitedVersions, + Iterables.get(dataFiles, i).getCommitTime(), + commitedVersions.get(commitedVersions.size() - 1 - i)); + } } - assertEquals("Must contain 200 records", - 200, - HoodieClientTestUtils.read(basePath, sqlContext, fs, fullPartitionPaths).count()); - - - // Check that the incremental consumption from time 000 - assertEquals("Incremental consumption from time 002, should give all records in commit 004", - HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count(), - HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "002").count()); - assertEquals("Incremental consumption from time 001, should give all records in commit 004", - 
HoodieClientTestUtils.readCommit(basePath, sqlContext,timeline, newCommitTime).count(), - HoodieClientTestUtils.readSince(basePath, sqlContext,timeline, "001").count()); + } } + } - @Test - public void testDeletes() throws Exception { + @Test + public void testInsertAndCleanByCommits() throws Exception { + int maxCommits = 3; // keep upto 3 commits from the past + HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( + HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) + .retainCommits(maxCommits).build()).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); + FileSystem fs = FSUtils.getFs(); - HoodieWriteConfig cfg = getConfig(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); - FileSystem fs = FSUtils.getFs(); + /** + * do a big insert + * (this is basically same as insert part of upsert, just adding it here so we can + * catch breakages in insert(), if the implementation diverges.) 
+ */ + String newCommitTime = client.startCommit(); + List records = dataGen.generateInserts(newCommitTime, 500); + JavaRDD writeRecords = jsc.parallelize(records, 5); - /** - * Write 1 (inserts and deletes) - * Write actual 200 insert records and ignore 100 delete records - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); + List statuses = client.insert(writeRecords, newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); - List fewRecordsForInsert = dataGen.generateInserts(newCommitTime, 200); - List fewRecordsForDelete = dataGen.generateDeletes(newCommitTime, 100); + // verify that there is a commit + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()) + .getCommitTimeline(); + assertEquals("Expecting a single commit.", 1, + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); + // Should have 100 records in table (check using Index), all in locations marked at commit + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - List records = new ArrayList(fewRecordsForInsert); - records.addAll(fewRecordsForDelete); + assertFalse(table.getCompletedCommitTimeline().empty()); + String commitTime = + table.getCompletedCommitTimeline().getInstants().findFirst().get().getTimestamp(); + assertFalse(table.getCompletedCleanTimeline().empty()); + assertEquals("The clean instant should be the same as the commit instant", commitTime, + table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); - JavaRDD writeRecords = jsc.parallelize(records, 1); + List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table) + .collect(); + checkTaggedRecords(taggedRecords, newCommitTime); - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); + // Keep doing some writes and clean inline. 
Make sure we have expected number of files remaining. + for (int writeCnt = 2; writeCnt < 10; writeCnt++) { + Thread.sleep(1100); // make sure commits are unique + newCommitTime = client.startCommit(); + records = dataGen.generateUpdates(newCommitTime, 100); - // verify that there is a commit - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); - assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); - assertEquals("Latest commit should be 001", newCommitTime, timeline.lastInstant().get().getTimestamp()); - assertEquals("Must contain 200 records", fewRecordsForInsert.size(), - HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); - // Should have 100 records in table (check using Index), all in locations marked at commit - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); - List taggedRecords = index.tagLocation(jsc.parallelize(fewRecordsForInsert, 1), table).collect(); - checkTaggedRecords(taggedRecords, "001"); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieTable table1 = HoodieTable.getHoodieTable(metadata, cfg); + HoodieTimeline activeTimeline = table1.getCompletedCommitTimeline(); + Optional + earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1); + Set acceptableCommits = + activeTimeline.getInstants().collect(Collectors.toSet()); + if (earliestRetainedCommit.isPresent()) { + acceptableCommits.removeAll( + activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp()) + .getInstants() + .collect(Collectors.toSet())); + acceptableCommits.add(earliestRetainedCommit.get()); + } - /** - * 
Write 2 (deletes+writes) - */ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - fewRecordsForDelete = records.subList(0,50); - List fewRecordsForUpdate = records.subList(50,100); - records = dataGen.generateDeletesFromExistingRecords(fewRecordsForDelete); - - records.addAll(fewRecordsForUpdate); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - // verify there are now 2 commits - timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); - assertEquals("Expecting two commits.", timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants(), 2); - assertEquals("Latest commit should be 004", timeline.lastInstant().get().getTimestamp(), newCommitTime); - - // Check the entire dataset has 150 records(200-50) still - String[] fullPartitionPaths = new String[dataGen.getPartitionPaths().length]; - for (int i=0; i < fullPartitionPaths.length; i++) { - fullPartitionPaths[i] = String.format("%s/%s/*", basePath, dataGen.getPartitionPaths()[i]); + TableFileSystemView fsView = table1.getFileSystemView(); + // Need to ensure the following + for (String partitionPath : dataGen.getPartitionPaths()) { + List fileGroups = fsView.getAllFileGroups(partitionPath) + .collect(Collectors.toList()); + for (HoodieFileGroup fileGroup : fileGroups) { + Set commitTimes = new HashSet<>(); + fileGroup.getAllDataFiles().forEach(value -> { + System.out.println("Data File - " + value); + commitTimes.add(value.getCommitTime()); + }); + assertEquals("Only contain acceptable versions of file should be present", + acceptableCommits.stream().map(HoodieInstant::getTimestamp) + .collect(Collectors.toSet()), commitTimes); } - assertEquals("Must contain 150 records", 150, - HoodieClientTestUtils.read(basePath, sqlContext, fs, fullPartitionPaths).count()); - - - // Check that the incremental consumption from time 000 - assertEquals("Incremental 
consumption from latest commit, should give 50 updated records", - 50, - HoodieClientTestUtils.readCommit(basePath, sqlContext, timeline, newCommitTime).count()); - assertEquals("Incremental consumption from time 001, should give 50 updated records", - 50, - HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "001").count()); - assertEquals("Incremental consumption from time 000, should give 150", - 150, - HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); + } } + } + @Test + public void testRollbackCommit() throws Exception { + // Let's create some commit files and parquet files + String commitTime1 = "20160501010101"; + String commitTime2 = "20160502020601"; + String commitTime3 = "20160506030611"; + new File(basePath + "/.hoodie").mkdirs(); + HoodieTestDataGenerator.writePartitionMetadata(FSUtils.getFs(), + new String[]{"2016/05/01", "2016/05/02", "2016/05/06"}, + basePath); - @Test - public void testCreateSavepoint() throws Exception { - HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( - HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1) + // Only first two have commit files + HoodieTestUtils.createCommitFiles(basePath, commitTime1, commitTime2); + // Third one has a .inflight intermediate commit file + HoodieTestUtils.createInflightCommitFiles(basePath, commitTime3); + + // Make commit1 + String file11 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime1, "id11"); + String file12 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime1, "id12"); + String file13 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime1, "id13"); + + // Make commit2 + String file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); + String file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); + String file23 = HoodieTestUtils.createDataFile(basePath, 
"2016/05/06", commitTime2, "id23"); + + // Make commit3 + String file31 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime3, "id31"); + String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32"); + String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33"); + + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY) .build()).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - FileSystem fs = FSUtils.getFs(); - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); - /** - * Write 1 (only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); + HoodieWriteClient client = new HoodieWriteClient(jsc, config, false); - List records = dataGen.generateInserts(newCommitTime, 200); - List statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - assertNoWriteErrors(statuses); - - /** - * Write 2 (updates) - */ - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - client.savepoint("hoodie-unit-test", "test"); - try { - client.rollback(newCommitTime); - fail("Rollback of a savepoint was allowed " + newCommitTime); - } catch (HoodieRollbackException e) { - // this is good - } - - /** - * Write 3 (updates) - */ - newCommitTime = "003"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - /** - * Write 4 (updates) - 
*/ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - List partitionPaths = FSUtils.getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning()); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - final TableFileSystemView.ReadOptimizedView view = table.getROFileSystemView(); - List dataFiles = partitionPaths.stream().flatMap(s -> { - return view.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); - }).collect(Collectors.toList()); - - assertEquals("The data files for commit 002 should not be cleaned", 3, dataFiles.size()); - - // Delete savepoint - assertFalse(table.getCompletedSavepointTimeline().empty()); - client.deleteSavepoint( - table.getCompletedSavepointTimeline().getInstants().findFirst().get().getTimestamp()); - // rollback and reupsert 004 - client.rollback(newCommitTime); - - client.startCommitWithTime(newCommitTime); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, getConfig()); - final TableFileSystemView.ReadOptimizedView view1 = table.getROFileSystemView(); - dataFiles = partitionPaths.stream().flatMap(s -> { - return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); - }).collect(Collectors.toList()); - - assertEquals("The data files for commit 002 should be cleaned now", 0, dataFiles.size()); + // Rollback commit 1 (this should fail, since commit2 is still around) + try { + client.rollback(commitTime1); + assertTrue("Should have thrown an exception ", 
false); + } catch (HoodieRollbackException hrbe) { + // should get here } + // Rollback commit3 + client.rollback(commitTime3); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33)); - @Test - public void testRollbackToSavepoint() throws Exception { - HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( - HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1) + // simulate partial failure, where .inflight was not deleted, but data files were. + HoodieTestUtils.createInflightCommitFiles(basePath, commitTime3); + client.rollback(commitTime3); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); + + // Rollback commit2 + client.rollback(commitTime2); + assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime2)); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); + + // simulate partial failure, where only .commit => .inflight renaming succeeded, leaving a + // .inflight commit and a bunch of data files around. 
+ HoodieTestUtils.createInflightCommitFiles(basePath, commitTime2); + file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); + file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); + file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23"); + + client.rollback(commitTime2); + assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime2)); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); + + // Let's rollback commit1, Check results + client.rollback(commitTime1); + assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime1)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); + } + + + @Test + public void testAutoRollbackCommit() throws Exception { + // Let's create some commit files and parquet files + String commitTime1 = "20160501010101"; + String commitTime2 = "20160502020601"; + String commitTime3 = "20160506030611"; + new File(basePath + "/.hoodie").mkdirs(); + HoodieTestDataGenerator.writePartitionMetadata(FSUtils.getFs(), + new String[]{"2016/05/01", "2016/05/02", "2016/05/06"}, + basePath); + + // One good commit + HoodieTestUtils.createCommitFiles(basePath, commitTime1); + // Two inflight commits + HoodieTestUtils.createInflightCommitFiles(basePath, commitTime2, commitTime3); + + // Make commit1 + String file11 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime1, "id11"); + 
String file12 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime1, "id12"); + String file13 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime1, "id13"); + + // Make commit2 + String file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); + String file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); + String file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23"); + + // Make commit3 + String file31 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime3, "id31"); + String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32"); + String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33"); + + // Turn auto rollback off + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY) .build()).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - FileSystem fs = FSUtils.getFs(); - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); - /** - * Write 1 (only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); + new HoodieWriteClient(jsc, config, false); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); + // Check results, nothing changed + assertTrue(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); + assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); + assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", 
commitTime3, file33)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - /** - * Write 2 (updates) - */ - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - client.savepoint("hoodie-unit-test", "test"); - - /** - * Write 3 (updates) - */ - newCommitTime = "003"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - List partitionPaths = FSUtils.getAllPartitionPaths(fs, cfg.getBasePath(), getConfig().shouldAssumeDatePartitioning()); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - final TableFileSystemView.ReadOptimizedView view1 = table.getROFileSystemView(); - - List dataFiles = partitionPaths.stream().flatMap(s -> { - return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); - }).collect(Collectors.toList()); - assertEquals("The data files for commit 003 should be present", 3, dataFiles.size()); + // Turn auto rollback on + new 
HoodieWriteClient(jsc, config, true); + assertTrue(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); + assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12) && + HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); + } - /** - * Write 4 (updates) - */ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, getConfig()); - final TableFileSystemView.ReadOptimizedView view2 = table.getROFileSystemView(); - - dataFiles = partitionPaths.stream().flatMap(s -> { - return view2.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); - }).collect(Collectors.toList()); - assertEquals("The data files for commit 004 should be present", 3, dataFiles.size()); + private HoodieWriteConfig getSmallInsertWriteConfig(int insertSplitSize) { + HoodieWriteConfig.Builder builder = getConfigBuilder(); + return builder.withCompactionConfig( + 
HoodieCompactionConfig.newBuilder() + .compactionSmallFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 15) + .insertSplitSize(insertSplitSize).build()) // tolerate upto 15 records + .withStorageConfig(HoodieStorageConfig.newBuilder() + .limitFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 20) + .build()) + .build(); + } - // rolling back to a non existent savepoint must not succeed - try { - client.rollbackToSavepoint("001"); - fail("Rolling back to non-existent savepoint should not be allowed"); - } catch (HoodieRollbackException e) { - // this is good - } + @Test + public void testSmallInsertHandlingForUpserts() throws Exception { - // rollback to savepoint 002 - HoodieInstant savepoint = - table.getCompletedSavepointTimeline().getInstants().findFirst().get(); - client.rollbackToSavepoint(savepoint.getTimestamp()); + FileSystem fs = FSUtils.getFs(); + final String TEST_PARTITION_PATH = "2016/09/26"; + final int INSERT_SPLIT_LIMIT = 100; + // setup the small file handling params + HoodieWriteConfig config = getSmallInsertWriteConfig( + INSERT_SPLIT_LIMIT); // hold upto 200 records max + dataGen = new HoodieTestDataGenerator(new String[]{TEST_PARTITION_PATH}); - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, getConfig()); - final TableFileSystemView.ReadOptimizedView view3 = table.getROFileSystemView(); - dataFiles = partitionPaths.stream().flatMap(s -> { - return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); - }).collect(Collectors.toList()); - assertEquals("The data files for commit 002 be available", 3, dataFiles.size()); + HoodieWriteClient client = new HoodieWriteClient(jsc, config); - dataFiles = partitionPaths.stream().flatMap(s -> { - return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); - }).collect(Collectors.toList()); - assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size()); + // Inserts => will write file1 + 
String commitTime1 = "001"; + client.startCommitWithTime(commitTime1); + List inserts1 = dataGen + .generateInserts(commitTime1, INSERT_SPLIT_LIMIT); // this writes ~500kb + Set keys1 = HoodieClientTestUtils.getRecordKeys(inserts1); - dataFiles = partitionPaths.stream().flatMap(s -> { - return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); - }).collect(Collectors.toList()); - assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size()); + JavaRDD insertRecordsRDD1 = jsc.parallelize(inserts1, 1); + List statuses = client.upsert(insertRecordsRDD1, commitTime1).collect(); + + assertNoWriteErrors(statuses); + + assertEquals("Just 1 file needs to be added.", 1, statuses.size()); + String file1 = statuses.get(0).getFileId(); + assertEquals("file should contain 100 records", + ParquetUtils.readRowKeysFromParquet(new Path(basePath, + TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), + 100); + + // Update + Inserts such that they just expand file1 + String commitTime2 = "002"; + client.startCommitWithTime(commitTime2); + List inserts2 = dataGen.generateInserts(commitTime2, 40); + Set keys2 = HoodieClientTestUtils.getRecordKeys(inserts2); + List insertsAndUpdates2 = new ArrayList<>(); + insertsAndUpdates2.addAll(inserts2); + insertsAndUpdates2.addAll(dataGen.generateUpdates(commitTime2, inserts1)); + + JavaRDD insertAndUpdatesRDD2 = jsc.parallelize(insertsAndUpdates2, 1); + statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect(); + assertNoWriteErrors(statuses); + + assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); + assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); + assertEquals("Existing file should be expanded", commitTime1, + statuses.get(0).getStat().getPrevCommit()); + Path newFile = new Path(basePath, + TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); + assertEquals("file should 
contain 140 records", + ParquetUtils.readRowKeysFromParquet(newFile).size(), 140); + + List records = ParquetUtils.readAvroRecords(newFile); + for (GenericRecord record : records) { + String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + assertEquals("only expect commit2", commitTime2, + record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); + assertTrue("key expected to be part of commit2", + keys2.contains(recordKey) || keys1.contains(recordKey)); } - - @Test - public void testInsertAndCleanByVersions() throws Exception { - int maxVersions = 2; // keep upto 2 versions for each file - HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( - HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) - .retainFileVersions(maxVersions).build()).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); - FileSystem fs = FSUtils.getFs(); - - /** - * do a big insert - * (this is basically same as insert part of upsert, just adding it here so we can - * catch breakages in insert(), if the implementation diverges.) 
- */ - String newCommitTime = client.startCommit(); - List records = dataGen.generateInserts(newCommitTime, 500); - JavaRDD writeRecords = jsc.parallelize(records, 5); - - List statuses = client.insert(writeRecords, newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - // verify that there is a commit - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); - assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); - // Should have 100 records in table (check using Index), all in locations marked at commit - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - assertFalse(table.getCompletedCommitTimeline().empty()); - String commitTime = - table.getCompletedCommitTimeline().getInstants().findFirst().get().getTimestamp(); - assertFalse(table.getCompletedCleanTimeline().empty()); - assertEquals("The clean instant should be the same as the commit instant", commitTime, - table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); - - List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table).collect(); - checkTaggedRecords(taggedRecords, newCommitTime); - - // Keep doing some writes and clean inline. Make sure we have expected number of files remaining. 
- for (int writeCnt = 2; writeCnt < 10; writeCnt++) { - - Thread.sleep(1100); // make sure commits are unique - newCommitTime = client.startCommit(); - records = dataGen.generateUpdates(newCommitTime, 100); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metadata, getConfig()); - timeline = table.getCommitTimeline(); - - TableFileSystemView fsView = table.getFileSystemView(); - // Need to ensure the following - for (String partitionPath : dataGen.getPartitionPaths()) { - // compute all the versions of all files, from time 0 - HashMap> fileIdToVersions = new HashMap<>(); - for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(entry).get()); - - for (HoodieWriteStat wstat : commitMetadata.getWriteStats(partitionPath)) { - if (!fileIdToVersions.containsKey(wstat.getFileId())) { - fileIdToVersions.put(wstat.getFileId(), new TreeSet<>()); - } - fileIdToVersions.get(wstat.getFileId()).add(FSUtils.getCommitTime(new Path(wstat.getPath()).getName())); - } - } - - - List fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList()); - - for (HoodieFileGroup fileGroup : fileGroups) { - // No file has no more than max versions - String fileId = fileGroup.getId(); - List dataFiles = fileGroup.getAllDataFiles().collect(Collectors.toList()); - - assertTrue("fileId " + fileId + " has more than " + maxVersions + " versions", - dataFiles.size() <= maxVersions); - - // Each file, has the latest N versions (i.e cleaning gets rid of older versions) - List commitedVersions = new ArrayList<>(fileIdToVersions.get(fileId)); - for (int i = 0; i < dataFiles.size(); i++) { - assertEquals("File " + fileId + " does not have latest versions 
on commits" + commitedVersions, - Iterables.get(dataFiles, i).getCommitTime(), - commitedVersions.get(commitedVersions.size() - 1 - i)); - } - } - } - } - } - - @Test - public void testInsertAndCleanByCommits() throws Exception { - int maxCommits = 3; // keep upto 3 commits from the past - HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( - HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) - .retainCommits(maxCommits).build()).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - HoodieIndex index = HoodieIndex.createIndex(cfg, jsc); - FileSystem fs = FSUtils.getFs(); - - /** - * do a big insert - * (this is basically same as insert part of upsert, just adding it here so we can - * catch breakages in insert(), if the implementation diverges.) - */ - String newCommitTime = client.startCommit(); - List records = dataGen.generateInserts(newCommitTime, 500); - JavaRDD writeRecords = jsc.parallelize(records, 5); - - List statuses = client.insert(writeRecords, newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - // verify that there is a commit - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTimeline timeline = new HoodieActiveTimeline(fs, metaClient.getMetaPath()).getCommitTimeline(); - assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); - // Should have 100 records in table (check using Index), all in locations marked at commit - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); - - assertFalse(table.getCompletedCommitTimeline().empty()); - String commitTime = - table.getCompletedCommitTimeline().getInstants().findFirst().get().getTimestamp(); - assertFalse(table.getCompletedCleanTimeline().empty()); - assertEquals("The clean instant should be the same as the commit instant", commitTime, - 
table.getCompletedCleanTimeline().getInstants().findFirst().get().getTimestamp()); - - List taggedRecords = index.tagLocation(jsc.parallelize(records, 1), table).collect(); - checkTaggedRecords(taggedRecords, newCommitTime); - - // Keep doing some writes and clean inline. Make sure we have expected number of files remaining. - for (int writeCnt = 2; writeCnt < 10; writeCnt++) { - Thread.sleep(1100); // make sure commits are unique - newCommitTime = client.startCommit(); - records = dataGen.generateUpdates(newCommitTime, 100); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieTable table1 = HoodieTable.getHoodieTable(metadata, cfg); - HoodieTimeline activeTimeline = table1.getCompletedCommitTimeline(); - Optional - earliestRetainedCommit = activeTimeline.nthFromLastInstant(maxCommits - 1); - Set acceptableCommits = - activeTimeline.getInstants().collect(Collectors.toSet()); - if (earliestRetainedCommit.isPresent()) { - acceptableCommits.removeAll( - activeTimeline.findInstantsInRange("000", earliestRetainedCommit.get().getTimestamp()).getInstants() - .collect(Collectors.toSet())); - acceptableCommits.add(earliestRetainedCommit.get()); - } - - TableFileSystemView fsView = table1.getFileSystemView(); - // Need to ensure the following - for (String partitionPath : dataGen.getPartitionPaths()) { - List fileGroups = fsView.getAllFileGroups(partitionPath).collect(Collectors.toList()); - for (HoodieFileGroup fileGroup : fileGroups) { - Set commitTimes = new HashSet<>(); - fileGroup.getAllDataFiles().forEach(value -> { - System.out.println("Data File - " + value); - commitTimes.add(value.getCommitTime()); - }); - assertEquals("Only contain acceptable versions of file should be present", - acceptableCommits.stream().map(HoodieInstant::getTimestamp) - .collect(Collectors.toSet()), commitTimes); 
- } - } - } - } - - @Test - public void testRollbackCommit() throws Exception { - // Let's create some commit files and parquet files - String commitTime1 = "20160501010101"; - String commitTime2 = "20160502020601"; - String commitTime3 = "20160506030611"; - new File(basePath + "/.hoodie").mkdirs(); - HoodieTestDataGenerator.writePartitionMetadata(FSUtils.getFs(), - new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, - basePath); - - - // Only first two have commit files - HoodieTestUtils.createCommitFiles(basePath, commitTime1, commitTime2); - // Third one has a .inflight intermediate commit file - HoodieTestUtils.createInflightCommitFiles(basePath, commitTime3); - - // Make commit1 - String file11 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime1, "id11"); - String file12 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime1, "id12"); - String file13 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime1, "id13"); - - // Make commit2 - String file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); - String file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); - String file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23"); - - // Make commit3 - String file31 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime3, "id31"); - String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32"); - String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33"); - - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY) - .build()).build(); - - HoodieWriteClient client = new HoodieWriteClient(jsc, config, false); - - // Rollback commit 1 (this should fail, since commit2 is still around) - try { - client.rollback(commitTime1); - 
assertTrue("Should have thrown an exception ", false); - } catch (HoodieRollbackException hrbe) { - // should get here - } - - // Rollback commit3 - client.rollback(commitTime3); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33)); - - // simulate partial failure, where .inflight was not deleted, but data files were. - HoodieTestUtils.createInflightCommitFiles(basePath, commitTime3); - client.rollback(commitTime3); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); - - - // Rollback commit2 - client.rollback(commitTime2); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime2)); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); - - // simulate partial failure, where only .commit => .inflight renaming succeeded, leaving a - // .inflight commit and a bunch of data files around. 
- HoodieTestUtils.createInflightCommitFiles(basePath, commitTime2); - file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); - file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); - file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23"); - - client.rollback(commitTime2); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime2)); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); - - - // Let's rollback commit1, Check results - client.rollback(commitTime1); - assertFalse(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime1)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); - } - - - @Test - public void testAutoRollbackCommit() throws Exception { - // Let's create some commit files and parquet files - String commitTime1 = "20160501010101"; - String commitTime2 = "20160502020601"; - String commitTime3 = "20160506030611"; - new File(basePath + "/.hoodie").mkdirs(); - HoodieTestDataGenerator.writePartitionMetadata(FSUtils.getFs(), - new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, - basePath); - - // One good commit - HoodieTestUtils.createCommitFiles(basePath, commitTime1); - // Two inflight commits - HoodieTestUtils.createInflightCommitFiles(basePath, commitTime2, commitTime3); - - // Make commit1 - String file11 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime1, "id11"); - 
String file12 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime1, "id12"); - String file13 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime1, "id13"); - - // Make commit2 - String file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21"); - String file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22"); - String file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23"); - - // Make commit3 - String file31 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime3, "id31"); - String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32"); - String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33"); - - // Turn auto rollback off - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY) - .build()).build(); - - new HoodieWriteClient(jsc, config, false); - - // Check results, nothing changed - assertTrue(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); - assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); - assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) && - HoodieTestUtils.doesDataFileExist(basePath, 
"2016/05/02", commitTime1, file12) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); - - // Turn auto rollback on - new HoodieWriteClient(jsc, config, true); - assertTrue(HoodieTestUtils.doesCommitExist(basePath, commitTime1)); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime2)); - assertFalse(HoodieTestUtils.doesInflightExist(basePath, commitTime3)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22) || - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12) && - HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13)); - } - - - private HoodieWriteConfig getSmallInsertWriteConfig(int insertSplitSize) { - HoodieWriteConfig.Builder builder = getConfigBuilder(); - return builder.withCompactionConfig( - HoodieCompactionConfig.newBuilder() - .compactionSmallFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 15) - .insertSplitSize(insertSplitSize).build()) // tolerate upto 15 records - .withStorageConfig(HoodieStorageConfig.newBuilder() - .limitFileSize(HoodieTestDataGenerator.SIZE_PER_RECORD * 20) - .build()) - .build(); - } - - - @Test - public void testSmallInsertHandlingForUpserts() throws Exception { - - FileSystem fs = FSUtils.getFs(); - final String TEST_PARTITION_PATH = "2016/09/26"; - final int INSERT_SPLIT_LIMIT = 100; - // setup the small file handling params - HoodieWriteConfig config = 
getSmallInsertWriteConfig(INSERT_SPLIT_LIMIT); // hold upto 200 records max - dataGen = new HoodieTestDataGenerator(new String[] {TEST_PARTITION_PATH}); - - HoodieWriteClient client = new HoodieWriteClient(jsc, config); - - // Inserts => will write file1 - String commitTime1 = "001"; - client.startCommitWithTime(commitTime1); - List inserts1 = dataGen.generateInserts(commitTime1, INSERT_SPLIT_LIMIT); // this writes ~500kb - Set keys1 = HoodieClientTestUtils.getRecordKeys(inserts1); - - JavaRDD insertRecordsRDD1 = jsc.parallelize(inserts1, 1); - List statuses= client.upsert(insertRecordsRDD1, commitTime1).collect(); - - assertNoWriteErrors(statuses); - - assertEquals("Just 1 file needs to be added.", 1, statuses.size()); - String file1 = statuses.get(0).getFileId(); - assertEquals("file should contain 100 records", - ParquetUtils.readRowKeysFromParquet(new Path(basePath, TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), - 100); - - // Update + Inserts such that they just expand file1 - String commitTime2 = "002"; - client.startCommitWithTime(commitTime2); - List inserts2 = dataGen.generateInserts(commitTime2, 40); - Set keys2 = HoodieClientTestUtils.getRecordKeys(inserts2); - List insertsAndUpdates2 = new ArrayList<>(); - insertsAndUpdates2.addAll(inserts2); - insertsAndUpdates2.addAll(dataGen.generateUpdates(commitTime2, inserts1)); - - JavaRDD insertAndUpdatesRDD2 = jsc.parallelize(insertsAndUpdates2, 1); - statuses = client.upsert(insertAndUpdatesRDD2, commitTime2).collect(); - assertNoWriteErrors(statuses); - - assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); - assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); - assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit()); - Path newFile = new Path(basePath, TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); - assertEquals("file should contain 140 
records", ParquetUtils.readRowKeysFromParquet(newFile).size(), 140); - - List records = ParquetUtils.readAvroRecords(newFile); - for (GenericRecord record: records) { - String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - assertEquals("only expect commit2", commitTime2, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); - assertTrue("key expected to be part of commit2", keys2.contains(recordKey) || keys1.contains(recordKey)); - } - - // update + inserts such that file1 is updated and expanded, a new file2 is created. - String commitTime3 = "003"; - client.startCommitWithTime(commitTime3); - List insertsAndUpdates3 = dataGen.generateInserts(commitTime3, 200); - Set keys3 = HoodieClientTestUtils.getRecordKeys(insertsAndUpdates3); - List updates3 = dataGen.generateUpdates(commitTime3, inserts2); - insertsAndUpdates3.addAll(updates3); - - JavaRDD insertAndUpdatesRDD3 = jsc.parallelize(insertsAndUpdates3, 1); - statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect(); - assertNoWriteErrors(statuses); - - assertEquals("2 files needs to be committed.", 2, statuses.size()); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metadata, config); - TableFileSystemView.ReadOptimizedView fileSystemView = table.getROFileSystemView(); - List files = fileSystemView.getLatestDataFilesBeforeOrOn(TEST_PARTITION_PATH, commitTime3).collect( + // update + inserts such that file1 is updated and expanded, a new file2 is created. 
+ String commitTime3 = "003"; + client.startCommitWithTime(commitTime3); + List insertsAndUpdates3 = dataGen.generateInserts(commitTime3, 200); + Set keys3 = HoodieClientTestUtils.getRecordKeys(insertsAndUpdates3); + List updates3 = dataGen.generateUpdates(commitTime3, inserts2); + insertsAndUpdates3.addAll(updates3); + + JavaRDD insertAndUpdatesRDD3 = jsc.parallelize(insertsAndUpdates3, 1); + statuses = client.upsert(insertAndUpdatesRDD3, commitTime3).collect(); + assertNoWriteErrors(statuses); + + assertEquals("2 files needs to be committed.", 2, statuses.size()); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + TableFileSystemView.ReadOptimizedView fileSystemView = table.getROFileSystemView(); + List files = fileSystemView + .getLatestDataFilesBeforeOrOn(TEST_PARTITION_PATH, commitTime3).collect( Collectors.toList()); - int numTotalInsertsInCommit3 = 0; - for (HoodieDataFile file: files) { - if (file.getFileName().contains(file1)) { - assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime()); - records = ParquetUtils.readAvroRecords(new Path(file.getPath())); - for (GenericRecord record: records) { - String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - String recordCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(); - if (recordCommitTime.equals(commitTime3)) { - if (keys2.contains(recordKey)) { - assertEquals("only expect commit3", commitTime3, recordCommitTime); - keys2.remove(recordKey); - } else { - numTotalInsertsInCommit3++; - } - } - } - assertEquals("All keys added in commit 2 must be updated in commit3 correctly", 0, keys2.size()); + int numTotalInsertsInCommit3 = 0; + for (HoodieDataFile file : files) { + if (file.getFileName().contains(file1)) { + assertEquals("Existing file should be expanded", commitTime3, file.getCommitTime()); + records = ParquetUtils.readAvroRecords(new 
Path(file.getPath())); + for (GenericRecord record : records) { + String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + String recordCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(); + if (recordCommitTime.equals(commitTime3)) { + if (keys2.contains(recordKey)) { + assertEquals("only expect commit3", commitTime3, recordCommitTime); + keys2.remove(recordKey); } else { - assertEquals("New file must be written for commit 3", commitTime3, file.getCommitTime()); - records = ParquetUtils.readAvroRecords(new Path(file.getPath())); - for (GenericRecord record: records) { - String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - assertEquals("only expect commit3", commitTime3, record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); - assertTrue("key expected to be part of commit3", keys3.contains(recordKey)); - } - numTotalInsertsInCommit3 += records.size(); + numTotalInsertsInCommit3++; } + } } - assertEquals("Total inserts in commit3 must add up", keys3.size(), numTotalInsertsInCommit3); - } - - @Test - public void testSmallInsertHandlingForInserts() throws Exception { - - final String TEST_PARTITION_PATH = "2016/09/26"; - final int INSERT_SPLIT_LIMIT = 100; - // setup the small file handling params - HoodieWriteConfig config = getSmallInsertWriteConfig(INSERT_SPLIT_LIMIT); // hold upto 200 records max - dataGen = new HoodieTestDataGenerator(new String[] {TEST_PARTITION_PATH}); - HoodieWriteClient client = new HoodieWriteClient(jsc, config); - - // Inserts => will write file1 - String commitTime1 = "001"; - client.startCommitWithTime(commitTime1); - List inserts1 = dataGen.generateInserts(commitTime1, INSERT_SPLIT_LIMIT); // this writes ~500kb - Set keys1 = HoodieClientTestUtils.getRecordKeys(inserts1); - JavaRDD insertRecordsRDD1 = jsc.parallelize(inserts1, 1); - List statuses= client.insert(insertRecordsRDD1, commitTime1).collect(); - - assertNoWriteErrors(statuses); - 
assertPartitionMetadata(new String[]{TEST_PARTITION_PATH}, FSUtils.getFs()); - - assertEquals("Just 1 file needs to be added.", 1, statuses.size()); - String file1 = statuses.get(0).getFileId(); - assertEquals("file should contain 100 records", - ParquetUtils.readRowKeysFromParquet(new Path(basePath, TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), - 100); - - // Second, set of Inserts should just expand file1 - String commitTime2 = "002"; - client.startCommitWithTime(commitTime2); - List inserts2 = dataGen.generateInserts(commitTime2, 40); - Set keys2 = HoodieClientTestUtils.getRecordKeys(inserts2); - JavaRDD insertRecordsRDD2 = jsc.parallelize(inserts2, 1); - statuses = client.insert(insertRecordsRDD2, commitTime2).collect(); - assertNoWriteErrors(statuses); - - assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); - assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); - assertEquals("Existing file should be expanded", commitTime1, statuses.get(0).getStat().getPrevCommit()); - Path newFile = new Path(basePath, TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); - assertEquals("file should contain 140 records", ParquetUtils.readRowKeysFromParquet(newFile).size(), 140); - - List records = ParquetUtils.readAvroRecords(newFile); - for (GenericRecord record: records) { - String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(); - assertTrue("Record expected to be part of commit 1 or commit2", commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime)); - assertTrue("key expected to be part of commit 1 or commit2", keys2.contains(recordKey) || keys1.contains(recordKey)); + assertEquals("All keys added in commit 2 must be updated in commit3 correctly", 0, + keys2.size()); + } else { + assertEquals("New file must be written for commit 3", 
commitTime3, file.getCommitTime()); + records = ParquetUtils.readAvroRecords(new Path(file.getPath())); + for (GenericRecord record : records) { + String recordKey = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + assertEquals("only expect commit3", commitTime3, + record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString()); + assertTrue("key expected to be part of commit3", keys3.contains(recordKey)); } + numTotalInsertsInCommit3 += records.size(); + } + } + assertEquals("Total inserts in commit3 must add up", keys3.size(), numTotalInsertsInCommit3); + } - // Lots of inserts such that file1 is updated and expanded, a new file2 is created. - String commitTime3 = "003"; - client.startCommitWithTime(commitTime3); - List insert3 = dataGen.generateInserts(commitTime3, 200); - JavaRDD insertRecordsRDD3 = jsc.parallelize(insert3, 1); - statuses = client.insert(insertRecordsRDD3, commitTime3).collect(); - assertNoWriteErrors(statuses); - assertEquals("2 files needs to be committed.", 2, statuses.size()); + @Test + public void testSmallInsertHandlingForInserts() throws Exception { + final String TEST_PARTITION_PATH = "2016/09/26"; + final int INSERT_SPLIT_LIMIT = 100; + // setup the small file handling params + HoodieWriteConfig config = getSmallInsertWriteConfig( + INSERT_SPLIT_LIMIT); // hold upto 200 records max + dataGen = new HoodieTestDataGenerator(new String[]{TEST_PARTITION_PATH}); + HoodieWriteClient client = new HoodieWriteClient(jsc, config); - FileSystem fs = FSUtils.getFs(); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); - List files = - table.getROFileSystemView().getLatestDataFilesBeforeOrOn(TEST_PARTITION_PATH, commitTime3) - .collect(Collectors.toList()); - assertEquals("Total of 2 valid data files", 2, files.size()); + // Inserts => will write file1 + String commitTime1 = "001"; + client.startCommitWithTime(commitTime1); + List inserts1 
= dataGen + .generateInserts(commitTime1, INSERT_SPLIT_LIMIT); // this writes ~500kb + Set keys1 = HoodieClientTestUtils.getRecordKeys(inserts1); + JavaRDD insertRecordsRDD1 = jsc.parallelize(inserts1, 1); + List statuses = client.insert(insertRecordsRDD1, commitTime1).collect(); + assertNoWriteErrors(statuses); + assertPartitionMetadata(new String[]{TEST_PARTITION_PATH}, FSUtils.getFs()); - int totalInserts = 0; - for (HoodieDataFile file: files) { - assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime()); - records = ParquetUtils.readAvroRecords(new Path(file.getPath())); - totalInserts += records.size(); + assertEquals("Just 1 file needs to be added.", 1, statuses.size()); + String file1 = statuses.get(0).getFileId(); + assertEquals("file should contain 100 records", + ParquetUtils.readRowKeysFromParquet(new Path(basePath, + TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime1, 0, file1))).size(), + 100); + + // Second, set of Inserts should just expand file1 + String commitTime2 = "002"; + client.startCommitWithTime(commitTime2); + List inserts2 = dataGen.generateInserts(commitTime2, 40); + Set keys2 = HoodieClientTestUtils.getRecordKeys(inserts2); + JavaRDD insertRecordsRDD2 = jsc.parallelize(inserts2, 1); + statuses = client.insert(insertRecordsRDD2, commitTime2).collect(); + assertNoWriteErrors(statuses); + + assertEquals("Just 1 file needs to be updated.", 1, statuses.size()); + assertEquals("Existing file should be expanded", file1, statuses.get(0).getFileId()); + assertEquals("Existing file should be expanded", commitTime1, + statuses.get(0).getStat().getPrevCommit()); + Path newFile = new Path(basePath, + TEST_PARTITION_PATH + "/" + FSUtils.makeDataFileName(commitTime2, 0, file1)); + assertEquals("file should contain 140 records", + ParquetUtils.readRowKeysFromParquet(newFile).size(), 140); + + List records = ParquetUtils.readAvroRecords(newFile); + for (GenericRecord record : records) { + String recordKey = 
record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + String recCommitTime = record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD).toString(); + assertTrue("Record expected to be part of commit 1 or commit2", + commitTime1.equals(recCommitTime) || commitTime2.equals(recCommitTime)); + assertTrue("key expected to be part of commit 1 or commit2", + keys2.contains(recordKey) || keys1.contains(recordKey)); + } + + // Lots of inserts such that file1 is updated and expanded, a new file2 is created. + String commitTime3 = "003"; + client.startCommitWithTime(commitTime3); + List insert3 = dataGen.generateInserts(commitTime3, 200); + JavaRDD insertRecordsRDD3 = jsc.parallelize(insert3, 1); + statuses = client.insert(insertRecordsRDD3, commitTime3).collect(); + assertNoWriteErrors(statuses); + assertEquals("2 files needs to be committed.", 2, statuses.size()); + + FileSystem fs = FSUtils.getFs(); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); + List files = + table.getROFileSystemView().getLatestDataFilesBeforeOrOn(TEST_PARTITION_PATH, commitTime3) + .collect(Collectors.toList()); + assertEquals("Total of 2 valid data files", 2, files.size()); + + int totalInserts = 0; + for (HoodieDataFile file : files) { + assertEquals("All files must be at commit 3", commitTime3, file.getCommitTime()); + records = ParquetUtils.readAvroRecords(new Path(file.getPath())); + totalInserts += records.size(); + } + assertEquals("Total number of records must add up", totalInserts, + inserts1.size() + inserts2.size() + insert3.size()); + } + + @Test + public void testKeepLatestFileVersions() throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withAssumeDatePartitioning(true) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) + 
.retainFileVersions(1).build()).build(); + + // make 1 commit, with 1 file per partition + HoodieTestUtils.createCommitFiles(basePath, "000"); + + String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); + String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "000"); + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + List hoodieCleanStatsOne = table.clean(jsc); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsOne, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsOne, partitionPaths[1]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); + + // make next commit, with 1 insert & 1 update per partition + HoodieTestUtils.createCommitFiles(basePath, "001"); + table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + String file2P0C1 = HoodieTestUtils + .createNewDataFile(basePath, partitionPaths[0], "001"); // insert + String file2P1C1 = HoodieTestUtils + .createNewDataFile(basePath, partitionPaths[1], "001"); // insert + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0C0); // update + HoodieTestUtils.createDataFile(basePath, partitionPaths[1], "001", file1P1C0); // update + + List hoodieCleanStatsTwo = table.clean(jsc); + assertEquals("Must clean 1 file", 1, + getCleanStat(hoodieCleanStatsTwo, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertEquals("Must clean 1 file", 1, + getCleanStat(hoodieCleanStatsTwo, partitionPaths[1]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", 
file2P0C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "001", file2P1C1)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); + + // make next commit, with 2 updates to existing files, and 1 insert + HoodieTestUtils.createCommitFiles(basePath, "002"); + table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file1P0C0); // update + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file2P0C1); // update + String file3P0C2 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "002"); + + List hoodieCleanStatsThree = table.clean(jsc); + assertEquals("Must clean two files", 2, + getCleanStat(hoodieCleanStatsThree, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); + + // No cleaning on partially written file, with no commit. 
+ HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file3P0C2); // update + List hoodieCleanStatsFour = table.clean(jsc); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsFour, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); + } + + @Test + public void testKeepLatestFileVersionsMOR() throws IOException { + + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withAssumeDatePartitioning(true) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) + .retainFileVersions(1).build()).build(); + + HoodieTableMetaClient metaClient = HoodieTestUtils + .initTableType(basePath, HoodieTableType.MERGE_ON_READ); + + // Make 3 files, one base file and 2 log files associated with base file + String file1P0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); + String file2P0L0 = HoodieTestUtils + .createNewLogFile(basePath, partitionPaths[0], "000", file1P0, Optional.empty()); + String file2P0L1 = HoodieTestUtils + .createNewLogFile(basePath, partitionPaths[0], "000", file1P0, Optional.of(2)); + // make 1 compaction commit + HoodieTestUtils.createCompactionCommitFiles(basePath, "000"); + + // Make 4 files, one base file and 3 log files associated with base file + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0); + file2P0L0 = HoodieTestUtils + .createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.empty()); + file2P0L0 = HoodieTestUtils + .createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.of(2)); + file2P0L0 = HoodieTestUtils + .createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.of(3)); + // make 1 compaction commit + HoodieTestUtils.createCompactionCommitFiles(basePath, "001"); + + HoodieTable table = HoodieTable + 
.getHoodieTable(metaClient, config); + List hoodieCleanStats = table.clean(jsc); + assertEquals("Must clean three files, one parquet and 2 log files", 3, + getCleanStat(hoodieCleanStats, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0)); + assertFalse(HoodieTestUtils + .doesLogFileExist(basePath, partitionPaths[0], "000", file2P0L0, Optional.empty())); + assertFalse(HoodieTestUtils + .doesLogFileExist(basePath, partitionPaths[0], "000", file2P0L0, Optional.of(2))); + } + + @Test + public void testKeepLatestCommits() throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withAssumeDatePartitioning(true) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(2).build()).build(); + + // make 1 commit, with 1 file per partition + HoodieTestUtils.createCommitFiles(basePath, "000"); + + String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); + String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "000"); + + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + List hoodieCleanStatsOne = table.clean(jsc); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsOne, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsOne, partitionPaths[1]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); + + // make next commit, with 1 insert & 1 update per partition + HoodieTestUtils.createCommitFiles(basePath, "001"); + table = HoodieTable + 
.getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + String file2P0C1 = HoodieTestUtils + .createNewDataFile(basePath, partitionPaths[0], "001"); // insert + String file2P1C1 = HoodieTestUtils + .createNewDataFile(basePath, partitionPaths[1], "001"); // insert + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0C0); // update + HoodieTestUtils.createDataFile(basePath, partitionPaths[1], "001", file1P1C0); // update + + List hoodieCleanStatsTwo = table.clean(jsc); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsTwo, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsTwo, partitionPaths[1]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "001", file2P1C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); + + // make next commit, with 2 updates to existing files, and 1 insert + HoodieTestUtils.createCommitFiles(basePath, "002"); + table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file1P0C0); // update + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file2P0C1); // update + String file3P0C2 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "002"); + + List hoodieCleanStatsThree = table.clean(jsc); + assertEquals( + "Must not clean any file. 
We have to keep 1 version before the latest commit time to keep", + 0, getCleanStat(hoodieCleanStatsThree, partitionPaths[0]).getSuccessDeleteFiles().size()); + + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + + // make next commit, with 2 updates to existing files, and 1 insert + HoodieTestUtils.createCommitFiles(basePath, "003"); + table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file1P0C0); // update + HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file2P0C1); // update + String file4P0C3 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "003"); + + List hoodieCleanStatsFour = table.clean(jsc); + assertEquals( + "Must not clean one old file", 1, + getCleanStat(hoodieCleanStatsFour, partitionPaths[0]).getSuccessDeleteFiles().size()); + + assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file2P0C1)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "003", file4P0C3)); + + // No cleaning on partially written file, with no commit. 
+ HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "004", file3P0C2); // update + List hoodieCleanStatsFive = table.clean(jsc); + assertEquals("Must not clean any files", 0, + getCleanStat(hoodieCleanStatsFive, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); + assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); + } + + @Test + public void testCleaningWithZeroPartitonPaths() throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withAssumeDatePartitioning(true) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(2).build()).build(); + + // Make a commit, although there are no partitionPaths. + // Example use-case of this is when a client wants to create a table + // with just some commit metadata, but no data/partitionPaths. + HoodieTestUtils.createCommitFiles(basePath, "000"); + + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + + List hoodieCleanStatsOne = table.clean(jsc); + assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", + hoodieCleanStatsOne.isEmpty()); + } + + @Test + public void testCleaningSkewedPartitons() throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withAssumeDatePartitioning(true) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .retainCommits(2).build()).build(); + Map stageOneShuffleReadTaskRecordsCountMap = new HashMap<>(); + + // Since clean involves repartition in order to uniformly distribute data, + // we can inspect the number of records read by various tasks in stage 1. 
+ // There should not be skew in the number of records read in the task. + + // SparkListener below listens to the stage end events and captures number of + // records read by various tasks in stage-1. + jsc.sc().addSparkListener(new SparkListener() { + + @Override + public void onTaskEnd(SparkListenerTaskEnd taskEnd) { + + Iterator> iterator = taskEnd.taskMetrics().accumulators() + .iterator(); + while (iterator.hasNext()) { + AccumulatorV2 accumulator = iterator.next(); + if (taskEnd.stageId() == 1 && + accumulator.isRegistered() && + accumulator.name().isDefined() && + accumulator.name().get().equals("internal.metrics.shuffle.read.recordsRead")) { + stageOneShuffleReadTaskRecordsCountMap + .put(taskEnd.taskInfo().taskId(), (Long) accumulator.value()); + } } - assertEquals("Total number of records must add up", totalInserts, inserts1.size() + inserts2.size() + insert3.size()); + } + }); + + // make 1 commit, with 100 files in one partition and 10 in other two + HoodieTestUtils.createCommitFiles(basePath, "000"); + List filesP0C0 = createFilesInPartition(partitionPaths[0], "000", 100); + List filesP1C0 = createFilesInPartition(partitionPaths[1], "000", 10); + List filesP2C0 = createFilesInPartition(partitionPaths[2], "000", 10); + + HoodieTestUtils.createCommitFiles(basePath, "001"); + updateAllFilesInPartition(filesP0C0, partitionPaths[0], "001"); + updateAllFilesInPartition(filesP1C0, partitionPaths[1], "001"); + updateAllFilesInPartition(filesP2C0, partitionPaths[2], "001"); + + HoodieTestUtils.createCommitFiles(basePath, "002"); + updateAllFilesInPartition(filesP0C0, partitionPaths[0], "002"); + updateAllFilesInPartition(filesP1C0, partitionPaths[1], "002"); + updateAllFilesInPartition(filesP2C0, partitionPaths[2], "002"); + + HoodieTestUtils.createCommitFiles(basePath, "003"); + updateAllFilesInPartition(filesP0C0, partitionPaths[0], "003"); + updateAllFilesInPartition(filesP1C0, partitionPaths[1], "003"); + updateAllFilesInPartition(filesP2C0, 
partitionPaths[2], "003"); + + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), + config); + List hoodieCleanStats = table.clean(jsc); + + assertEquals(100, + getCleanStat(hoodieCleanStats, partitionPaths[0]).getSuccessDeleteFiles().size()); + assertEquals(10, + getCleanStat(hoodieCleanStats, partitionPaths[1]).getSuccessDeleteFiles().size()); + assertEquals(10, + getCleanStat(hoodieCleanStats, partitionPaths[2]).getSuccessDeleteFiles().size()); + + // 3 tasks are expected since the number of partitions is 3 + assertEquals(3, stageOneShuffleReadTaskRecordsCountMap.keySet().size()); + // Sum of all records processed = total number of files to clean + assertEquals(120, stageOneShuffleReadTaskRecordsCountMap + .values().stream().reduce((a, b) -> a + b).get().intValue()); + assertTrue("The skew in handling files to clean is not removed. " + + "Each task should handle more records than the partitionPath with least files " + + "and less records than the partitionPath with most files.", + stageOneShuffleReadTaskRecordsCountMap.values().stream().filter(a -> a > 10 && a < 100) + .count() == 3); + } + + public void testCommitWritesRelativePaths() throws Exception { + + HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + FileSystem fs = FSUtils.getFs(); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg); + + String commitTime = "000"; + client.startCommitWithTime(commitTime); + + List records = dataGen.generateInserts(commitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + JavaRDD result = client.bulkInsert(writeRecords, commitTime); + + assertTrue("Commit should succeed", client.commit(commitTime, result)); + assertTrue("After explicit commit, commit file should be created", + 
HoodieTestUtils.doesCommitExist(basePath, commitTime)); + + // Get parquet file paths from commit metadata + String actionType = table.getCompactedCommitActionType(); + HoodieInstant commitInstant = + new HoodieInstant(false, actionType, commitTime); + HoodieTimeline commitTimeline = table.getCompletedCompactionCommitTimeline(); + HoodieCommitMetadata commitMetadata = + HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commitInstant).get()); + String basePath = table.getMetaClient().getBasePath(); + Collection commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values(); + + // Read from commit file + String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime); + FileInputStream inputStream = new FileInputStream(filename); + String everything = IOUtils.toString(inputStream); + HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything.toString()); + HashMap paths = metadata.getFileIdAndFullPaths(basePath); + inputStream.close(); + + // Compare values in both to make sure they are equal. 
+ for (String pathName : paths.values()) { + assertTrue(commitPathNames.contains(pathName)); } + } - @Test - public void testKeepLatestFileVersions() throws IOException { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withAssumeDatePartitioning(true) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) - .retainFileVersions(1).build()).build(); + private HoodieCleanStat getCleanStat(List hoodieCleanStatsTwo, + String partitionPath) { + return hoodieCleanStatsTwo.stream() + .filter(e -> e.getPartitionPath().equals(partitionPath)) + .findFirst().get(); + } - // make 1 commit, with 1 file per partition - HoodieTestUtils.createCommitFiles(basePath, "000"); - - String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); - String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "000"); - HoodieTable table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - List hoodieCleanStatsOne = table.clean(jsc); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsOne, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsOne, partitionPaths[1]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); - - // make next commit, with 1 insert & 1 update per partition - HoodieTestUtils.createCommitFiles(basePath, "001"); - table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - String file2P0C1 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "001"); // insert - String file2P1C1 = 
HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "001"); // insert - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0C0); // update - HoodieTestUtils.createDataFile(basePath, partitionPaths[1], "001", file1P1C0); // update - - List hoodieCleanStatsTwo = table.clean(jsc); - assertEquals("Must clean 1 file" , 1, getCleanStat(hoodieCleanStatsTwo, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertEquals("Must clean 1 file" , 1, getCleanStat(hoodieCleanStatsTwo, partitionPaths[1]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "001", file2P1C1)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); - - // make next commit, with 2 updates to existing files, and 1 insert - HoodieTestUtils.createCommitFiles(basePath, "002"); - table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file1P0C0); // update - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file2P0C1); // update - String file3P0C2 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "002"); - - List hoodieCleanStatsThree = table.clean(jsc); - assertEquals("Must clean two files" , 2, getCleanStat(hoodieCleanStatsThree, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); - - // No cleaning on partially written file, with no 
commit. - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file3P0C2); // update - List hoodieCleanStatsFour = table.clean(jsc); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsFour, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); + private void updateAllFilesInPartition(List files, String partitionPath, + String commitTime) throws IOException { + for (String fileId : files) { + HoodieTestUtils.createDataFile(basePath, partitionPath, commitTime, fileId); } + } - @Test - public void testKeepLatestFileVersionsMOR() throws IOException { - - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withAssumeDatePartitioning(true) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) - .retainFileVersions(1).build()).build(); - - - HoodieTableMetaClient metaClient = HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); - - // Make 3 files, one base file and 2 log files associated with base file - String file1P0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); - String file2P0L0 = HoodieTestUtils.createNewLogFile(basePath, partitionPaths[0], "000", file1P0, Optional.empty()); - String file2P0L1 = HoodieTestUtils.createNewLogFile(basePath, partitionPaths[0], "000", file1P0, Optional.of(2)); - // make 1 compaction commit - HoodieTestUtils.createCompactionCommitFiles(basePath, "000"); - - // Make 4 files, one base file and 3 log files associated with base file - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0); - file2P0L0 = HoodieTestUtils.createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.empty()); - file2P0L0 = HoodieTestUtils.createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.of(2)); - file2P0L0 = 
HoodieTestUtils.createNewLogFile(basePath, partitionPaths[0], "001", file1P0, Optional.of(3)); - // make 1 compaction commit - HoodieTestUtils.createCompactionCommitFiles(basePath, "001"); - - HoodieTable table = HoodieTable - .getHoodieTable(metaClient, config); - List hoodieCleanStats = table.clean(jsc); - assertEquals("Must clean three files, one parquet and 2 log files" , 3, getCleanStat(hoodieCleanStats, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0)); - assertFalse(HoodieTestUtils.doesLogFileExist(basePath, partitionPaths[0], "000", file2P0L0, Optional.empty())); - assertFalse(HoodieTestUtils.doesLogFileExist(basePath, partitionPaths[0], "000", file2P0L0, Optional.of(2))); + private List createFilesInPartition(String partitionPath, String commitTime, int numFiles) + throws IOException { + List files = new ArrayList<>(); + for (int i = 0; i < numFiles; i++) { + files.add(HoodieTestUtils.createNewDataFile(basePath, partitionPath, commitTime)); } + return files; + } - @Test - public void testKeepLatestCommits() throws IOException { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withAssumeDatePartitioning(true) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .retainCommits(2).build()).build(); - - // make 1 commit, with 1 file per partition - HoodieTestUtils.createCommitFiles(basePath, "000"); - - String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "000"); - String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "000"); - - HoodieTable table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - List hoodieCleanStatsOne = table.clean(jsc); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsOne, 
partitionPaths[0]).getSuccessDeleteFiles().size()); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsOne, partitionPaths[1]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); - - // make next commit, with 1 insert & 1 update per partition - HoodieTestUtils.createCommitFiles(basePath, "001"); - table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - String file2P0C1 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "001"); // insert - String file2P1C1 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[1], "001"); // insert - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "001", file1P0C0); // update - HoodieTestUtils.createDataFile(basePath, partitionPaths[1], "001", file1P1C0); // update - - List hoodieCleanStatsTwo = table.clean(jsc); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsTwo, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsTwo, partitionPaths[1]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "001", file2P1C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[1], "000", file1P1C0)); - - // make next commit, with 2 updates to existing files, and 1 insert - HoodieTestUtils.createCommitFiles(basePath, "002"); - table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - HoodieTestUtils.createDataFile(basePath, 
partitionPaths[0], "002", file1P0C0); // update - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "002", file2P0C1); // update - String file3P0C2 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "002"); - - List hoodieCleanStatsThree = table.clean(jsc); - assertEquals( - "Must not clean any file. We have to keep 1 version before the latest commit time to keep", - 0, getCleanStat(hoodieCleanStatsThree, partitionPaths[0]).getSuccessDeleteFiles().size()); - - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - - // make next commit, with 2 updates to existing files, and 1 insert - HoodieTestUtils.createCommitFiles(basePath, "003"); - table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file1P0C0); // update - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "003", file2P0C1); // update - String file4P0C3 = HoodieTestUtils.createNewDataFile(basePath, partitionPaths[0], "003"); - - List hoodieCleanStatsFour = table.clean(jsc); - assertEquals( - "Must not clean one old file", 1, getCleanStat(hoodieCleanStatsFour, partitionPaths[0]).getSuccessDeleteFiles().size()); - - assertFalse(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "000", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file2P0C1)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "002", file3P0C2)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "003", file4P0C3)); - - // No cleaning on 
partially written file, with no commit. - HoodieTestUtils.createDataFile(basePath, partitionPaths[0], "004", file3P0C2); // update - List hoodieCleanStatsFive = table.clean(jsc); - assertEquals("Must not clean any files" , 0, getCleanStat(hoodieCleanStatsFive, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file1P0C0)); - assertTrue(HoodieTestUtils.doesDataFileExist(basePath, partitionPaths[0], "001", file2P0C1)); + @After + public void clean() { + if (basePath != null) { + new File(basePath).delete(); } - - @Test - public void testCleaningWithZeroPartitonPaths() throws IOException { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withAssumeDatePartitioning(true) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .retainCommits(2).build()).build(); - - // Make a commit, although there are no partitionPaths. - // Example use-case of this is when a client wants to create a table - // with just some commit metadata, but no data/partitionPaths. 
- HoodieTestUtils.createCommitFiles(basePath, "000"); - - HoodieTable table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), - config); - - List hoodieCleanStatsOne = table.clean(jsc); - assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", - hoodieCleanStatsOne.isEmpty()); - } - - @Test - public void testCleaningSkewedPartitons() throws IOException { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withAssumeDatePartitioning(true) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .retainCommits(2).build()).build(); - Map stageOneShuffleReadTaskRecordsCountMap = new HashMap<>(); - - // Since clean involves repartition in order to uniformly distribute data, - // we can inspect the number of records read by various tasks in stage 1. - // There should not be skew in the number of records read in the task. - - // SparkListener below listens to the stage end events and captures number of - // records read by various tasks in stage-1. 
- jsc.sc().addSparkListener(new SparkListener() { - - @Override - public void onTaskEnd(SparkListenerTaskEnd taskEnd) { - - Iterator> iterator = taskEnd.taskMetrics().accumulators() - .iterator(); - while(iterator.hasNext()) { - AccumulatorV2 accumulator = iterator.next(); - if (taskEnd.stageId() == 1 && - accumulator.isRegistered() && - accumulator.name().isDefined() && - accumulator.name().get().equals("internal.metrics.shuffle.read.recordsRead")) { - stageOneShuffleReadTaskRecordsCountMap.put(taskEnd.taskInfo().taskId(), (Long) accumulator.value()); - } - } - } - }); - - // make 1 commit, with 100 files in one partition and 10 in other two - HoodieTestUtils.createCommitFiles(basePath, "000"); - List filesP0C0 = createFilesInPartition(partitionPaths[0], "000", 100); - List filesP1C0 = createFilesInPartition(partitionPaths[1], "000", 10); - List filesP2C0 = createFilesInPartition(partitionPaths[2], "000", 10); - - HoodieTestUtils.createCommitFiles(basePath, "001"); - updateAllFilesInPartition(filesP0C0, partitionPaths[0], "001"); - updateAllFilesInPartition(filesP1C0, partitionPaths[1], "001"); - updateAllFilesInPartition(filesP2C0, partitionPaths[2], "001"); - - HoodieTestUtils.createCommitFiles(basePath, "002"); - updateAllFilesInPartition(filesP0C0, partitionPaths[0], "002"); - updateAllFilesInPartition(filesP1C0, partitionPaths[1], "002"); - updateAllFilesInPartition(filesP2C0, partitionPaths[2], "002"); - - HoodieTestUtils.createCommitFiles(basePath, "003"); - updateAllFilesInPartition(filesP0C0, partitionPaths[0], "003"); - updateAllFilesInPartition(filesP1C0, partitionPaths[1], "003"); - updateAllFilesInPartition(filesP2C0, partitionPaths[2], "003"); - - HoodieTable table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(FSUtils.getFs(), config.getBasePath(), true), config); - List hoodieCleanStats = table.clean(jsc); - - assertEquals(100, getCleanStat(hoodieCleanStats, partitionPaths[0]).getSuccessDeleteFiles().size()); - assertEquals(10, 
getCleanStat(hoodieCleanStats, partitionPaths[1]).getSuccessDeleteFiles().size()); - assertEquals(10, getCleanStat(hoodieCleanStats, partitionPaths[2]).getSuccessDeleteFiles().size()); - - // 3 tasks are expected since the number of partitions is 3 - assertEquals(3, stageOneShuffleReadTaskRecordsCountMap.keySet().size()); - // Sum of all records processed = total number of files to clean - assertEquals(120, stageOneShuffleReadTaskRecordsCountMap - .values().stream().reduce((a,b) -> a + b).get().intValue()); - assertTrue("The skew in handling files to clean is not removed. " - + "Each task should handle more records than the partitionPath with least files " - + "and less records than the partitionPath with most files.", - stageOneShuffleReadTaskRecordsCountMap.values().stream().filter(a -> a > 10 && a < 100).count() == 3); - } - - public void testCommitWritesRelativePaths() throws Exception { - - HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - FileSystem fs = FSUtils.getFs(); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg); - - String commitTime = "000"; - client.startCommitWithTime(commitTime); - - List records = dataGen.generateInserts(commitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - JavaRDD result = client.bulkInsert(writeRecords, commitTime); - - assertTrue("Commit should succeed", client.commit(commitTime, result)); - assertTrue("After explicit commit, commit file should be created", - HoodieTestUtils.doesCommitExist(basePath, commitTime)); - - // Get parquet file paths from commit metadata - String actionType = table.getCompactedCommitActionType(); - HoodieInstant commitInstant = - new HoodieInstant(false, actionType, commitTime); - HoodieTimeline commitTimeline = table.getCompletedCompactionCommitTimeline(); - HoodieCommitMetadata commitMetadata = - 
HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commitInstant).get()); - String basePath = table.getMetaClient().getBasePath(); - Collection commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values(); - - // Read from commit file - String filename = HoodieTestUtils.getCommitFilePath(basePath, commitTime); - FileInputStream inputStream = new FileInputStream(filename); - String everything = IOUtils.toString(inputStream); - HoodieCommitMetadata metadata = HoodieCommitMetadata.fromJsonString(everything.toString()); - HashMap paths = metadata.getFileIdAndFullPaths(basePath); - inputStream.close(); - - // Compare values in both to make sure they are equal. - for (String pathName : paths.values()) { - assertTrue(commitPathNames.contains(pathName)); - } - } - - private HoodieCleanStat getCleanStat(List hoodieCleanStatsTwo, - String partitionPath) { - return hoodieCleanStatsTwo.stream() - .filter(e -> e.getPartitionPath().equals(partitionPath)) - .findFirst().get(); - } - - private void updateAllFilesInPartition(List files, String partitionPath, - String commitTime) throws IOException { - for (String fileId : files) { - HoodieTestUtils.createDataFile(basePath, partitionPath, commitTime, fileId); - } - } - - private List createFilesInPartition(String partitionPath, String commitTime, int numFiles) throws IOException { - List files = new ArrayList<>(); - for (int i = 0; i < numFiles; i++) { - files.add(HoodieTestUtils.createNewDataFile(basePath, partitionPath, commitTime)); - } - return files; - } - - @After - public void clean() { - if (basePath != null) { - new File(basePath).delete(); - } - if (jsc != null) { - jsc.stop(); - } + if (jsc != null) { + jsc.stop(); } + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieClientTestUtils.java b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieClientTestUtils.java index 002b6cd20..c2db12d5e 100644 --- 
a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieClientTestUtils.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieClientTestUtils.java @@ -29,15 +29,6 @@ import com.uber.hoodie.common.table.view.HoodieTableFileSystemView; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.table.HoodieTable; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Row; -import org.apache.spark.sql.SQLContext; - -import org.apache.spark.SparkConf; - import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; @@ -49,6 +40,12 @@ import java.util.Iterator; import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SQLContext; /** * Utility methods to aid testing inside the HoodieClient module. 
@@ -56,133 +53,142 @@ import java.util.stream.Collectors; public class HoodieClientTestUtils { - public static List collectStatuses(Iterator> statusListItr) { - List statuses = new ArrayList<>(); - while (statusListItr.hasNext()) { - statuses.addAll(statusListItr.next()); - } - return statuses; - } - - public static Set getRecordKeys(List hoodieRecords) { - Set keys = new HashSet<>(); - for (HoodieRecord rec: hoodieRecords) { - keys.add(rec.getRecordKey()); - } - return keys; - } - - private static void fakeMetaFile(String basePath, String commitTime, String suffix) throws IOException { - String parentPath = basePath + "/"+ HoodieTableMetaClient.METAFOLDER_NAME; - new File(parentPath).mkdirs(); - new File(parentPath + "/" + commitTime + suffix).createNewFile(); - } - - - public static void fakeCommitFile(String basePath, String commitTime) throws IOException { - fakeMetaFile(basePath, commitTime, HoodieTimeline.COMMIT_EXTENSION); - } - - public static void fakeInFlightFile(String basePath, String commitTime) throws IOException { - fakeMetaFile(basePath, commitTime, HoodieTimeline.INFLIGHT_EXTENSION); - } - - public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId) throws Exception { - fakeDataFile(basePath, partitionPath, commitTime, fileId, 0); - } - - public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId, long length) throws Exception { - String parentPath = String.format("%s/%s", basePath, partitionPath); - new File(parentPath).mkdirs(); - String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, 0, fileId)); - new File(path).createNewFile(); - new RandomAccessFile(path, "rw").setLength(length); - } - - public static SparkConf getSparkConfForTest(String appName) { - SparkConf sparkConf = new SparkConf() - .setAppName(appName) - .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .setMaster("local[1]"); - return 
HoodieReadClient.addHoodieSupport(sparkConf); - } - - public static HashMap getLatestFileIDsToFullPath(String basePath, - HoodieTimeline commitTimeline, - List commitsToReturn) throws IOException { - HashMap fileIdToFullPath = new HashMap<>(); - for (HoodieInstant commit : commitsToReturn) { - HoodieCommitMetadata metadata = - HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get()); - fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths(basePath)); - } - return fileIdToFullPath; - } - - public static Dataset readCommit(String basePath, - SQLContext sqlContext, - HoodieTimeline commitTimeline, - String commitTime) { - HoodieInstant commitInstant = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); - if (!commitTimeline.containsInstant(commitInstant)) { - new HoodieException("No commit exists at " + commitTime); - } - try { - HashMap paths = getLatestFileIDsToFullPath(basePath, commitTimeline, Arrays.asList(commitInstant)); - return sqlContext.read() - .parquet(paths.values().toArray(new String[paths.size()])) - .filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime)); - } catch (Exception e) { - throw new HoodieException("Error reading commit " + commitTime, e); - } - } - - /** - * Obtain all new data written into the Hoodie dataset since the given timestamp. - */ - public static Dataset readSince(String basePath, - SQLContext sqlContext, - HoodieTimeline commitTimeline, - String lastCommitTime) { - List commitsToReturn = - commitTimeline.findInstantsAfter(lastCommitTime, Integer.MAX_VALUE) - .getInstants().collect(Collectors.toList()); - try { - // Go over the commit metadata, and obtain the new files that need to be read. 
- HashMap fileIdToFullPath = getLatestFileIDsToFullPath(basePath, commitTimeline, commitsToReturn); - return sqlContext.read() - .parquet(fileIdToFullPath.values().toArray(new String[fileIdToFullPath.size()])) - .filter(String.format("%s >'%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, lastCommitTime)); - } catch (IOException e) { - throw new HoodieException("Error pulling data incrementally from commitTimestamp :" + lastCommitTime, e); - } - } - - /** - * Reads the paths under the a hoodie dataset out as a DataFrame - */ - public static Dataset read(String basePath, - SQLContext sqlContext, - FileSystem fs, - String... paths) { - List filteredPaths = new ArrayList<>(); - try { - HoodieTable hoodieTable = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); - for (String path : paths) { - TableFileSystemView.ReadOptimizedView fileSystemView = new HoodieTableFileSystemView(hoodieTable.getMetaClient(), - hoodieTable.getCompletedCommitTimeline(), fs.globStatus(new Path(path))); - List latestFiles = fileSystemView.getLatestDataFiles().collect( - Collectors.toList()); - for (HoodieDataFile file : latestFiles) { - filteredPaths.add(file.getPath()); - } - } - return sqlContext.read() - .parquet(filteredPaths.toArray(new String[filteredPaths.size()])); - } catch (Exception e) { - throw new HoodieException("Error reading hoodie dataset as a dataframe", e); + public static List collectStatuses(Iterator> statusListItr) { + List statuses = new ArrayList<>(); + while (statusListItr.hasNext()) { + statuses.addAll(statusListItr.next()); + } + return statuses; + } + + public static Set getRecordKeys(List hoodieRecords) { + Set keys = new HashSet<>(); + for (HoodieRecord rec : hoodieRecords) { + keys.add(rec.getRecordKey()); + } + return keys; + } + + private static void fakeMetaFile(String basePath, String commitTime, String suffix) + throws IOException { + String parentPath = basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME; + new 
File(parentPath).mkdirs(); + new File(parentPath + "/" + commitTime + suffix).createNewFile(); + } + + + public static void fakeCommitFile(String basePath, String commitTime) throws IOException { + fakeMetaFile(basePath, commitTime, HoodieTimeline.COMMIT_EXTENSION); + } + + public static void fakeInFlightFile(String basePath, String commitTime) throws IOException { + fakeMetaFile(basePath, commitTime, HoodieTimeline.INFLIGHT_EXTENSION); + } + + public static void fakeDataFile(String basePath, String partitionPath, String commitTime, + String fileId) throws Exception { + fakeDataFile(basePath, partitionPath, commitTime, fileId, 0); + } + + public static void fakeDataFile(String basePath, String partitionPath, String commitTime, + String fileId, long length) throws Exception { + String parentPath = String.format("%s/%s", basePath, partitionPath); + new File(parentPath).mkdirs(); + String path = String + .format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, 0, fileId)); + new File(path).createNewFile(); + new RandomAccessFile(path, "rw").setLength(length); + } + + public static SparkConf getSparkConfForTest(String appName) { + SparkConf sparkConf = new SparkConf() + .setAppName(appName) + .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .setMaster("local[1]"); + return HoodieReadClient.addHoodieSupport(sparkConf); + } + + public static HashMap getLatestFileIDsToFullPath(String basePath, + HoodieTimeline commitTimeline, + List commitsToReturn) throws IOException { + HashMap fileIdToFullPath = new HashMap<>(); + for (HoodieInstant commit : commitsToReturn) { + HoodieCommitMetadata metadata = + HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit).get()); + fileIdToFullPath.putAll(metadata.getFileIdAndFullPaths(basePath)); + } + return fileIdToFullPath; + } + + public static Dataset readCommit(String basePath, + SQLContext sqlContext, + HoodieTimeline commitTimeline, + String commitTime) { + HoodieInstant 
commitInstant = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); + if (!commitTimeline.containsInstant(commitInstant)) { + new HoodieException("No commit exists at " + commitTime); + } + try { + HashMap paths = getLatestFileIDsToFullPath(basePath, commitTimeline, + Arrays.asList(commitInstant)); + return sqlContext.read() + .parquet(paths.values().toArray(new String[paths.size()])) + .filter(String.format("%s ='%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime)); + } catch (Exception e) { + throw new HoodieException("Error reading commit " + commitTime, e); + } + } + + /** + * Obtain all new data written into the Hoodie dataset since the given timestamp. + */ + public static Dataset readSince(String basePath, + SQLContext sqlContext, + HoodieTimeline commitTimeline, + String lastCommitTime) { + List commitsToReturn = + commitTimeline.findInstantsAfter(lastCommitTime, Integer.MAX_VALUE) + .getInstants().collect(Collectors.toList()); + try { + // Go over the commit metadata, and obtain the new files that need to be read. + HashMap fileIdToFullPath = getLatestFileIDsToFullPath(basePath, + commitTimeline, commitsToReturn); + return sqlContext.read() + .parquet(fileIdToFullPath.values().toArray(new String[fileIdToFullPath.size()])) + .filter( + String.format("%s >'%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, lastCommitTime)); + } catch (IOException e) { + throw new HoodieException( + "Error pulling data incrementally from commitTimestamp :" + lastCommitTime, e); + } + } + + /** + * Reads the paths under the a hoodie dataset out as a DataFrame + */ + public static Dataset read(String basePath, + SQLContext sqlContext, + FileSystem fs, + String... 
paths) { + List filteredPaths = new ArrayList<>(); + try { + HoodieTable hoodieTable = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); + for (String path : paths) { + TableFileSystemView.ReadOptimizedView fileSystemView = new HoodieTableFileSystemView( + hoodieTable.getMetaClient(), + hoodieTable.getCompletedCommitTimeline(), fs.globStatus(new Path(path))); + List latestFiles = fileSystemView.getLatestDataFiles().collect( + Collectors.toList()); + for (HoodieDataFile file : latestFiles) { + filteredPaths.add(file.getPath()); } + } + return sqlContext.read() + .parquet(filteredPaths.toArray(new String[filteredPaths.size()])); + } catch (Exception e) { + throw new HoodieException("Error reading hoodie dataset as a dataframe", e); } + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieMergeOnReadTestUtils.java b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieMergeOnReadTestUtils.java index 4b2424eb7..ec3d5728c 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieMergeOnReadTestUtils.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieMergeOnReadTestUtils.java @@ -16,9 +16,16 @@ package com.uber.hoodie.common; +import static com.uber.hoodie.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; + import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.HoodieAvroUtils; import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.GenericRecordBuilder; @@ -30,69 +37,64 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import 
java.util.List; -import java.util.stream.Collectors; - -import static com.uber.hoodie.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; - /** * Utility methods to aid in testing MergeOnRead (workaround for HoodieReadClient for MOR) */ public class HoodieMergeOnReadTestUtils { - public static List getRecordsUsingInputFormat(List inputPaths) throws IOException { - JobConf jobConf = new JobConf(); - Schema schema = HoodieAvroUtils.addMetadataFields(Schema.parse(TRIP_EXAMPLE_SCHEMA)); - HoodieRealtimeInputFormat inputFormat = new HoodieRealtimeInputFormat(); - setPropsForInputFormat(inputFormat, jobConf, schema); - return inputPaths.stream().map(path -> { - setInputPath(jobConf, path); - List records = new ArrayList<>(); - try { - List splits = Arrays.asList(inputFormat.getSplits(jobConf, 1)); - RecordReader recordReader = inputFormat.getRecordReader(splits.get(0), jobConf, null); - Void key = (Void) recordReader.createKey(); - ArrayWritable writable = (ArrayWritable) recordReader.createValue(); - while (recordReader.next(key, writable)) { - GenericRecordBuilder newRecord = new GenericRecordBuilder(schema); - // writable returns an array with [field1, field2, _hoodie_commit_time, _hoodie_commit_seqno] - Writable[] values = writable.get(); - schema.getFields().forEach(field -> { - newRecord.set(field, values[2]); - }); - records.add(newRecord.build()); - } - } catch (IOException ie) { - ie.printStackTrace(); - } - return records; - }).reduce((a, b) -> { - a.addAll(b); - return a; - }).get(); - } + public static List getRecordsUsingInputFormat(List inputPaths) + throws IOException { + JobConf jobConf = new JobConf(); + Schema schema = HoodieAvroUtils.addMetadataFields(Schema.parse(TRIP_EXAMPLE_SCHEMA)); + HoodieRealtimeInputFormat inputFormat = new HoodieRealtimeInputFormat(); + setPropsForInputFormat(inputFormat, jobConf, schema); + return inputPaths.stream().map(path -> { + setInputPath(jobConf, path); + List records = new ArrayList<>(); + try { + List splits = 
Arrays.asList(inputFormat.getSplits(jobConf, 1)); + RecordReader recordReader = inputFormat.getRecordReader(splits.get(0), jobConf, null); + Void key = (Void) recordReader.createKey(); + ArrayWritable writable = (ArrayWritable) recordReader.createValue(); + while (recordReader.next(key, writable)) { + GenericRecordBuilder newRecord = new GenericRecordBuilder(schema); + // writable returns an array with [field1, field2, _hoodie_commit_time, _hoodie_commit_seqno] + Writable[] values = writable.get(); + schema.getFields().forEach(field -> { + newRecord.set(field, values[2]); + }); + records.add(newRecord.build()); + } + } catch (IOException ie) { + ie.printStackTrace(); + } + return records; + }).reduce((a, b) -> { + a.addAll(b); + return a; + }).get(); + } - private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf, Schema schema) { - List fields = schema.getFields(); - String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(",")); - String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(",")); - Configuration conf = FSUtils.getFs().getConf(); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); - jobConf.set("partition_columns", "datestr"); - conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); - conf.set("partition_columns", "datestr"); - inputFormat.setConf(conf); - jobConf.addResource(conf); - } + private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf, + Schema schema) { + List fields = schema.getFields(); + String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(",")); + String postions = fields.stream().map(f -> String.valueOf(f.pos())) + .collect(Collectors.joining(",")); + Configuration conf = 
FSUtils.getFs().getConf(); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); + jobConf.set("partition_columns", "datestr"); + conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); + conf.set("partition_columns", "datestr"); + inputFormat.setConf(conf); + jobConf.addResource(conf); + } - private static void setInputPath(JobConf jobConf, String inputPath) { - jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath); - jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath); - jobConf.set("map.input.dir", inputPath); - } + private static void setInputPath(JobConf jobConf, String inputPath) { + jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath); + jobConf.set("mapreduce.input.fileinputformat.inputdir", inputPath); + jobConf.set("map.input.dir", inputPath); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieTestDataGenerator.java b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieTestDataGenerator.java index fe9c9fd49..c197e6b51 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieTestDataGenerator.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/common/HoodieTestDataGenerator.java @@ -16,17 +16,21 @@ package com.uber.hoodie.common; -import com.uber.hoodie.avro.model.HoodieCleanMetadata; -import com.uber.hoodie.common.model.HoodieCleaningPolicy; import com.uber.hoodie.common.model.HoodieCommitMetadata; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; -import com.uber.hoodie.common.util.AvroUtils; import com.uber.hoodie.common.util.FSUtils; import 
com.uber.hoodie.common.util.HoodieAvroUtils; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Random; +import java.util.UUID; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; @@ -34,15 +38,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.Random; -import java.util.UUID; - /** * Class to be used in tests to keep generating test inserts and updates against a corpus. * @@ -51,153 +46,164 @@ import java.util.UUID; public class HoodieTestDataGenerator { static class KeyPartition { - HoodieKey key; - String partitionPath; + + HoodieKey key; + String partitionPath; + } + + public static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," + + "\"name\": \"triprec\"," + + "\"fields\": [ " + + "{\"name\": \"timestamp\",\"type\": \"double\"}," + + "{\"name\": \"_row_key\", \"type\": \"string\"}," + + "{\"name\": \"rider\", \"type\": \"string\"}," + + "{\"name\": \"driver\", \"type\": \"string\"}," + + "{\"name\": \"begin_lat\", \"type\": \"double\"}," + + "{\"name\": \"begin_lon\", \"type\": \"double\"}," + + "{\"name\": \"end_lat\", \"type\": \"double\"}," + + "{\"name\": \"end_lon\", \"type\": \"double\"}," + + "{\"name\":\"fare\",\"type\": \"double\"}]}"; + + // based on examination of sample file, the schema produces the following per record size + public static final int SIZE_PER_RECORD = 50 * 1024; + + public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"}; + + + public static void writePartitionMetadata(FileSystem fs, String[] partitionPaths, + String basePath) { + 
for (String partitionPath : partitionPaths) { + new HoodiePartitionMetadata(fs, "000", new Path(basePath), new Path(basePath, partitionPath)) + .trySave(0); } + } - public static String TRIP_EXAMPLE_SCHEMA = "{\"type\": \"record\"," - + "\"name\": \"triprec\"," - + "\"fields\": [ " - + "{\"name\": \"timestamp\",\"type\": \"double\"}," - + "{\"name\": \"_row_key\", \"type\": \"string\"}," - + "{\"name\": \"rider\", \"type\": \"string\"}," - + "{\"name\": \"driver\", \"type\": \"string\"}," - + "{\"name\": \"begin_lat\", \"type\": \"double\"}," - + "{\"name\": \"begin_lon\", \"type\": \"double\"}," - + "{\"name\": \"end_lat\", \"type\": \"double\"}," - + "{\"name\": \"end_lon\", \"type\": \"double\"}," - + "{\"name\":\"fare\",\"type\": \"double\"}]}"; + private List existingKeysList = new ArrayList<>(); + public static Schema avroSchema = HoodieAvroUtils + .addMetadataFields(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA)); + private static Random rand = new Random(46474747); + private String[] partitionPaths = DEFAULT_PARTITION_PATHS; - // based on examination of sample file, the schema produces the following per record size - public static final int SIZE_PER_RECORD = 50 * 1024; + public HoodieTestDataGenerator(String[] partitionPaths) { + this.partitionPaths = partitionPaths; + } - public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"}; + public HoodieTestDataGenerator() { + this(new String[]{"2016/03/15", "2015/03/16", "2015/03/17"}); + } - public static void writePartitionMetadata(FileSystem fs, String[] partitionPaths, String basePath) { - for (String partitionPath: partitionPaths) { - new HoodiePartitionMetadata(fs, "000", new Path(basePath), new Path(basePath, partitionPath)).trySave(0); - } + /** + * Generates new inserts, uniformly across the partition paths above. It also updates the list of + * existing keys. 
+ */ + public List generateInserts(String commitTime, int n) throws IOException { + List inserts = new ArrayList<>(); + for (int i = 0; i < n; i++) { + String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)]; + HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath); + HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime)); + inserts.add(record); + + KeyPartition kp = new KeyPartition(); + kp.key = key; + kp.partitionPath = partitionPath; + existingKeysList.add(kp); } + return inserts; + } - private List existingKeysList = new ArrayList<>(); - public static Schema avroSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA)); - private static Random rand = new Random(46474747); - private String[] partitionPaths = DEFAULT_PARTITION_PATHS; + public List generateDeletes(String commitTime, int n) throws IOException { + List inserts = generateInserts(commitTime, n); + return generateDeletesFromExistingRecords(inserts); + } + + public List generateDeletesFromExistingRecords(List existingRecords) + throws IOException { + List deletes = new ArrayList<>(); + for (HoodieRecord existingRecord : existingRecords) { + HoodieRecord record = generateDeleteRecord(existingRecord); + deletes.add(record); - public HoodieTestDataGenerator(String[] partitionPaths) { - this.partitionPaths = partitionPaths; } + return deletes; + } - public HoodieTestDataGenerator() { - this(new String[]{"2016/03/15", "2015/03/16", "2015/03/17"}); + public HoodieRecord generateDeleteRecord(HoodieRecord existingRecord) throws IOException { + HoodieKey key = existingRecord.getKey(); + TestRawTripPayload payload = new TestRawTripPayload(Optional.empty(), key.getRecordKey(), + key.getPartitionPath(), null, true); + return new HoodieRecord(key, payload); + } + + public List generateUpdates(String commitTime, List baseRecords) + throws IOException { + List updates = new ArrayList<>(); + for (HoodieRecord baseRecord 
: baseRecords) { + HoodieRecord record = new HoodieRecord(baseRecord.getKey(), + generateRandomValue(baseRecord.getKey(), commitTime)); + updates.add(record); } + return updates; + } - - /** - * Generates new inserts, uniformly across the partition paths above. It also updates the list - * of existing keys. - */ - public List generateInserts(String commitTime, int n) throws IOException { - List inserts = new ArrayList<>(); - for (int i = 0; i < n; i++) { - String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)]; - HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath); - HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, commitTime)); - inserts.add(record); - - KeyPartition kp = new KeyPartition(); - kp.key = key; - kp.partitionPath = partitionPath; - existingKeysList.add(kp); - } - return inserts; + /** + * Generates new updates, randomly distributed across the keys above. + */ + public List generateUpdates(String commitTime, int n) throws IOException { + List updates = new ArrayList<>(); + for (int i = 0; i < n; i++) { + KeyPartition kp = existingKeysList.get(rand.nextInt(existingKeysList.size() - 1)); + HoodieRecord record = new HoodieRecord(kp.key, generateRandomValue(kp.key, commitTime)); + updates.add(record); } + return updates; + } - public List generateDeletes(String commitTime, int n) throws IOException { - List inserts = generateInserts(commitTime, n); - return generateDeletesFromExistingRecords(inserts); - } - - public List generateDeletesFromExistingRecords(List existingRecords) throws IOException { - List deletes = new ArrayList<>(); - for (HoodieRecord existingRecord: existingRecords) { - HoodieRecord record = generateDeleteRecord(existingRecord); - deletes.add(record); - - } - return deletes; - } - - public HoodieRecord generateDeleteRecord(HoodieRecord existingRecord) throws IOException { - HoodieKey key = existingRecord.getKey(); - TestRawTripPayload payload = new 
TestRawTripPayload(Optional.empty(), key.getRecordKey(), key.getPartitionPath(), null, true); - return new HoodieRecord(key, payload); - } - - public List generateUpdates(String commitTime, List baseRecords) throws IOException { - List updates = new ArrayList<>(); - for (HoodieRecord baseRecord: baseRecords) { - HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateRandomValue(baseRecord.getKey(), commitTime)); - updates.add(record); - } - return updates; - } - - /** - * Generates new updates, randomly distributed across the keys above. - */ - public List generateUpdates(String commitTime, int n) throws IOException { - List updates = new ArrayList<>(); - for (int i = 0; i < n; i++) { - KeyPartition kp = existingKeysList.get(rand.nextInt(existingKeysList.size() - 1)); - HoodieRecord record = new HoodieRecord(kp.key, generateRandomValue(kp.key, commitTime)); - updates.add(record); - } - return updates; - } - - - /** - * Generates a new avro record of the above schema format, retaining the key if optionally - * provided. 
- */ - public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) throws IOException { - GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, - "driver-" + commitTime, 0.0); - HoodieAvroUtils.addCommitMetadataToRecord(rec, commitTime, "-1"); - return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA); - } - - public static GenericRecord generateGenericRecord(String rowKey, String riderName, - String driverName, double timestamp) { - GenericRecord rec = new GenericData.Record(avroSchema); - rec.put("_row_key", rowKey); - rec.put("timestamp", timestamp); - rec.put("rider", riderName); - rec.put("driver", driverName); - rec.put("begin_lat", rand.nextDouble()); - rec.put("begin_lon", rand.nextDouble()); - rec.put("end_lat", rand.nextDouble()); - rec.put("end_lon", rand.nextDouble()); - rec.put("fare", rand.nextDouble() * 100); - return rec; - } - - public static void createCommitFile(String basePath, String commitTime) throws IOException { - Path commitFile = - new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCommitFileName(commitTime)); - FileSystem fs = FSUtils.getFs(); - FSDataOutputStream os = fs.create(commitFile, true); - HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); - try { - // Write empty commit metadata - os.writeBytes(new String(commitMetadata.toJsonString().getBytes( - StandardCharsets.UTF_8))); - } finally { - os.close(); - } + + /** + * Generates a new avro record of the above schema format, retaining the key if optionally + * provided. 
+ */ + public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) + throws IOException { + GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, + "driver-" + commitTime, 0.0); + HoodieAvroUtils.addCommitMetadataToRecord(rec, commitTime, "-1"); + return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), + TRIP_EXAMPLE_SCHEMA); + } + + public static GenericRecord generateGenericRecord(String rowKey, String riderName, + String driverName, double timestamp) { + GenericRecord rec = new GenericData.Record(avroSchema); + rec.put("_row_key", rowKey); + rec.put("timestamp", timestamp); + rec.put("rider", riderName); + rec.put("driver", driverName); + rec.put("begin_lat", rand.nextDouble()); + rec.put("begin_lon", rand.nextDouble()); + rec.put("end_lat", rand.nextDouble()); + rec.put("end_lon", rand.nextDouble()); + rec.put("fare", rand.nextDouble() * 100); + return rec; + } + + public static void createCommitFile(String basePath, String commitTime) throws IOException { + Path commitFile = + new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline + .makeCommitFileName(commitTime)); + FileSystem fs = FSUtils.getFs(); + FSDataOutputStream os = fs.create(commitFile, true); + HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); + try { + // Write empty commit metadata + os.writeBytes(new String(commitMetadata.toJsonString().getBytes( + StandardCharsets.UTF_8))); + } finally { + os.close(); } + } public static void createSavepointFile(String basePath, String commitTime) throws IOException { Path commitFile = @@ -215,7 +221,7 @@ public class HoodieTestDataGenerator { } } - public String[] getPartitionPaths() { - return partitionPaths; - } + public String[] getPartitionPaths() { + return partitionPaths; + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/common/TestRawTripPayload.java 
b/hoodie-client/src/test/java/com/uber/hoodie/common/TestRawTripPayload.java index 572792495..11c790f66 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/common/TestRawTripPayload.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/common/TestRawTripPayload.java @@ -17,174 +17,182 @@ package com.uber.hoodie.common; import com.fasterxml.jackson.databind.ObjectMapper; - import com.uber.hoodie.WriteStatus; import com.uber.hoodie.avro.MercifulJsonConverter; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; - +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.StringWriter; import java.util.HashMap; import java.util.List; -import java.util.Map.Entry; -import org.apache.avro.Schema; -import org.apache.avro.generic.IndexedRecord; -import org.apache.commons.io.IOUtils; - -import java.io.*; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; +import org.apache.avro.Schema; +import org.apache.avro.generic.IndexedRecord; +import org.apache.commons.io.IOUtils; /** * Example row change event based on some example data used by testcases. The data avro schema is * src/test/resources/schema1. 
*/ public class TestRawTripPayload implements HoodieRecordPayload { - private transient static final ObjectMapper mapper = new ObjectMapper(); - private String partitionPath; - private String rowKey; - private byte[] jsonDataCompressed; - private int dataSize; - private boolean isDeleted; - public TestRawTripPayload(Optional jsonData, String rowKey, String partitionPath, - String schemaStr, Boolean isDeleted) throws IOException { - if(jsonData.isPresent()) { - this.jsonDataCompressed = compressData(jsonData.get()); - this.dataSize = jsonData.get().length(); - } - this.rowKey = rowKey; - this.partitionPath = partitionPath; - this.isDeleted = isDeleted; + private transient static final ObjectMapper mapper = new ObjectMapper(); + private String partitionPath; + private String rowKey; + private byte[] jsonDataCompressed; + private int dataSize; + private boolean isDeleted; + + public TestRawTripPayload(Optional jsonData, String rowKey, String partitionPath, + String schemaStr, Boolean isDeleted) throws IOException { + if (jsonData.isPresent()) { + this.jsonDataCompressed = compressData(jsonData.get()); + this.dataSize = jsonData.get().length(); } + this.rowKey = rowKey; + this.partitionPath = partitionPath; + this.isDeleted = isDeleted; + } - public TestRawTripPayload(String jsonData, String rowKey, String partitionPath, - String schemaStr)throws IOException { - this(Optional.of(jsonData), rowKey, partitionPath, schemaStr, false); + public TestRawTripPayload(String jsonData, String rowKey, String partitionPath, + String schemaStr) throws IOException { + this(Optional.of(jsonData), rowKey, partitionPath, schemaStr, false); + } + + public TestRawTripPayload(String jsonData) throws IOException { + this.jsonDataCompressed = compressData(jsonData); + this.dataSize = jsonData.length(); + Map jsonRecordMap = mapper.readValue(jsonData, Map.class); + this.rowKey = jsonRecordMap.get("_row_key").toString(); + this.partitionPath = 
jsonRecordMap.get("time").toString().split("T")[0].replace("-", "/"); + this.isDeleted = false; + } + + public String getPartitionPath() { + return partitionPath; + } + + + @Override + public TestRawTripPayload preCombine(TestRawTripPayload another) { + return another; + } + + @Override + public Optional combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) + throws IOException { + return this.getInsertValue(schema); + } + + @Override + public Optional getInsertValue(Schema schema) throws IOException { + if (isDeleted) { + return Optional.empty(); + } else { + MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema); + return Optional.of(jsonConverter.convert(getJsonData())); } + } - public TestRawTripPayload(String jsonData) throws IOException { - this.jsonDataCompressed = compressData(jsonData); - this.dataSize = jsonData.length(); - Map jsonRecordMap = mapper.readValue(jsonData, Map.class); - this.rowKey = jsonRecordMap.get("_row_key").toString(); - this.partitionPath = jsonRecordMap.get("time").toString().split("T")[0].replace("-", "/"); - this.isDeleted = false; - } - - public String getPartitionPath() { - return partitionPath; + @Override + public Optional> getMetadata() { + // Let's assume we want to count the number of input row change events + // that are processed. Let the time-bucket for this row change event be 1506582000. 
+ Map metadataMap = new HashMap<>(); + metadataMap.put("InputRecordCount_1506582000", "2"); + return Optional.of(metadataMap); + } + + public String getRowKey() { + return rowKey; + } + + public String getJsonData() throws IOException { + return unCompressData(jsonDataCompressed); + } + + private byte[] compressData(String jsonData) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DeflaterOutputStream dos = + new DeflaterOutputStream(baos, new Deflater(Deflater.BEST_COMPRESSION), true); + try { + dos.write(jsonData.getBytes()); + } finally { + dos.flush(); + dos.close(); } + return baos.toByteArray(); + } - @Override public TestRawTripPayload preCombine(TestRawTripPayload another) { - return another; - } + private String unCompressData(byte[] data) throws IOException { + InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data)); + StringWriter sw = new StringWriter(dataSize); + IOUtils.copy(iis, sw); + return sw.toString(); + } - @Override public Optional combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException { - return this.getInsertValue(schema); - } + /** + * A custom {@link WriteStatus} that merges passed metadata key value map to {@code + * WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()}. 
+ */ + public static class MetadataMergeWriteStatus extends WriteStatus { - @Override public Optional getInsertValue(Schema schema) throws IOException { - if(isDeleted){ - return Optional.empty(); - } else { - MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema); - return Optional.of(jsonConverter.convert(getJsonData())); - } + private Map mergedMetadataMap = new HashMap<>(); + + @Override + public void markSuccess(HoodieRecord record, Optional> recordMetadata) { + super.markSuccess(record, recordMetadata); + if (recordMetadata.isPresent()) { + mergeMetadataMaps(recordMetadata.get(), mergedMetadataMap); + } } @Override - public Optional> getMetadata() { - // Let's assume we want to count the number of input row change events - // that are processed. Let the time-bucket for this row change event be 1506582000. - Map metadataMap = new HashMap<>(); - metadataMap.put("InputRecordCount_1506582000", "2"); - return Optional.of(metadataMap); + public void markFailure(HoodieRecord record, Throwable t, + Optional> recordMetadata) { + super.markFailure(record, t, recordMetadata); + if (recordMetadata.isPresent()) { + mergeMetadataMaps(recordMetadata.get(), mergedMetadataMap); + } } - public String getRowKey() { - return rowKey; + public static Map mergeMetadataForWriteStatuses( + List writeStatuses) { + Map allWriteStatusMergedMetadataMap = new HashMap<>(); + for (WriteStatus writeStatus : writeStatuses) { + MetadataMergeWriteStatus.mergeMetadataMaps( + ((MetadataMergeWriteStatus) writeStatus).getMergedMetadataMap(), + allWriteStatusMergedMetadataMap); + } + return allWriteStatusMergedMetadataMap; } - public String getJsonData() throws IOException { - return unCompressData(jsonDataCompressed); + private static void mergeMetadataMaps(Map mergeFromMap, + Map mergeToMap) { + for (Entry entry : mergeFromMap.entrySet()) { + String key = entry.getKey(); + if (!mergeToMap.containsKey(key)) { + mergeToMap.put(key, "0"); + } + mergeToMap + .put(key, 
addStrsAsInt(entry.getValue(), mergeToMap.get(key))); + } } - private byte[] compressData(String jsonData) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DeflaterOutputStream dos = - new DeflaterOutputStream(baos, new Deflater(Deflater.BEST_COMPRESSION), true); - try { - dos.write(jsonData.getBytes()); - } finally { - dos.flush(); - dos.close(); - } - return baos.toByteArray(); + private Map getMergedMetadataMap() { + return mergedMetadataMap; } - - private String unCompressData(byte[] data) throws IOException { - InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data)); - StringWriter sw = new StringWriter(dataSize); - IOUtils.copy(iis, sw); - return sw.toString(); - } - - /** - * A custom {@link WriteStatus} that merges passed metadata key value map - * to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()}. - */ - public static class MetadataMergeWriteStatus extends WriteStatus { - private Map mergedMetadataMap = new HashMap<>(); - - @Override - public void markSuccess(HoodieRecord record, Optional> recordMetadata) { - super.markSuccess(record, recordMetadata); - if(recordMetadata.isPresent()) { - mergeMetadataMaps(recordMetadata.get(), mergedMetadataMap); - } - } - - @Override - public void markFailure(HoodieRecord record, Throwable t, - Optional> recordMetadata) { - super.markFailure(record, t, recordMetadata); - if(recordMetadata.isPresent()) { - mergeMetadataMaps(recordMetadata.get(), mergedMetadataMap); - } - } - - public static Map mergeMetadataForWriteStatuses(List writeStatuses) { - Map allWriteStatusMergedMetadataMap = new HashMap<>(); - for (WriteStatus writeStatus : writeStatuses) { - MetadataMergeWriteStatus.mergeMetadataMaps( - ((MetadataMergeWriteStatus)writeStatus).getMergedMetadataMap(), - allWriteStatusMergedMetadataMap); - } - return allWriteStatusMergedMetadataMap; - } - - private static void mergeMetadataMaps(Map mergeFromMap, Map mergeToMap) { - for (Entry entry 
: mergeFromMap.entrySet()) { - String key = entry.getKey(); - if(!mergeToMap.containsKey(key)) { - mergeToMap.put(key, "0"); - } - mergeToMap - .put(key, addStrsAsInt(entry.getValue(), mergeToMap.get(key))); - } - } - - private Map getMergedMetadataMap() { - return mergedMetadataMap; - } - - private static String addStrsAsInt(String a, String b) { - return String.valueOf(Integer.parseInt(a) + Integer.parseInt(b)); - } + private static String addStrsAsInt(String a, String b) { + return String.valueOf(Integer.parseInt(a) + Integer.parseInt(b)); } + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/config/HoodieWriteConfigTest.java b/hoodie-client/src/test/java/com/uber/hoodie/config/HoodieWriteConfigTest.java index 957b02c6b..395197bf9 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/config/HoodieWriteConfigTest.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/config/HoodieWriteConfigTest.java @@ -16,7 +16,7 @@ package com.uber.hoodie.config; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; import com.google.common.collect.Maps; import com.uber.hoodie.config.HoodieWriteConfig.Builder; @@ -29,6 +29,7 @@ import java.util.Properties; import org.junit.Test; public class HoodieWriteConfigTest { + @Test public void testPropertyLoading() throws IOException { Builder builder = HoodieWriteConfig.newBuilder().withPath("/tmp"); @@ -46,13 +47,14 @@ public class HoodieWriteConfigTest { HoodieWriteConfig config = builder.build(); assertEquals(config.getMaxCommitsToKeep(), 5); assertEquals(config.getMinCommitsToKeep(), 2); -} + } - private ByteArrayOutputStream saveParamsIntoOutputStream(Map params) throws IOException { + private ByteArrayOutputStream saveParamsIntoOutputStream(Map params) + throws IOException { Properties properties = new Properties(); properties.putAll(params); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); properties.store(outStream, "Saved on " + new Date(System.currentTimeMillis())); 
return outStream; } -} \ No newline at end of file +} diff --git a/hoodie-client/src/test/java/com/uber/hoodie/func/TestUpdateMapFunction.java b/hoodie-client/src/test/java/com/uber/hoodie/func/TestUpdateMapFunction.java index 955865e1f..8433c0366 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/func/TestUpdateMapFunction.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/func/TestUpdateMapFunction.java @@ -16,105 +16,103 @@ package com.uber.hoodie.func; -import com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.common.table.HoodieTimeline; -import com.uber.hoodie.config.HoodieWriteConfig; +import static org.junit.Assert.fail; + import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.TestRawTripPayload; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieTestUtils; +import com.uber.hoodie.common.table.HoodieTableMetaClient; +import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.util.FSUtils; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.table.HoodieCopyOnWriteTable; - +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.Path; import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import static org.junit.Assert.fail; - public class TestUpdateMapFunction { - private String basePath = null; - @Before - public void init() throws Exception { - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - this.basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.init(basePath); + private String basePath = null; + + @Before + public void init() throws Exception 
{ + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + this.basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.init(basePath); + } + + @Test + public void testSchemaEvolutionOnUpdate() throws Exception { + // Create a bunch of records with a old version of schema + HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt"); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); + + String recordStr1 = + "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = + "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = + "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + List records = new ArrayList<>(); + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + records.add( + new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), + rowChange1)); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + records.add( + new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), + rowChange2)); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + records.add( + new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), + rowChange3)); + Iterator> insertResult = table.handleInsert("100", records.iterator()); + Path commitFile = + new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100")); + FSUtils.getFs().create(commitFile); + + // Now try an update with an evolved schema + // Evolved schema does not have guarantee on preserving the original field ordering + config = 
makeHoodieClientConfig("/exampleEvolvedSchema.txt"); + metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + String fileId = insertResult.next().get(0).getFileId(); + System.out.println(fileId); + + table = new HoodieCopyOnWriteTable(config, metadata); + // New content with values for the newly added field + recordStr1 = + "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}"; + records = new ArrayList<>(); + rowChange1 = new TestRawTripPayload(recordStr1); + HoodieRecord record1 = + new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), + rowChange1); + record1.setCurrentLocation(new HoodieRecordLocation("100", fileId)); + records.add(record1); + + try { + table.handleUpdate("101", fileId, records.iterator()); + } catch (ClassCastException e) { + fail( + "UpdateFunction could not read records written with exampleSchema.txt using the exampleEvolvedSchema.txt"); } + } - @Test - public void testSchemaEvolutionOnUpdate() throws Exception { - // Create a bunch of records with a old version of schema - HoodieWriteConfig config = makeHoodieClientConfig("/exampleSchema.txt"); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - String recordStr1 = - "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = - "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = - "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - List records = new ArrayList<>(); - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - records.add( - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), - 
rowChange1)); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - records.add( - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), - rowChange2)); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - records.add( - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), - rowChange3)); - Iterator> insertResult = table.handleInsert("100", records.iterator()); - Path commitFile = - new Path(config.getBasePath() + "/.hoodie/" + HoodieTimeline.makeCommitFileName("100")); - FSUtils.getFs().create(commitFile); - - // Now try an update with an evolved schema - // Evolved schema does not have guarantee on preserving the original field ordering - config = makeHoodieClientConfig("/exampleEvolvedSchema.txt"); - metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - String fileId = insertResult.next().get(0).getFileId(); - System.out.println(fileId); - - - table = new HoodieCopyOnWriteTable(config, metadata); - // New content with values for the newly added field - recordStr1 = - "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12,\"added_field\":1}"; - records = new ArrayList<>(); - rowChange1 = new TestRawTripPayload(recordStr1); - HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), - rowChange1); - record1.setCurrentLocation(new HoodieRecordLocation("100", fileId)); - records.add(record1); - - try { - table.handleUpdate("101", fileId, records.iterator()); - } catch (ClassCastException e) { - fail( - "UpdateFunction could not read records written with exampleSchema.txt using the exampleEvolvedSchema.txt"); - } - } - - private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception { - // Prepare the AvroParquetIO - String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8"); - return 
HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build(); - } + private HoodieWriteConfig makeHoodieClientConfig(String schema) throws Exception { + // Prepare the AvroParquetIO + String schemaStr = IOUtils.toString(getClass().getResourceAsStream(schema), "UTF-8"); + return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr).build(); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/index/TestHoodieIndex.java b/hoodie-client/src/test/java/com/uber/hoodie/index/TestHoodieIndex.java index de9c2d368..2fba00693 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/index/TestHoodieIndex.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/index/TestHoodieIndex.java @@ -16,32 +16,31 @@ package com.uber.hoodie.index; -import com.uber.hoodie.config.HoodieWriteConfig; +import static org.junit.Assert.assertTrue; import com.uber.hoodie.config.HoodieIndexConfig; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.index.bloom.HoodieBloomIndex; import com.uber.hoodie.index.hbase.HBaseIndex; - import org.junit.Test; -import static org.junit.Assert.*; - public class TestHoodieIndex { - @Test - public void testCreateIndex() throws Exception { - HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder(); - HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder(); - // Different types - HoodieWriteConfig config = clientConfigBuilder.withPath("") - .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE).build()) - .build(); - assertTrue(HoodieIndex.createIndex(config, null) instanceof HBaseIndex); - config = clientConfigBuilder.withPath("").withIndexConfig( - indexConfigBuilder.withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); - assertTrue(HoodieIndex.createIndex(config, null) instanceof InMemoryHashIndex); - config = clientConfigBuilder.withPath("") - 
.withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .build(); - assertTrue(HoodieIndex.createIndex(config, null) instanceof HoodieBloomIndex); - } + + @Test + public void testCreateIndex() throws Exception { + HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder(); + HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder(); + // Different types + HoodieWriteConfig config = clientConfigBuilder.withPath("") + .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.HBASE).build()) + .build(); + assertTrue(HoodieIndex.createIndex(config, null) instanceof HBaseIndex); + config = clientConfigBuilder.withPath("").withIndexConfig( + indexConfigBuilder.withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); + assertTrue(HoodieIndex.createIndex(config, null) instanceof InMemoryHashIndex); + config = clientConfigBuilder.withPath("") + .withIndexConfig(indexConfigBuilder.withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .build(); + assertTrue(HoodieIndex.createIndex(config, null) instanceof HoodieBloomIndex); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/index/bloom/TestHoodieBloomIndex.java b/hoodie-client/src/test/java/com/uber/hoodie/index/bloom/TestHoodieBloomIndex.java index 1a49b5953..8d4cc2558 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/index/bloom/TestHoodieBloomIndex.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/index/bloom/TestHoodieBloomIndex.java @@ -18,28 +18,39 @@ package com.uber.hoodie.index.bloom; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import com.google.common.base.Optional; import com.google.common.collect.Lists; - -import com.uber.hoodie.common.HoodieClientTestUtils; -import 
com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.config.HoodieIndexConfig; -import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.avro.HoodieAvroWriteSupport; import com.uber.hoodie.common.BloomFilter; +import com.uber.hoodie.common.HoodieClientTestUtils; import com.uber.hoodie.common.TestRawTripPayload; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieTestUtils; +import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.HoodieAvroUtils; - -import com.uber.hoodie.index.bloom.BloomIndexFileInfo; -import com.uber.hoodie.index.bloom.HoodieBloomIndex; -import com.uber.hoodie.index.bloom.HoodieBloomIndexCheckFunction; +import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.io.storage.HoodieParquetConfig; import com.uber.hoodie.io.storage.HoodieParquetWriter; import com.uber.hoodie.table.HoodieTable; +import java.io.File; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.commons.io.IOUtils; @@ -47,11 +58,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.parquet.avro.AvroSchemaConverter; -import org.apache.parquet.avro.AvroWriteSupport; import org.apache.parquet.hadoop.ParquetWriter; -import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.hadoop.metadata.CompressionCodecName; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import 
org.apache.spark.api.java.JavaSparkContext; @@ -59,464 +67,489 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.mockito.Mockito; - import scala.Tuple2; -import java.io.File; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.*; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static org.junit.Assert.*; - public class TestHoodieBloomIndex { - private JavaSparkContext jsc = null; - private String basePath = null; - private transient final FileSystem fs; - private String schemaStr; - private Schema schema; - public TestHoodieBloomIndex() throws Exception { - fs = FSUtils.getFs(); - } + private JavaSparkContext jsc = null; + private String basePath = null; + private transient final FileSystem fs; + private String schemaStr; + private Schema schema; - @Before - public void init() throws IOException { - // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieBloomIndex")); - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.init(basePath); - // We have some records to be tagged (two different partitions) - schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); - schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); - } + public TestHoodieBloomIndex() throws Exception { + fs = FSUtils.getFs(); + } - @Test - public void testLoadUUIDsInMemory() throws IOException { - // Create one RDD of hoodie record - String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = 
"{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; + @Before + public void init() throws IOException { + // Initialize a local spark env + jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieBloomIndex")); + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.init(basePath); + // We have some records to be tagged (two different partitions) + schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); + schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(schemaStr)); + } - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); - TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); - HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + @Test + public void testLoadUUIDsInMemory() throws IOException { + // Create one RDD of hoodie record + String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = 
"{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; - JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + HoodieRecord record1 = new HoodieRecord( + new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + HoodieRecord record2 = new HoodieRecord( + new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + HoodieRecord record3 = new HoodieRecord( + new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); + HoodieRecord record4 = new HoodieRecord( + new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - // Load to memory - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); + JavaRDD recordRDD = jsc + .parallelize(Arrays.asList(record1, record2, record3, record4)); - Map> map = recordRDD - .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey())) - .groupByKey().collectAsMap(); - assertEquals(map.size(), 2); - List list1 = Lists.newArrayList(map.get("2016/01/31")); - List list2 = Lists.newArrayList(map.get("2015/01/31")); - assertEquals(list1.size(), 3); - assertEquals(list2.size(), 1); - } + // Load to memory + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); - @Test - public void testLoadInvolvedFiles() throws IOException { - 
HoodieWriteConfig config = HoodieWriteConfig.newBuilder() - .withPath(basePath) - .build(); - HoodieBloomIndex index = new HoodieBloomIndex(config, jsc); + Map> map = recordRDD + .mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey())) + .groupByKey().collectAsMap(); + assertEquals(map.size(), 2); + List list1 = Lists.newArrayList(map.get("2016/01/31")); + List list2 = Lists.newArrayList(map.get("2015/01/31")); + assertEquals(list1.size(), 3); + assertEquals(list2.size(), 1); + } - // Create some partitions, and put some files - // "2016/01/21": 0 file - // "2016/04/01": 1 file (2_0_20160401010101.parquet) - // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, 4_0_20150312101010.parquet) - new File(basePath + "/2016/01/21").mkdirs(); - new File(basePath + "/2016/04/01").mkdirs(); - new File(basePath + "/2015/03/12").mkdirs(); + @Test + public void testLoadInvolvedFiles() throws IOException { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder() + .withPath(basePath) + .build(); + HoodieBloomIndex index = new HoodieBloomIndex(config, jsc); - TestRawTripPayload rowChange1 = new TestRawTripPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - TestRawTripPayload rowChange3 = new TestRawTripPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); - HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); - TestRawTripPayload rowChange4 = new 
TestRawTripPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); - HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + // Create some partitions, and put some files + // "2016/01/21": 0 file + // "2016/04/01": 1 file (2_0_20160401010101.parquet) + // "2015/03/12": 3 files (1_0_20150312101010.parquet, 3_0_20150312101010.parquet, 4_0_20150312101010.parquet) + new File(basePath + "/2016/01/21").mkdirs(); + new File(basePath + "/2016/04/01").mkdirs(); + new File(basePath + "/2015/03/12").mkdirs(); + TestRawTripPayload rowChange1 = new TestRawTripPayload( + "{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); + HoodieRecord record1 = new HoodieRecord( + new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload( + "{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); + HoodieRecord record2 = new HoodieRecord( + new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + TestRawTripPayload rowChange3 = new TestRawTripPayload( + "{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); + HoodieRecord record3 = new HoodieRecord( + new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + TestRawTripPayload rowChange4 = new TestRawTripPayload( + "{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); + HoodieRecord record4 = new HoodieRecord( + new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - writeParquetFile("2016/04/01","2_0_20160401010101.parquet", Lists.newArrayList(), schema, null, false); - writeParquetFile("2015/03/12","1_0_20150312101010.parquet", Lists.newArrayList(), schema, null, false); - writeParquetFile("2015/03/12","3_0_20150312101010.parquet", Arrays.asList(record1), schema, null, false); - 
writeParquetFile("2015/03/12","4_0_20150312101010.parquet", Arrays.asList(record2, record3, record4), schema, null, false); + writeParquetFile("2016/04/01", "2_0_20160401010101.parquet", Lists.newArrayList(), schema, null, + false); + writeParquetFile("2015/03/12", "1_0_20150312101010.parquet", Lists.newArrayList(), schema, null, + false); + writeParquetFile("2015/03/12", "3_0_20150312101010.parquet", Arrays.asList(record1), schema, + null, false); + writeParquetFile("2015/03/12", "4_0_20150312101010.parquet", + Arrays.asList(record2, record3, record4), schema, null, false); - List partitions = Arrays.asList("2016/01/21", "2016/04/01", "2015/03/12"); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metadata, config); - List> filesList = index.loadInvolvedFiles(partitions, table); - // Still 0, as no valid commit - assertEquals(filesList.size(), 0); + List partitions = Arrays.asList("2016/01/21", "2016/04/01", "2015/03/12"); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + List> filesList = index.loadInvolvedFiles(partitions, table); + // Still 0, as no valid commit + assertEquals(filesList.size(), 0); - // Add some commits - new File(basePath + "/.hoodie").mkdirs(); - new File(basePath + "/.hoodie/20160401010101.commit").createNewFile(); - new File(basePath + "/.hoodie/20150312101010.commit").createNewFile(); + // Add some commits + new File(basePath + "/.hoodie").mkdirs(); + new File(basePath + "/.hoodie/20160401010101.commit").createNewFile(); + new File(basePath + "/.hoodie/20150312101010.commit").createNewFile(); - filesList = index.loadInvolvedFiles(partitions, table); - assertEquals(filesList.size(), 4); - // these files will not have the key ranges - assertNull(filesList.get(0)._2().getMaxRecordKey()); - assertNull(filesList.get(0)._2().getMinRecordKey()); - 
assertFalse(filesList.get(1)._2().hasKeyRanges()); - assertNotNull(filesList.get(2)._2().getMaxRecordKey()); - assertNotNull(filesList.get(2)._2().getMinRecordKey()); - assertTrue(filesList.get(3)._2().hasKeyRanges()); + filesList = index.loadInvolvedFiles(partitions, table); + assertEquals(filesList.size(), 4); + // these files will not have the key ranges + assertNull(filesList.get(0)._2().getMaxRecordKey()); + assertNull(filesList.get(0)._2().getMinRecordKey()); + assertFalse(filesList.get(1)._2().hasKeyRanges()); + assertNotNull(filesList.get(2)._2().getMaxRecordKey()); + assertNotNull(filesList.get(2)._2().getMinRecordKey()); + assertTrue(filesList.get(3)._2().hasKeyRanges()); - // no longer sorted, but should have same files. + // no longer sorted, but should have same files. - List> expected = Arrays.asList( - new Tuple2<>("2016/04/01", new BloomIndexFileInfo("2_0_20160401010101.parquet")), - new Tuple2<>("2015/03/12",new BloomIndexFileInfo("1_0_20150312101010.parquet")), - new Tuple2<>("2015/03/12",new BloomIndexFileInfo("3_0_20150312101010.parquet", "000", "000")), - new Tuple2<>("2015/03/12",new BloomIndexFileInfo("4_0_20150312101010.parquet", "001", "003")) - ); - assertEquals(expected, filesList); - } + List> expected = Arrays.asList( + new Tuple2<>("2016/04/01", new BloomIndexFileInfo("2_0_20160401010101.parquet")), + new Tuple2<>("2015/03/12", new BloomIndexFileInfo("1_0_20150312101010.parquet")), + new Tuple2<>("2015/03/12", + new BloomIndexFileInfo("3_0_20150312101010.parquet", "000", "000")), + new Tuple2<>("2015/03/12", + new BloomIndexFileInfo("4_0_20150312101010.parquet", "001", "003")) + ); + assertEquals(expected, filesList); + } - @Test - public void testRangePruning() { + @Test + public void testRangePruning() { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder() - .withPath(basePath) - .build(); - HoodieBloomIndex index = new HoodieBloomIndex(config, jsc); + HoodieWriteConfig config = HoodieWriteConfig.newBuilder() + 
.withPath(basePath) + .build(); + HoodieBloomIndex index = new HoodieBloomIndex(config, jsc); + final Map> partitionToFileIndexInfo = new HashMap<>(); + partitionToFileIndexInfo.put("2017/10/22", Arrays.asList( + new BloomIndexFileInfo("f1"), + new BloomIndexFileInfo("f2", "000", "000"), + new BloomIndexFileInfo("f3", "001", "003"), + new BloomIndexFileInfo("f4", "002", "007"), + new BloomIndexFileInfo("f5", "009", "010") + )); - final Map> partitionToFileIndexInfo = new HashMap<>(); - partitionToFileIndexInfo.put("2017/10/22", Arrays.asList( - new BloomIndexFileInfo("f1"), - new BloomIndexFileInfo("f2", "000", "000"), - new BloomIndexFileInfo("f3", "001", "003"), - new BloomIndexFileInfo("f4", "002", "007"), - new BloomIndexFileInfo("f5", "009", "010") + JavaPairRDD partitionRecordKeyPairRDD = jsc + .parallelize(Arrays.asList( + new Tuple2<>("2017/10/22", "003"), + new Tuple2<>("2017/10/22", "002"), + new Tuple2<>("2017/10/22", "005"), + new Tuple2<>("2017/10/22", "004") + )) + .mapToPair(t -> t); + + List>> comparisonKeyList = index + .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD) + .collect(); + + assertEquals(10, comparisonKeyList.size()); + Map> recordKeyToFileComps = comparisonKeyList.stream() + .collect(Collectors.groupingBy( + t -> t._2()._2().getRecordKey(), + Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList() + ) )); - JavaPairRDD partitionRecordKeyPairRDD = jsc - .parallelize(Arrays.asList( - new Tuple2<>("2017/10/22","003"), - new Tuple2<>("2017/10/22","002"), - new Tuple2<>("2017/10/22","005"), - new Tuple2<>("2017/10/22","004") - )) - .mapToPair(t -> t); + assertEquals(4, recordKeyToFileComps.size()); + assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002")); + assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003")); + assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004")); + assertEquals(Arrays.asList("f1", "f4"), 
recordKeyToFileComps.get("005")); + } + + @Test + public void testCheckUUIDsAgainstOneFile() + throws IOException, InterruptedException, ClassNotFoundException { + + // Create some records to use + String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":32}"; + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + HoodieRecord record1 = new HoodieRecord( + new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + HoodieRecord record2 = new HoodieRecord( + new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + HoodieRecord record3 = new HoodieRecord( + new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); + HoodieRecord record4 = new HoodieRecord( + new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + + // We write record1, record2 to a parquet file, but the bloom filter contains (record1, record2, record3). 
+ BloomFilter filter = new BloomFilter(10000, 0.0000001); + filter.add(record3.getRecordKey()); + String filename = writeParquetFile("2016/01/31", Arrays.asList(record1, record2), schema, + filter, true); + + // The bloom filter contains 3 records + assertTrue(filter.mightContain(record1.getRecordKey())); + assertTrue(filter.mightContain(record2.getRecordKey())); + assertTrue(filter.mightContain(record3.getRecordKey())); + assertFalse(filter.mightContain(record4.getRecordKey())); + + // Compare with file + List uuids = Arrays.asList(record1.getRecordKey(), record2.getRecordKey(), + record3.getRecordKey(), record4.getRecordKey()); + + List results = HoodieBloomIndexCheckFunction.checkCandidatesAgainstFile(uuids, + new Path(basePath + "/2016/01/31/" + filename)); + assertEquals(results.size(), 2); + assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0") + || results.get(1).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")); + assertTrue(results.get(0).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0") + || results.get(1).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")); + // TODO(vc): Need more coverage on actual filenames + //assertTrue(results.get(0)._2().equals(filename)); + //assertTrue(results.get(1)._2().equals(filename)); + } + + @Test + public void testTagLocationWithEmptyRDD() throws Exception { + // We have some records to be tagged (two different partitions) + JavaRDD recordRDD = jsc.emptyRDD(); + // Also create the metadata and config + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); + HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + + // Let's tag + HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); + + try { + bloomIndex.tagLocation(recordRDD, table); + } catch (IllegalArgumentException e) { + fail( + "EmptyRDD should not result in IllegalArgumentException: Positive number of slices required"); + } 
+ } - List>> comparisonKeyList = index - .explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD) - .collect(); + @Test + public void testTagLocation() throws Exception { + // We have some records to be tagged (two different partitions) - assertEquals(10, comparisonKeyList.size()); - Map> recordKeyToFileComps = comparisonKeyList.stream() - .collect(Collectors.groupingBy( - t -> t._2()._2().getRecordKey(), - Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList() - ) - )); + String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + HoodieRecord record1 = new HoodieRecord( + new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + HoodieRecord record2 = new HoodieRecord( + new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + HoodieRecord record3 = new HoodieRecord( + new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); + HoodieRecord record4 = new HoodieRecord( + new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + JavaRDD recordRDD = jsc + .parallelize(Arrays.asList(record1, record2, record3, record4)); - assertEquals(4, recordKeyToFileComps.size()); - 
assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("002")); - assertEquals(Arrays.asList("f1", "f3", "f4"), recordKeyToFileComps.get("003")); - assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("004")); - assertEquals(Arrays.asList("f1", "f4"), recordKeyToFileComps.get("005")); + // Also create the metadata and config + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); + HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + + // Let's tag + HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); + JavaRDD taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); + + // Should not find any files + for (HoodieRecord record : taggedRecordRDD.collect()) { + assertTrue(!record.isCurrentLocationKnown()); } - @Test - public void testCheckUUIDsAgainstOneFile() throws IOException, InterruptedException, ClassNotFoundException { + // We create three parquet file, each having one record. 
(two different partitions) + String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true); + String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true); + String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true); - // Create some records to use - String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":32}"; - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); - TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); - HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + // We do the tag again + metadata = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metadata, config); + taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); - // We write record1, record2 to a parquet file, but the bloom filter contains (record1, record2, record3). 
- BloomFilter filter = new BloomFilter(10000, 0.0000001); - filter.add(record3.getRecordKey()); - String filename = writeParquetFile("2016/01/31", Arrays.asList(record1, record2), schema, filter, true); + // Check results + for (HoodieRecord record : taggedRecordRDD.collect()) { + if (record.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { + assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename1))); + } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { + assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename2))); + } else if (record.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { + assertTrue(!record.isCurrentLocationKnown()); + } else if (record.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { + assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename3))); + } + } + } - // The bloom filter contains 3 records - assertTrue(filter.mightContain(record1.getRecordKey())); - assertTrue(filter.mightContain(record2.getRecordKey())); - assertTrue(filter.mightContain(record3.getRecordKey())); - assertFalse(filter.mightContain(record4.getRecordKey())); + @Test + public void testCheckExists() throws Exception { + // We have some records to be tagged (two different partitions) - // Compare with file - List uuids = Arrays.asList(record1.getRecordKey(), record2.getRecordKey(), - record3.getRecordKey(), record4.getRecordKey()); + String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + String recordStr4 = 
"{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + HoodieKey key1 = new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()); + HoodieRecord record1 = new HoodieRecord(key1, rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + HoodieKey key2 = new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()); + HoodieRecord record2 = new HoodieRecord(key2, rowChange2); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + HoodieKey key3 = new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()); + HoodieRecord record3 = new HoodieRecord(key3, rowChange3); + TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); + HoodieKey key4 = new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()); + HoodieRecord record4 = new HoodieRecord(key4, rowChange4); + JavaRDD keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4)); - List results = HoodieBloomIndexCheckFunction.checkCandidatesAgainstFile(uuids, - new Path(basePath + "/2016/01/31/" + filename)); - assertEquals(results.size(), 2); - assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0") - || results.get(1).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")); - assertTrue(results.get(0).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0") - || results.get(1).equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")); - // TODO(vc): Need more coverage on actual filenames - //assertTrue(results.get(0)._2().equals(filename)); - //assertTrue(results.get(1)._2().equals(filename)); + // Also create the metadata and config + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); + HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + + // Let's tag + HoodieBloomIndex 
bloomIndex = new HoodieBloomIndex(config, jsc); + JavaPairRDD> taggedRecordRDD = bloomIndex + .fetchRecordLocation(keysRDD, table); + + // Should not find any files + for (Tuple2> record : taggedRecordRDD.collect()) { + assertTrue(!record._2.isPresent()); } - @Test - public void testTagLocationWithEmptyRDD() throws Exception { - // We have some records to be tagged (two different partitions) - JavaRDD recordRDD = jsc.emptyRDD(); - // Also create the metadata and config - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); - HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + // We create three parquet file, each having one record. (two different partitions) + String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true); + String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true); + String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true); - // Let's tag - HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); + // We do the tag again + metadata = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metadata, config); + taggedRecordRDD = bloomIndex.fetchRecordLocation(keysRDD, table); - try { - bloomIndex.tagLocation(recordRDD, table); - } catch (IllegalArgumentException e) { - fail("EmptyRDD should not result in IllegalArgumentException: Positive number of slices required"); - } + // Check results + for (Tuple2> record : taggedRecordRDD.collect()) { + if (record._1.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { + assertTrue(record._2.isPresent()); + Path path1 = new Path(record._2.get()); + assertEquals(FSUtils.getFileId(filename1), FSUtils.getFileId(path1.getName())); + } else if (record._1.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { + 
assertTrue(record._2.isPresent()); + Path path2 = new Path(record._2.get()); + assertEquals(FSUtils.getFileId(filename2), FSUtils.getFileId(path2.getName())); + } else if (record._1.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { + assertTrue(!record._2.isPresent()); + } else if (record._1.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { + assertTrue(record._2.isPresent()); + Path path3 = new Path(record._2.get()); + assertEquals(FSUtils.getFileId(filename3), FSUtils.getFileId(path3.getName())); + } } + } - @Test - public void testTagLocation() throws Exception { - // We have some records to be tagged (two different partitions) + @Test + public void testBloomFilterFalseError() throws IOException, InterruptedException { + // We have two hoodie records + String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - 
TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - HoodieRecord record3 = new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); - TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); - HoodieRecord record4 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); + // We write record1 to a parquet file, using a bloom filter having both records + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + HoodieRecord record1 = new HoodieRecord( + new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + HoodieRecord record2 = new HoodieRecord( + new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - // Also create the metadata and config - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); - HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + BloomFilter filter = new BloomFilter(10000, 0.0000001); + filter.add(record2.getRecordKey()); + String filename = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, filter, true); + assertTrue(filter.mightContain(record1.getRecordKey())); + assertTrue(filter.mightContain(record2.getRecordKey())); - // Let's tag - HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); - JavaRDD taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); + // We do the tag + JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2)); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); + HoodieTable table = 
HoodieTable.getHoodieTable(metadata, config); - // Should not find any files - for (HoodieRecord record : taggedRecordRDD.collect()) { - assertTrue(!record.isCurrentLocationKnown()); - } + HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); + JavaRDD taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); - // We create three parquet file, each having one record. (two different partitions) - String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true); - String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true); - String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true); - - // We do the tag again - metadata = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metadata, config); - - taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); - - // Check results - for (HoodieRecord record : taggedRecordRDD.collect()) { - if (record.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { - assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename1))); - } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { - assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename2))); - } else if (record.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { - assertTrue(!record.isCurrentLocationKnown()); - } else if (record.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { - assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename3))); - } - } + // Check results + for (HoodieRecord record : taggedRecordRDD.collect()) { + if (record.getKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { + assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename))); + } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { + 
assertFalse(record.isCurrentLocationKnown()); + } } + } - @Test - public void testCheckExists() throws Exception { - // We have some records to be tagged (two different partitions) + private String writeParquetFile(String partitionPath, List records, Schema schema, + BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException { + Thread.sleep(1000); + String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + String fileId = UUID.randomUUID().toString(); + String filename = FSUtils.makeDataFileName(commitTime, 1, fileId); - String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = "{\"_row_key\":\"3eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - String recordStr4 = "{\"_row_key\":\"4eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - HoodieKey key1 = new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()); - HoodieRecord record1 = new HoodieRecord(key1, rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - HoodieKey key2 = new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()); - HoodieRecord record2 = new HoodieRecord(key2, rowChange2); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - HoodieKey key3 = new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()); - HoodieRecord record3 = new HoodieRecord(key3, rowChange3); - TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); - HoodieKey key4 = new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()); - HoodieRecord record4 = new HoodieRecord(key4, rowChange4); - JavaRDD 
keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4)); + return writeParquetFile(partitionPath, filename, records, schema, filter, createCommitTime); + } - // Also create the metadata and config - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); - HoodieTable table = HoodieTable.getHoodieTable(metadata, config); + private String writeParquetFile(String partitionPath, String filename, List records, + Schema schema, + BloomFilter filter, boolean createCommitTime) throws IOException { - // Let's tag - HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); - JavaPairRDD> taggedRecordRDD = bloomIndex.fetchRecordLocation(keysRDD, table); - - // Should not find any files - for (Tuple2> record : taggedRecordRDD.collect()) { - assertTrue(!record._2.isPresent()); - } - - // We create three parquet file, each having one record. (two different partitions) - String filename1 = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, null, true); - String filename2 = writeParquetFile("2016/01/31", Arrays.asList(record2), schema, null, true); - String filename3 = writeParquetFile("2015/01/31", Arrays.asList(record4), schema, null, true); - - // We do the tag again - metadata = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metadata, config); - taggedRecordRDD = bloomIndex.fetchRecordLocation(keysRDD, table); - - // Check results - for (Tuple2> record : taggedRecordRDD.collect()) { - if (record._1.getRecordKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { - assertTrue(record._2.isPresent()); - Path path1 = new Path(record._2.get()); - assertEquals(FSUtils.getFileId(filename1), FSUtils.getFileId(path1.getName())); - } else if (record._1.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { - assertTrue(record._2.isPresent()); - Path path2 = new Path(record._2.get()); - 
assertEquals(FSUtils.getFileId(filename2), FSUtils.getFileId(path2.getName())); - } else if (record._1.getRecordKey().equals("3eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { - assertTrue(!record._2.isPresent()); - } else if (record._1.getRecordKey().equals("4eb5b87c-1fej-4edd-87b4-6ec96dc405a0")) { - assertTrue(record._2.isPresent()); - Path path3 = new Path(record._2.get()); - assertEquals(FSUtils.getFileId(filename3), FSUtils.getFileId(path3.getName())); - } - } + if (filter == null) { + filter = new BloomFilter(10000, 0.0000001); } - - - @Test - public void testBloomFilterFalseError() throws IOException, InterruptedException { - // We have two hoodie records - String recordStr1 = "{\"_row_key\":\"1eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"2eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - - // We write record1 to a parquet file, using a bloom filter having both records - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); - - BloomFilter filter = new BloomFilter(10000, 0.0000001); - filter.add(record2.getRecordKey()); - String filename = writeParquetFile("2016/01/31", Arrays.asList(record1), schema, filter, true); - assertTrue(filter.mightContain(record1.getRecordKey())); - assertTrue(filter.mightContain(record2.getRecordKey())); - - // We do the tag - JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2)); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); - HoodieTable table = 
HoodieTable.getHoodieTable(metadata, config); - - HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, jsc); - JavaRDD taggedRecordRDD = bloomIndex.tagLocation(recordRDD, table); - - // Check results - for (HoodieRecord record : taggedRecordRDD.collect()) { - if (record.getKey().equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")) { - assertTrue(record.getCurrentLocation().getFileId().equals(FSUtils.getFileId(filename))); - } else if (record.getRecordKey().equals("2eb5b87b-1feu-4edd-87b4-6ec96dc405a0")) { - assertFalse(record.isCurrentLocationKnown()); - } - } + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport( + new AvroSchemaConverter().convert(schema), schema, filter); + String commitTime = FSUtils.getCommitTime(filename); + HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, + ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024, + new Configuration()); + HoodieParquetWriter writer = new HoodieParquetWriter( + commitTime, + new Path(basePath + "/" + partitionPath + "/" + filename), + config, + schema); + int seqId = 1; + for (HoodieRecord record : records) { + GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get(); + HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, commitTime, "" + seqId++); + HoodieAvroUtils + .addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), + filename); + writer.writeAvro(record.getRecordKey(), avroRecord); + filter.add(record.getRecordKey()); } + writer.close(); - private String writeParquetFile(String partitionPath, List records, Schema schema, - BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException { - Thread.sleep(1000); - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - String fileId = UUID.randomUUID().toString(); - String filename = FSUtils.makeDataFileName(commitTime, 1, fileId); - - - return 
writeParquetFile(partitionPath, filename, records, schema, filter, createCommitTime); + if (createCommitTime) { + // Also make sure the commit is valid + new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME).mkdirs(); + new File( + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + ".commit") + .createNewFile(); } + return filename; + } - private String writeParquetFile(String partitionPath, String filename, List records, Schema schema, - BloomFilter filter, boolean createCommitTime) throws IOException { - - - if (filter == null) { - filter = new BloomFilter(10000, 0.0000001); - } - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter); - String commitTime = FSUtils.getCommitTime(filename); - HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, - ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024, new Configuration()); - HoodieParquetWriter writer = new HoodieParquetWriter( - commitTime, - new Path(basePath + "/" + partitionPath + "/" + filename), - config, - schema); - int seqId = 1; - for (HoodieRecord record : records) { - GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get(); - HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, commitTime, "" + seqId++); - HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), filename); - writer.writeAvro(record.getRecordKey(), avroRecord); - filter.add(record.getRecordKey()); - } - writer.close(); - - if (createCommitTime) { - // Also make sure the commit is valid - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME).mkdirs(); - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + ".commit").createNewFile(); - } - return filename; + @After + public void clean() { + if (jsc != null) { + jsc.stop(); } - - @After - public void 
clean() { - if (jsc != null) { - jsc.stop(); - } - if (basePath != null) { - new File(basePath).delete(); - } + if (basePath != null) { + new File(basePath).delete(); } + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCommitArchiveLog.java b/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCommitArchiveLog.java index 690f1d89d..fb19bf7e8 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCommitArchiveLog.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCommitArchiveLog.java @@ -16,9 +16,11 @@ package com.uber.hoodie.io; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.avro.model.HoodieArchivedMetaEntry; import com.uber.hoodie.common.HoodieTestDataGenerator; -import com.uber.hoodie.common.model.HoodieArchivedLogFile; import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.table.HoodieTableMetaClient; @@ -29,6 +31,11 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.config.HoodieCompactionConfig; import com.uber.hoodie.config.HoodieWriteConfig; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; @@ -37,197 +44,196 @@ import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - public class TestHoodieCommitArchiveLog { - private String basePath; - private FileSystem fs; - @Before - 
public void init() throws Exception { - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.init(basePath); - fs = FSUtils.getFs(); + private String basePath; + private FileSystem fs; + + @Before + public void init() throws Exception { + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.init(basePath); + fs = FSUtils.getFs(); + } + + @Test + public void testArchiveEmptyDataset() throws IOException { + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .forTable("test-trip-table").build(); + HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + boolean result = archiveLog.archiveIfRequired(); + assertTrue(result); + } + + @Test + public void testArchiveDatasetWithArchival() throws IOException { + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 4).build()) + .forTable("test-trip-table").build(); + HoodieTestUtils.init(basePath); + HoodieTestDataGenerator.createCommitFile(basePath, "100"); + HoodieTestDataGenerator.createCommitFile(basePath, "101"); + HoodieTestDataGenerator.createCommitFile(basePath, "102"); + HoodieTestDataGenerator.createCommitFile(basePath, "103"); + HoodieTestDataGenerator.createCommitFile(basePath, "104"); + HoodieTestDataGenerator.createCommitFile(basePath, "105"); + + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieTimeline timeline = + metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); + + assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); + + 
HoodieTestUtils.createCleanFiles(basePath, "100"); + HoodieTestUtils.createCleanFiles(basePath, "101"); + HoodieTestUtils.createCleanFiles(basePath, "102"); + HoodieTestUtils.createCleanFiles(basePath, "103"); + HoodieTestUtils.createCleanFiles(basePath, "104"); + HoodieTestUtils.createCleanFiles(basePath, "105"); + + //reload the timeline and get all the commmits before archive + timeline = metadata.getActiveTimeline().reload().getAllCommitsTimeline() + .filterCompletedInstants(); + List originalCommits = timeline.getInstants().collect(Collectors.toList()); + + assertEquals("Loaded 6 commits and the count should match", 12, timeline.countInstants()); + + HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + + assertTrue(archiveLog.archiveIfRequired()); + + //reload the timeline and remove the remaining commits + timeline = metadata.getActiveTimeline().reload().getAllCommitsTimeline() + .filterCompletedInstants(); + originalCommits.removeAll(timeline.getInstants().collect(Collectors.toList())); + + //read the file + HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(), + new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1")), + HoodieArchivedMetaEntry.getClassSchema(), false); + + int archivedRecordsCount = 0; + List readRecords = new ArrayList<>(); + //read the avro blocks and validate the number of records written in each avro block + while (reader.hasNext()) { + HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); + List records = blk.getRecords(); + readRecords.addAll(records); + assertEquals("Archived and read records for each block are same", 8, records.size()); + archivedRecordsCount += records.size(); } + assertEquals("Total archived records and total read records are the same count", 8, + archivedRecordsCount); - @Test - public void testArchiveEmptyDataset() throws IOException { - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) - 
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .forTable("test-trip-table").build(); - HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); - boolean result = archiveLog.archiveIfRequired(); - assertTrue(result); - } + //make sure the archived commits are the same as the (originalcommits - commitsleft) + List readCommits = readRecords.stream().map(r -> (GenericRecord) r).map(r -> { + return r.get("commitTime").toString(); + }).collect(Collectors.toList()); + Collections.sort(readCommits); - @Test - public void testArchiveDatasetWithArchival() throws IOException { - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .withCompactionConfig(HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 4).build()) - .forTable("test-trip-table").build(); - HoodieTestUtils.init(basePath); - HoodieTestDataGenerator.createCommitFile(basePath, "100"); - HoodieTestDataGenerator.createCommitFile(basePath, "101"); - HoodieTestDataGenerator.createCommitFile(basePath, "102"); - HoodieTestDataGenerator.createCommitFile(basePath, "103"); - HoodieTestDataGenerator.createCommitFile(basePath, "104"); - HoodieTestDataGenerator.createCommitFile(basePath, "105"); + assertEquals( + "Read commits map should match the originalCommits - commitsLoadedFromArchival", + originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()), + readCommits); + } - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieTimeline timeline = - metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); + @Test + public void testArchiveDatasetWithNoArchival() throws IOException { + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + 
.forTable("test-trip-table").withCompactionConfig( + HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + HoodieTestDataGenerator.createCommitFile(basePath, "100"); + HoodieTestDataGenerator.createCommitFile(basePath, "101"); + HoodieTestDataGenerator.createCommitFile(basePath, "102"); + HoodieTestDataGenerator.createCommitFile(basePath, "103"); - assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); + HoodieTimeline timeline = + metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - HoodieTestUtils.createCleanFiles(basePath, "100"); - HoodieTestUtils.createCleanFiles(basePath, "101"); - HoodieTestUtils.createCleanFiles(basePath, "102"); - HoodieTestUtils.createCleanFiles(basePath, "103"); - HoodieTestUtils.createCleanFiles(basePath, "104"); - HoodieTestUtils.createCleanFiles(basePath, "105"); + assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants()); + boolean result = archiveLog.archiveIfRequired(); + assertTrue(result); + timeline = + metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, + timeline.countInstants()); + } - //reload the timeline and get all the commmits before archive - timeline = metadata.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants(); - List originalCommits = timeline.getInstants().collect(Collectors.toList()); + @Test + public void testArchiveCommitSafety() throws IOException { + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .forTable("test-trip-table").withCompactionConfig( + 
HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + HoodieTestDataGenerator.createCommitFile(basePath, "100"); + HoodieTestDataGenerator.createCommitFile(basePath, "101"); + HoodieTestDataGenerator.createCommitFile(basePath, "102"); + HoodieTestDataGenerator.createCommitFile(basePath, "103"); + HoodieTestDataGenerator.createCommitFile(basePath, "104"); + HoodieTestDataGenerator.createCommitFile(basePath, "105"); - assertEquals("Loaded 6 commits and the count should match", 12, timeline.countInstants()); + HoodieTimeline timeline = + metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); + assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); + boolean result = archiveLog.archiveIfRequired(); + assertTrue(result); + timeline = + metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + assertTrue("Archived commits should always be safe", + timeline.containsOrBeforeTimelineStarts("100")); + assertTrue("Archived commits should always be safe", + timeline.containsOrBeforeTimelineStarts("101")); + assertTrue("Archived commits should always be safe", + timeline.containsOrBeforeTimelineStarts("102")); + assertTrue("Archived commits should always be safe", + timeline.containsOrBeforeTimelineStarts("103")); + } - HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + @Test + public void testArchiveCommitSavepointNoHole() throws IOException { + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .forTable("test-trip-table").withCompactionConfig( + HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); + HoodieTableMetaClient metadata = 
new HoodieTableMetaClient(fs, basePath); + HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); + HoodieTestDataGenerator.createCommitFile(basePath, "100"); + HoodieTestDataGenerator.createCommitFile(basePath, "101"); + HoodieTestDataGenerator.createSavepointFile(basePath, "101"); + HoodieTestDataGenerator.createCommitFile(basePath, "102"); + HoodieTestDataGenerator.createCommitFile(basePath, "103"); + HoodieTestDataGenerator.createCommitFile(basePath, "104"); + HoodieTestDataGenerator.createCommitFile(basePath, "105"); - assertTrue(archiveLog.archiveIfRequired()); - - //reload the timeline and remove the remaining commits - timeline = metadata.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants(); - originalCommits.removeAll(timeline.getInstants().collect(Collectors.toList())); - - //read the file - HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(FSUtils.getFs(), - new HoodieLogFile(new Path(basePath + "/.hoodie/.commits_.archive.1")), HoodieArchivedMetaEntry.getClassSchema(), false); - - int archivedRecordsCount = 0; - List readRecords = new ArrayList<>(); - //read the avro blocks and validate the number of records written in each avro block - while(reader.hasNext()) { - HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - List records = blk.getRecords(); - readRecords.addAll(records); - assertEquals("Archived and read records for each block are same", 8, records.size()); - archivedRecordsCount += records.size(); - } - assertEquals("Total archived records and total read records are the same count", 8, archivedRecordsCount); - - //make sure the archived commits are the same as the (originalcommits - commitsleft) - List readCommits = readRecords.stream().map(r -> (GenericRecord)r).map(r -> { - return r.get("commitTime").toString(); - }).collect(Collectors.toList()); - Collections.sort(readCommits); - - assertEquals( - "Read commits map should match the originalCommits - commitsLoadedFromArchival", - 
originalCommits.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()), - readCommits); - } - - @Test - public void testArchiveDatasetWithNoArchival() throws IOException { - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .forTable("test-trip-table").withCompactionConfig( - HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); - HoodieTestDataGenerator.createCommitFile(basePath, "100"); - HoodieTestDataGenerator.createCommitFile(basePath, "101"); - HoodieTestDataGenerator.createCommitFile(basePath, "102"); - HoodieTestDataGenerator.createCommitFile(basePath, "103"); - - HoodieTimeline timeline = - metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - - assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants()); - boolean result = archiveLog.archiveIfRequired(); - assertTrue(result); - timeline = - metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, - timeline.countInstants()); - } - - @Test - public void testArchiveCommitSafety() throws IOException { - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .forTable("test-trip-table").withCompactionConfig( - HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); - HoodieTestDataGenerator.createCommitFile(basePath, "100"); - 
HoodieTestDataGenerator.createCommitFile(basePath, "101"); - HoodieTestDataGenerator.createCommitFile(basePath, "102"); - HoodieTestDataGenerator.createCommitFile(basePath, "103"); - HoodieTestDataGenerator.createCommitFile(basePath, "104"); - HoodieTestDataGenerator.createCommitFile(basePath, "105"); - - HoodieTimeline timeline = - metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); - boolean result = archiveLog.archiveIfRequired(); - assertTrue(result); - timeline = - metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - assertTrue("Archived commits should always be safe", - timeline.containsOrBeforeTimelineStarts("100")); - assertTrue("Archived commits should always be safe", - timeline.containsOrBeforeTimelineStarts("101")); - assertTrue("Archived commits should always be safe", - timeline.containsOrBeforeTimelineStarts("102")); - assertTrue("Archived commits should always be safe", - timeline.containsOrBeforeTimelineStarts("103")); - } - - @Test - public void testArchiveCommitSavepointNoHole() throws IOException { - HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .forTable("test-trip-table").withCompactionConfig( - HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 5).build()).build(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg, fs); - HoodieTestDataGenerator.createCommitFile(basePath, "100"); - HoodieTestDataGenerator.createCommitFile(basePath, "101"); - HoodieTestDataGenerator.createSavepointFile(basePath, "101"); - HoodieTestDataGenerator.createCommitFile(basePath, "102"); - HoodieTestDataGenerator.createCommitFile(basePath, "103"); - 
HoodieTestDataGenerator.createCommitFile(basePath, "104"); - HoodieTestDataGenerator.createCommitFile(basePath, "105"); - - HoodieTimeline timeline = - metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); - boolean result = archiveLog.archiveIfRequired(); - assertTrue(result); - timeline = - metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - assertEquals( - "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", - 5, timeline.countInstants()); - assertTrue("Archived commits should always be safe", - timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101"))); - assertTrue("Archived commits should always be safe", - timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102"))); - assertTrue("Archived commits should always be safe", - timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103"))); - } + HoodieTimeline timeline = + metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); + assertEquals("Loaded 6 commits and the count should match", 6, timeline.countInstants()); + boolean result = archiveLog.archiveIfRequired(); + assertTrue(result); + timeline = + metadata.getActiveTimeline().reload().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + assertEquals( + "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)", + 5, timeline.countInstants()); + assertTrue("Archived commits should always be safe", + timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101"))); + assertTrue("Archived commits should always be safe", + timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102"))); + assertTrue("Archived 
commits should always be safe", + timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103"))); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCompactor.java b/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCompactor.java index 784e35c37..40ebc1829 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCompactor.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/io/TestHoodieCompactor.java @@ -16,7 +16,9 @@ package com.uber.hoodie.io; -import com.uber.hoodie.HoodieReadClient; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.HoodieWriteClient; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.HoodieClientTestUtils; @@ -34,13 +36,16 @@ import com.uber.hoodie.config.HoodieCompactionConfig; import com.uber.hoodie.config.HoodieIndexConfig; import com.uber.hoodie.config.HoodieStorageConfig; import com.uber.hoodie.config.HoodieWriteConfig; -import com.uber.hoodie.index.bloom.HoodieBloomIndex; import com.uber.hoodie.index.HoodieIndex; +import com.uber.hoodie.index.bloom.HoodieBloomIndex; import com.uber.hoodie.io.compact.HoodieCompactor; import com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor; import com.uber.hoodie.table.HoodieTable; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; -import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.junit.After; @@ -48,161 +53,154 @@ import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; -import java.util.List; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - public class TestHoodieCompactor { - private transient 
JavaSparkContext jsc = null; - private String basePath = null; - private HoodieCompactor compactor; - private transient HoodieTestDataGenerator dataGen = null; - @Before - public void init() throws IOException { - // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieCompactor")); + private transient JavaSparkContext jsc = null; + private String basePath = null; + private HoodieCompactor compactor; + private transient HoodieTestDataGenerator dataGen = null; - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); + @Before + public void init() throws IOException { + // Initialize a local spark env + jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieCompactor")); - dataGen = new HoodieTestDataGenerator(); - compactor = new HoodieRealtimeTableCompactor(); + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); + + dataGen = new HoodieTestDataGenerator(); + compactor = new HoodieRealtimeTableCompactor(); + } + + @After + public void clean() { + if (basePath != null) { + new File(basePath).delete(); } - - @After - public void clean() { - if (basePath != null) { - new File(basePath).delete(); - } - if (jsc != null) { - jsc.stop(); - } + if (jsc != null) { + jsc.stop(); } + } - private HoodieWriteConfig getConfig() { - return getConfigBuilder().build(); - } + private HoodieWriteConfig getConfig() { + return getConfigBuilder().build(); + } - private HoodieWriteConfig.Builder getConfigBuilder() { - return HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) 
- .withCompactionConfig( - HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024) - .withInlineCompaction(false).build()) - .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) - .forTable("test-trip-table").withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); - } + private HoodieWriteConfig.Builder getConfigBuilder() { + return HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .withCompactionConfig( + HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024) + .withInlineCompaction(false).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) + .forTable("test-trip-table").withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); + } - @Test(expected = IllegalArgumentException.class) - public void testCompactionOnCopyOnWriteFail() throws Exception { - HoodieTestUtils.initTableType(basePath, HoodieTableType.COPY_ON_WRITE); + @Test(expected = IllegalArgumentException.class) + public void testCompactionOnCopyOnWriteFail() throws Exception { + HoodieTestUtils.initTableType(basePath, HoodieTableType.COPY_ON_WRITE); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig()); + compactor.compact(jsc, getConfig(), table); + } + + @Test + public void testCompactionEmpty() throws Exception { + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieWriteConfig config = getConfig(); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); + HoodieWriteClient writeClient 
= new HoodieWriteClient(jsc, config); + + String newCommitTime = writeClient.startCommit(); + List records = dataGen.generateInserts(newCommitTime, 100); + JavaRDD recordsRDD = jsc.parallelize(records, 1); + writeClient.insert(recordsRDD, newCommitTime).collect(); + + HoodieCompactionMetadata result = compactor.compact(jsc, getConfig(), table); + String basePath = table.getMetaClient().getBasePath(); + assertTrue("If there is nothing to compact, result will be empty", + result.getFileIdAndFullPaths(basePath).isEmpty()); + } + + @Test + public void testLogFileCountsAfterCompaction() throws Exception { + FileSystem fs = FSUtils.getFs(); + // insert 100 records + HoodieWriteConfig config = getConfig(); + HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config); + String newCommitTime = "100"; + writeClient.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 100); + JavaRDD recordsRDD = jsc.parallelize(records, 1); + List statuses = writeClient.insert(recordsRDD, newCommitTime).collect(); + + // Update all the 100 records + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); + + newCommitTime = "101"; + writeClient.startCommitWithTime(newCommitTime); + + List updatedRecords = dataGen.generateUpdates(newCommitTime, records); + JavaRDD updatedRecordsRDD = jsc.parallelize(updatedRecords, 1); + HoodieIndex index = new HoodieBloomIndex<>(config, jsc); + updatedRecords = index.tagLocation(updatedRecordsRDD, table).collect(); + + // Write them to corresponding avro logfiles + HoodieTestUtils + .writeRecordsToLogFiles(metaClient.getBasePath(), HoodieTestDataGenerator.avroSchema, + updatedRecords); + + // Verify that all data file has one log file + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, config); + for (String partitionPath : dataGen.getPartitionPaths()) { + List 
groupedLogFiles = + table.getRTFileSystemView().getLatestFileSlices(partitionPath) + .collect(Collectors.toList()); + for (FileSlice fileSlice : groupedLogFiles) { + assertEquals("There should be 1 log file written for every data file", 1, + fileSlice.getLogFiles().count()); + } } - @Test - public void testCompactionEmpty() throws Exception { - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieWriteConfig config = getConfig(); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); - HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config); + // Do a compaction + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, config); - String newCommitTime = writeClient.startCommit(); - List records = dataGen.generateInserts(newCommitTime, 100); - JavaRDD recordsRDD = jsc.parallelize(records, 1); - writeClient.insert(recordsRDD, newCommitTime).collect(); + HoodieCompactionMetadata result = + compactor.compact(jsc, getConfig(), table); - HoodieCompactionMetadata result = - compactor.compact(jsc, getConfig(), table); - String basePath = table.getMetaClient().getBasePath(); - assertTrue("If there is nothing to compact, result will be empty", - result.getFileIdAndFullPaths(basePath).isEmpty()); + // Verify that recently written compacted data file has no log file + metaClient = new HoodieTableMetaClient(fs, basePath); + table = HoodieTable.getHoodieTable(metaClient, config); + HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); + + assertTrue("Compaction commit should be > than last insert", + HoodieTimeline.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, + HoodieTimeline.GREATER)); + + for (String partitionPath : dataGen.getPartitionPaths()) { + List groupedLogFiles = table.getRTFileSystemView() + .getLatestFileSlices(partitionPath) + .collect(Collectors.toList()); + for (FileSlice slice : groupedLogFiles) { + 
assertTrue( + "After compaction there should be no log files visiable on a Realtime view", + slice.getLogFiles().collect(Collectors.toList()).isEmpty()); + } + assertTrue(result.getPartitionToCompactionWriteStats().containsKey(partitionPath)); } + } - @Test - public void testLogFileCountsAfterCompaction() throws Exception { - FileSystem fs = FSUtils.getFs(); - // insert 100 records - HoodieWriteConfig config = getConfig(); - HoodieWriteClient writeClient = new HoodieWriteClient(jsc, config); - String newCommitTime = "100"; - writeClient.startCommitWithTime(newCommitTime); - - List records = dataGen.generateInserts(newCommitTime, 100); - JavaRDD recordsRDD = jsc.parallelize(records, 1); - List statuses = writeClient.insert(recordsRDD, newCommitTime).collect(); - - // Update all the 100 records - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); - - newCommitTime = "101"; - writeClient.startCommitWithTime(newCommitTime); - - List updatedRecords = dataGen.generateUpdates(newCommitTime, records); - JavaRDD updatedRecordsRDD = jsc.parallelize(updatedRecords, 1); - HoodieIndex index = new HoodieBloomIndex<>(config, jsc); - updatedRecords = index.tagLocation(updatedRecordsRDD, table).collect(); - - // Write them to corresponding avro logfiles - HoodieTestUtils - .writeRecordsToLogFiles(metaClient.getBasePath(), HoodieTestDataGenerator.avroSchema, - updatedRecords); - - // Verify that all data file has one log file - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, config); - for (String partitionPath : dataGen.getPartitionPaths()) { - List groupedLogFiles = - table.getRTFileSystemView().getLatestFileSlices(partitionPath) - .collect(Collectors.toList()); - for (FileSlice fileSlice : groupedLogFiles) { - assertEquals("There should be 1 log file written for every data file", 1, - fileSlice.getLogFiles().count()); - } - } - - // 
Do a compaction - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, config); - - HoodieCompactionMetadata result = - compactor.compact(jsc, getConfig(), table); - - // Verify that recently written compacted data file has no log file - metaClient = new HoodieTableMetaClient(fs, basePath); - table = HoodieTable.getHoodieTable(metaClient, config); - HoodieActiveTimeline timeline = metaClient.getActiveTimeline(); - - assertTrue("Compaction commit should be > than last insert", - HoodieTimeline.compareTimestamps(timeline.lastInstant().get().getTimestamp(), newCommitTime, - HoodieTimeline.GREATER)); - - for (String partitionPath : dataGen.getPartitionPaths()) { - List groupedLogFiles = table.getRTFileSystemView() - .getLatestFileSlices(partitionPath) - .collect(Collectors.toList()); - for (FileSlice slice: groupedLogFiles) { - assertTrue( - "After compaction there should be no log files visiable on a Realtime view", - slice.getLogFiles().collect(Collectors.toList()).isEmpty()); - } - assertTrue(result.getPartitionToCompactionWriteStats().containsKey(partitionPath)); - } - } - - // TODO - after modifying HoodieReadClient to support realtime tables - add more tests to make sure the data read is the updated data (compaction correctness) - // TODO - add more test cases for compactions after a failed commit/compaction + // TODO - after modifying HoodieReadClient to support realtime tables - add more tests to make sure the data read is the updated data (compaction correctness) + // TODO - add more test cases for compactions after a failed commit/compaction } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java index cc1a1219c..c01e21522 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java +++ 
b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieCompactionStrategy.java @@ -17,12 +17,10 @@ package com.uber.hoodie.io.strategy; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import com.beust.jcommander.internal.Lists; import com.google.common.collect.Maps; - import com.uber.hoodie.config.HoodieCompactionConfig; import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.io.compact.CompactionOperation; diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieDataFile.java b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieDataFile.java index 6d6219ff6..564d95218 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieDataFile.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieDataFile.java @@ -17,9 +17,7 @@ package com.uber.hoodie.io.strategy; import com.uber.hoodie.common.model.HoodieDataFile; -import com.uber.hoodie.common.util.FSUtils; import java.util.UUID; -import org.apache.hadoop.fs.FileStatus; public class TestHoodieDataFile extends HoodieDataFile { diff --git a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieLogFile.java b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieLogFile.java index 0d2a2bd68..d23cbf27c 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieLogFile.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/io/strategy/TestHoodieLogFile.java @@ -18,7 +18,6 @@ package com.uber.hoodie.io.strategy; import com.uber.hoodie.common.model.HoodieLogFile; import java.util.Optional; - import org.apache.hadoop.fs.Path; public class TestHoodieLogFile extends HoodieLogFile { diff --git a/hoodie-client/src/test/java/com/uber/hoodie/metrics/TestHoodieMetrics.java b/hoodie-client/src/test/java/com/uber/hoodie/metrics/TestHoodieMetrics.java index 7e33ad579..911f97417 100644 --- 
a/hoodie-client/src/test/java/com/uber/hoodie/metrics/TestHoodieMetrics.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/metrics/TestHoodieMetrics.java @@ -16,30 +16,31 @@ package com.uber.hoodie.metrics; -import com.uber.hoodie.config.HoodieWriteConfig; - -import org.apache.commons.configuration.ConfigurationException; -import org.junit.Before; -import org.junit.Test; - import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import com.uber.hoodie.config.HoodieWriteConfig; +import org.apache.commons.configuration.ConfigurationException; +import org.junit.Before; +import org.junit.Test; + public class TestHoodieMetrics { - private HoodieMetrics metrics = null; - @Before - public void start() throws ConfigurationException { - HoodieWriteConfig config = mock(HoodieWriteConfig.class); - when(config.isMetricsOn()).thenReturn(true); - when(config.getMetricsReporterType()).thenReturn(MetricsReporterType.INMEMORY); - metrics = new HoodieMetrics(config, "raw_table"); - } + private HoodieMetrics metrics = null; - @Test - public void testRegisterGauge() { - metrics.registerGauge("metric1", 123L); - assertTrue(Metrics.getInstance().getRegistry().getGauges().get("metric1").getValue().toString().equals("123")); - } + @Before + public void start() throws ConfigurationException { + HoodieWriteConfig config = mock(HoodieWriteConfig.class); + when(config.isMetricsOn()).thenReturn(true); + when(config.getMetricsReporterType()).thenReturn(MetricsReporterType.INMEMORY); + metrics = new HoodieMetrics(config, "raw_table"); + } + + @Test + public void testRegisterGauge() { + metrics.registerGauge("metric1", 123L); + assertTrue(Metrics.getInstance().getRegistry().getGauges().get("metric1").getValue().toString() + .equals("123")); + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/table/TestCopyOnWriteTable.java b/hoodie-client/src/test/java/com/uber/hoodie/table/TestCopyOnWriteTable.java index 
020166d5a..a98b76838 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/table/TestCopyOnWriteTable.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/table/TestCopyOnWriteTable.java @@ -16,26 +16,37 @@ package com.uber.hoodie.table; -import com.uber.hoodie.common.TestRawTripPayload.MetadataMergeWriteStatus; -import com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.common.table.HoodieTimeline; -import com.uber.hoodie.config.HoodieWriteConfig; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.BloomFilter; import com.uber.hoodie.common.HoodieClientTestUtils; import com.uber.hoodie.common.HoodieTestDataGenerator; import com.uber.hoodie.common.TestRawTripPayload; +import com.uber.hoodie.common.TestRawTripPayload.MetadataMergeWriteStatus; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordLocation; import com.uber.hoodie.common.model.HoodieTestUtils; +import com.uber.hoodie.common.table.HoodieTableMetaClient; +import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.ParquetUtils; - import com.uber.hoodie.config.HoodieCompactionConfig; -import com.uber.hoodie.io.HoodieCreateHandle; import com.uber.hoodie.config.HoodieStorageConfig; +import com.uber.hoodie.config.HoodieWriteConfig; +import com.uber.hoodie.io.HoodieCreateHandle; +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.avro.generic.GenericRecord; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FileSystem; @@ -47,424 +58,452 @@ 
import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; - -import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.UUID; - import scala.Option; import scala.Tuple2; -import static org.junit.Assert.*; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - public class TestCopyOnWriteTable { - private String basePath = null; - private transient JavaSparkContext jsc = null; - @Before - public void init() throws Exception { + private String basePath = null; + private transient JavaSparkContext jsc = null; - // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestCopyOnWriteTable")); + @Before + public void init() throws Exception { - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - this.basePath = folder.getRoot().getAbsolutePath(); - HoodieTestUtils.init(basePath); + // Initialize a local spark env + jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestCopyOnWriteTable")); + + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + this.basePath = folder.getRoot().getAbsolutePath(); + HoodieTestUtils.init(basePath); + } + + @Test + public void testMakeNewPath() throws Exception { + String fileName = UUID.randomUUID().toString(); + String partitionPath = "2016/05/04"; + int unitNumber = (int) (Math.random() * 10); + HoodieRecord record = mock(HoodieRecord.class); + when(record.getPartitionPath()).thenReturn(partitionPath); + + String commitTime = HoodieTestUtils.makeNewCommitTime(); + HoodieWriteConfig config = makeHoodieClientConfig(); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); + + 
HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath); + Path newPath = io.makeNewPath(record.getPartitionPath(), unitNumber, fileName); + assertTrue(newPath.toString().equals(this.basePath + "/" + partitionPath + "/" + FSUtils + .makeDataFileName(commitTime, unitNumber, fileName))); + } + + private HoodieWriteConfig makeHoodieClientConfig() throws Exception { + return makeHoodieClientConfigBuilder().build(); + } + + private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() throws Exception { + // Prepare the AvroParquetIO + String schemaStr = IOUtils + .toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); + return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr); + } + + // TODO (weiy): Add testcases for crossing file writing. + @Test + public void testUpdateRecords() throws Exception { + // Prepare the AvroParquetIO + HoodieWriteConfig config = makeHoodieClientConfig(); + String firstCommitTime = HoodieTestUtils.makeNewCommitTime(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + + String partitionPath = "/2016/01/31"; + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); + + // Get some records belong to the same partition (2016/01/31) + String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + String recordStr4 = "{\"_row_key\":\"8eb5b87d-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":51}"; + + List records = new ArrayList<>(); + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + records.add( + new HoodieRecord(new 
HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), + rowChange1)); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + records.add( + new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), + rowChange2)); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + records.add( + new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), + rowChange3)); + + // Insert new records + HoodieClientTestUtils.collectStatuses(table.handleInsert(firstCommitTime, records.iterator())); + // We should have a parquet file generated (TODO: better control # files after we revise AvroParquetIO) + File parquetFile = null; + for (File file : new File(this.basePath + partitionPath).listFiles()) { + if (file.getName().endsWith(".parquet")) { + parquetFile = file; + break; + } + } + assertTrue(parquetFile != null); + + // Read out the bloom filter and make sure filter can answer record exist or not + Path parquetFilePath = new Path(parquetFile.getAbsolutePath()); + BloomFilter filter = ParquetUtils.readBloomFilterFromParquetMetadata(parquetFilePath); + for (HoodieRecord record : records) { + assertTrue(filter.mightContain(record.getRecordKey())); + } + // Create a commit file + new File(this.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + + FSUtils.getCommitTime(parquetFile.getName()) + ".commit").createNewFile(); + + // Read the parquet file, check the record content + List fileRecords = ParquetUtils.readAvroRecords(parquetFilePath); + GenericRecord newRecord; + int index = 0; + for (GenericRecord record : fileRecords) { + assertTrue(record.get("_row_key").toString().equals(records.get(index).getRecordKey())); + index++; } - @Test - public void testMakeNewPath() throws Exception { - String fileName = UUID.randomUUID().toString(); - String partitionPath = "2016/05/04"; - int unitNumber = (int) (Math.random() * 10); - HoodieRecord record = mock(HoodieRecord.class); - 
when(record.getPartitionPath()).thenReturn(partitionPath); + // We update the 1st record & add a new record + String updateRecordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + TestRawTripPayload updateRowChanges1 = new TestRawTripPayload(updateRecordStr1); + HoodieRecord updatedRecord1 = new HoodieRecord( + new HoodieKey(updateRowChanges1.getRowKey(), updateRowChanges1.getPartitionPath()), + updateRowChanges1); + updatedRecord1.setCurrentLocation( + new HoodieRecordLocation(null, FSUtils.getFileId(parquetFile.getName()))); - String commitTime = HoodieTestUtils.makeNewCommitTime(); - HoodieWriteConfig config = makeHoodieClientConfig(); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, config); + TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); + HoodieRecord insertedRecord1 = new HoodieRecord( + new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath); - Path newPath = io.makeNewPath(record.getPartitionPath(), unitNumber, fileName); - assertTrue(newPath.toString().equals(this.basePath + "/" + partitionPath + "/" + FSUtils - .makeDataFileName(commitTime, unitNumber, fileName))); - } + List updatedRecords = Arrays.asList(updatedRecord1, insertedRecord1); - private HoodieWriteConfig makeHoodieClientConfig() throws Exception { - return makeHoodieClientConfigBuilder().build(); - } + Thread.sleep(1000); + String newCommitTime = HoodieTestUtils.makeNewCommitTime(); + metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + table = new HoodieCopyOnWriteTable(config, metadata); + Iterator> iter = table + .handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(), + updatedRecords.iterator()); - private HoodieWriteConfig.Builder 
makeHoodieClientConfigBuilder() throws Exception { - // Prepare the AvroParquetIO - String schemaStr = IOUtils.toString(getClass().getResourceAsStream("/exampleSchema.txt"), "UTF-8"); - return HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(schemaStr); - } - - // TODO (weiy): Add testcases for crossing file writing. - @Test - public void testUpdateRecords() throws Exception { - // Prepare the AvroParquetIO - HoodieWriteConfig config = makeHoodieClientConfig(); - String firstCommitTime = HoodieTestUtils.makeNewCommitTime(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - - String partitionPath = "/2016/01/31"; - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - // Get some records belong to the same partition (2016/01/31) - String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - String recordStr4 = "{\"_row_key\":\"8eb5b87d-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":51}"; - - List records = new ArrayList<>(); - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); - - // Insert new records - 
HoodieClientTestUtils.collectStatuses(table.handleInsert(firstCommitTime, records.iterator())); - // We should have a parquet file generated (TODO: better control # files after we revise AvroParquetIO) - File parquetFile = null; - for (File file : new File(this.basePath + partitionPath).listFiles()) { - if (file.getName().endsWith(".parquet")) { - parquetFile = file; - break; - } + // Check the updated file + File updatedParquetFile = null; + for (File file : new File(basePath + "/2016/01/31").listFiles()) { + if (file.getName().endsWith(".parquet")) { + if (FSUtils.getFileId(file.getName()) + .equals(FSUtils.getFileId(parquetFile.getName())) && + HoodieTimeline.compareTimestamps(FSUtils.getCommitTime(file.getName()), + FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) { + updatedParquetFile = file; + break; } - assertTrue(parquetFile != null); - - // Read out the bloom filter and make sure filter can answer record exist or not - Path parquetFilePath = new Path(parquetFile.getAbsolutePath()); - BloomFilter filter = ParquetUtils.readBloomFilterFromParquetMetadata(parquetFilePath); - for (HoodieRecord record : records) { - assertTrue(filter.mightContain(record.getRecordKey())); - } - // Create a commit file - new File(this.basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + FSUtils.getCommitTime(parquetFile.getName()) + ".commit").createNewFile(); - - // Read the parquet file, check the record content - List fileRecords = ParquetUtils.readAvroRecords(parquetFilePath); - GenericRecord newRecord; - int index = 0; - for (GenericRecord record: fileRecords) { - assertTrue(record.get("_row_key").toString().equals(records.get(index).getRecordKey())); - index++; - } - - // We update the 1st record & add a new record - String updateRecordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - TestRawTripPayload updateRowChanges1 = new TestRawTripPayload(updateRecordStr1); - 
HoodieRecord updatedRecord1 = new HoodieRecord(new HoodieKey(updateRowChanges1.getRowKey(), updateRowChanges1.getPartitionPath()), updateRowChanges1); - updatedRecord1.setCurrentLocation(new HoodieRecordLocation(null, FSUtils.getFileId(parquetFile.getName()))); - - TestRawTripPayload rowChange4 = new TestRawTripPayload(recordStr4); - HoodieRecord insertedRecord1 = new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - - List updatedRecords = Arrays.asList(updatedRecord1, insertedRecord1); - - Thread.sleep(1000); - String newCommitTime = HoodieTestUtils.makeNewCommitTime(); - metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - table = new HoodieCopyOnWriteTable(config, metadata); - Iterator> iter = table.handleUpdate(newCommitTime, updatedRecord1.getCurrentLocation().getFileId(), updatedRecords.iterator()); - - // Check the updated file - File updatedParquetFile = null; - for (File file : new File(basePath + "/2016/01/31").listFiles()) { - if (file.getName().endsWith(".parquet")) { - if (FSUtils.getFileId(file.getName()) - .equals(FSUtils.getFileId(parquetFile.getName())) && - HoodieTimeline.compareTimestamps(FSUtils.getCommitTime(file.getName()), - FSUtils.getCommitTime(parquetFile.getName()), HoodieTimeline.GREATER)) { - updatedParquetFile = file; - break; - } - } - } - assertTrue(updatedParquetFile != null); - // Check whether the record has been updated - Path updatedParquetFilePath = new Path(updatedParquetFile.getAbsolutePath()); - BloomFilter updatedFilter = ParquetUtils.readBloomFilterFromParquetMetadata(updatedParquetFilePath); - for (HoodieRecord record : records) { - // No change to the _row_key - assertTrue(updatedFilter.mightContain(record.getRecordKey())); - } - - assertTrue(updatedFilter.mightContain(insertedRecord1.getRecordKey())); - records.add(insertedRecord1);// add this so it can further check below - - ParquetReader updatedReader = ParquetReader.builder(new AvroReadSupport<>(), 
updatedParquetFilePath).build(); - index = 0; - while ((newRecord = (GenericRecord) updatedReader.read()) != null) { - assertTrue(newRecord.get("_row_key").toString().equals(records.get(index).getRecordKey())); - if (index == 0) { - assertTrue(newRecord.get("number").toString().equals("15")); - } - index++; - } - updatedReader.close(); - // Also check the numRecordsWritten - List statuses = HoodieClientTestUtils.collectStatuses(iter); - WriteStatus writeStatus = statuses.get(0); - assertTrue("Should be only one file generated", statuses.size() == 1); - assertEquals(4, writeStatus.getStat().getNumWrites());//3 rewritten records + 1 new record + } + } + assertTrue(updatedParquetFile != null); + // Check whether the record has been updated + Path updatedParquetFilePath = new Path(updatedParquetFile.getAbsolutePath()); + BloomFilter updatedFilter = ParquetUtils + .readBloomFilterFromParquetMetadata(updatedParquetFilePath); + for (HoodieRecord record : records) { + // No change to the _row_key + assertTrue(updatedFilter.mightContain(record.getRecordKey())); } + assertTrue(updatedFilter.mightContain(insertedRecord1.getRecordKey())); + records.add(insertedRecord1);// add this so it can further check below - private List newHoodieRecords(int n, String time) throws Exception { - List records = new ArrayList<>(); - for (int i = 0; i < n; i++) { - String recordStr = String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", - UUID.randomUUID().toString(), - time, - i); - TestRawTripPayload rowChange = new TestRawTripPayload(recordStr); - records.add(new HoodieRecord( - new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), - rowChange)); - } - return records; + ParquetReader updatedReader = ParquetReader + .builder(new AvroReadSupport<>(), updatedParquetFilePath).build(); + index = 0; + while ((newRecord = (GenericRecord) updatedReader.read()) != null) { + assertTrue(newRecord.get("_row_key").toString().equals(records.get(index).getRecordKey())); + if 
(index == 0) { + assertTrue(newRecord.get("number").toString().equals("15")); + } + index++; + } + updatedReader.close(); + // Also check the numRecordsWritten + List statuses = HoodieClientTestUtils.collectStatuses(iter); + WriteStatus writeStatus = statuses.get(0); + assertTrue("Should be only one file generated", statuses.size() == 1); + assertEquals(4, writeStatus.getStat().getNumWrites());//3 rewritten records + 1 new record + } + + + private List newHoodieRecords(int n, String time) throws Exception { + List records = new ArrayList<>(); + for (int i = 0; i < n; i++) { + String recordStr = String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", + UUID.randomUUID().toString(), + time, + i); + TestRawTripPayload rowChange = new TestRawTripPayload(recordStr); + records.add(new HoodieRecord( + new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), + rowChange)); + } + return records; + } + + // Check if record level metadata is aggregated properly at the end of write. 
+ @Test + public void testMetadataAggregateFromWriteStatus() throws Exception { + // Prepare the AvroParquetIO + HoodieWriteConfig config = makeHoodieClientConfigBuilder() + .withWriteStatusClass(MetadataMergeWriteStatus.class).build(); + String firstCommitTime = HoodieTestUtils.makeNewCommitTime(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); + + // Get some records belong to the same partition (2016/01/31) + String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; + String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; + String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; + + List records = new ArrayList<>(); + TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); + records.add( + new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), + rowChange1)); + TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); + records.add( + new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), + rowChange2)); + TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); + records.add( + new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), + rowChange3)); + + // Insert new records + List writeStatuses = HoodieClientTestUtils + .collectStatuses(table.handleInsert(firstCommitTime, records.iterator())); + Map allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus + .mergeMetadataForWriteStatuses(writeStatuses); + assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000")); + // For metadata key InputRecordCount_1506582000, value is 2 for each record. 
So sum of this should be 2 * 3 + assertEquals("6", allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000")); + } + + @Test + public void testInsertWithPartialFailures() throws Exception { + HoodieWriteConfig config = makeHoodieClientConfig(); + String commitTime = HoodieTestUtils.makeNewCommitTime(); + FileSystem fs = FSUtils.getFs(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); + + // Write a few records, and get atleast one file + // 10 records for partition 1, 1 record for partition 2. + List records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); + records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); + + // Simulate crash after first file + List statuses = HoodieClientTestUtils + .collectStatuses(table.handleInsert(commitTime, records.iterator())); + WriteStatus status = statuses.get(0); + Path partialFile = new Path(String.format("%s/%s/%s", + basePath, + status.getPartitionPath(), + FSUtils.makeDataFileName(commitTime, 0, status.getFileId())) + ); + assertTrue(fs.exists(partialFile)); + + // When we retry + records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); + records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); + + statuses = HoodieClientTestUtils + .collectStatuses(table.handleInsert(commitTime, records.iterator())); + status = statuses.get(0); + + Path retriedFIle = new Path(String.format("%s/%s/%s", + basePath, + status.getPartitionPath(), + FSUtils.makeDataFileName(commitTime, 0, status.getFileId())) + ); + assertTrue(fs.exists(retriedFIle)); + assertFalse(fs.exists(partialFile)); + } + + + @Test + public void testInsertRecords() throws Exception { + HoodieWriteConfig config = makeHoodieClientConfig(); + String commitTime = HoodieTestUtils.makeNewCommitTime(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieCopyOnWriteTable table = new 
HoodieCopyOnWriteTable(config, metadata); + + // Case 1: + // 10 records for partition 1, 1 record for partition 2. + List records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); + records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); + + // Insert new records + List returnedStatuses = HoodieClientTestUtils + .collectStatuses(table.handleInsert(commitTime, records.iterator())); + + // TODO: check the actual files and make sure 11 records, total were written. + assertEquals(2, returnedStatuses.size()); + assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath()); + assertEquals(0, returnedStatuses.get(0).getFailedRecords().size()); + assertEquals(10, returnedStatuses.get(0).getWrittenRecords().size()); + assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath()); + assertEquals(0, returnedStatuses.get(0).getFailedRecords().size()); + assertEquals(1, returnedStatuses.get(1).getWrittenRecords().size()); + + // Case 2: + // 1 record for partition 1, 5 record for partition 2, 1 records for partition 3. 
+ records = newHoodieRecords(1, "2016-01-31T03:16:41.415Z"); + records.addAll(newHoodieRecords(5, "2016-02-01T03:16:41.415Z")); + records.addAll(newHoodieRecords(1, "2016-02-02T03:16:41.415Z")); + + // Insert new records + returnedStatuses = HoodieClientTestUtils + .collectStatuses(table.handleInsert(commitTime, records.iterator())); + + assertEquals(3, returnedStatuses.size()); + assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath()); + assertEquals(1, returnedStatuses.get(0).getWrittenRecords().size()); + + assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath()); + assertEquals(5, returnedStatuses.get(1).getWrittenRecords().size()); + + assertEquals("2016/02/02", returnedStatuses.get(2).getPartitionPath()); + assertEquals(1, returnedStatuses.get(2).getWrittenRecords().size()); + + } + + @Test + public void testFileSizeUpsertRecords() throws Exception { + HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig( + HoodieStorageConfig.newBuilder().limitFileSize(64 * 1024).parquetBlockSize(64 * 1024) + .parquetPageSize(64 * 1024).build()).build(); + String commitTime = HoodieTestUtils.makeNewCommitTime(); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); + + List records = new ArrayList<>(); + // Approx 1150 records are written for block size of 64KB + for (int i = 0; i < 2000; i++) { + String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString() + + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}"; + TestRawTripPayload rowChange = new TestRawTripPayload(recordStr); + records + .add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), + rowChange)); } - // Check if record level metadata is aggregated properly at the end of write. 
- @Test - public void testMetadataAggregateFromWriteStatus() throws Exception { - // Prepare the AvroParquetIO - HoodieWriteConfig config = makeHoodieClientConfigBuilder().withWriteStatusClass(MetadataMergeWriteStatus.class).build(); - String firstCommitTime = HoodieTestUtils.makeNewCommitTime(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + // Insert new records + HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - // Get some records belong to the same partition (2016/01/31) - String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"; - String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}"; - String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; - - List records = new ArrayList<>(); - TestRawTripPayload rowChange1 = new TestRawTripPayload(recordStr1); - records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); - TestRawTripPayload rowChange2 = new TestRawTripPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); - TestRawTripPayload rowChange3 = new TestRawTripPayload(recordStr3); - records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); - - // Insert new records - List writeStatuses = HoodieClientTestUtils - .collectStatuses(table.handleInsert(firstCommitTime, records.iterator())); - Map allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus - .mergeMetadataForWriteStatuses(writeStatuses); - assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000")); - // 
For metadata key InputRecordCount_1506582000, value is 2 for each record. So sum of this should be 2 * 3 - assertEquals("6", allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000")); + // Check the updated file + int counts = 0; + for (File file : new File(basePath + "/2016/01/31").listFiles()) { + if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()) + .equals(commitTime)) { + System.out.println(file.getName() + "-" + file.length()); + counts++; + } } + assertEquals( + "If the number of records are more than 1150, then there should be a new file", 3, + counts); + } - @Test - public void testInsertWithPartialFailures() throws Exception { - HoodieWriteConfig config = makeHoodieClientConfig(); - String commitTime = HoodieTestUtils.makeNewCommitTime(); - FileSystem fs = FSUtils.getFs(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, basePath); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - // Write a few records, and get atleast one file - // 10 records for partition 1, 1 record for partition 2. 
- List records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); - records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); + private List testUpsertPartitioner(int smallFileSize, + int numInserts, + int numUpdates, + int fileSize, + boolean autoSplitInserts) throws Exception { + final String TEST_PARTITION_PATH = "2016/09/26"; + HoodieWriteConfig config = makeHoodieClientConfigBuilder() + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .compactionSmallFileSize(smallFileSize).insertSplitSize(100) + .autoTuneInsertSplits(autoSplitInserts).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()) + .build(); - // Simulate crash after first file - List statuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); - WriteStatus status = statuses.get(0); - Path partialFile = new Path(String.format("%s/%s/%s", - basePath, - status.getPartitionPath(), - FSUtils.makeDataFileName(commitTime, 0, status.getFileId())) - ); - assertTrue(fs.exists(partialFile)); + HoodieClientTestUtils.fakeCommitFile(basePath, "001"); + HoodieClientTestUtils.fakeDataFile(basePath, TEST_PARTITION_PATH, "001", "file1", fileSize); - // When we retry - records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); - records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); + HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); + HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - statuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); - status = statuses.get(0); - - Path retriedFIle = new Path(String.format("%s/%s/%s", - basePath, - status.getPartitionPath(), - FSUtils.makeDataFileName(commitTime, 0, status.getFileId())) - ); - assertTrue(fs.exists(retriedFIle)); - assertFalse(fs.exists(partialFile)); + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator( + new 
String[]{TEST_PARTITION_PATH}); + List insertRecords = dataGenerator.generateInserts("001", numInserts); + List updateRecords = dataGenerator.generateUpdates("001", numUpdates); + for (HoodieRecord updateRec : updateRecords) { + updateRec.setCurrentLocation(new HoodieRecordLocation("001", "file1")); } + List records = new ArrayList<>(); + records.addAll(insertRecords); + records.addAll(updateRecords); + WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records)); + HoodieCopyOnWriteTable.UpsertPartitioner partitioner = (HoodieCopyOnWriteTable.UpsertPartitioner) + table.getUpsertPartitioner(profile); + + assertEquals("Should have 3 partitions", 3, partitioner.numPartitions()); + assertEquals("Bucket 0 is UPDATE", HoodieCopyOnWriteTable.BucketType.UPDATE, + partitioner.getBucketInfo(0).bucketType); + assertEquals("Bucket 1 is INSERT", HoodieCopyOnWriteTable.BucketType.INSERT, + partitioner.getBucketInfo(1).bucketType); + assertEquals("Bucket 2 is INSERT", HoodieCopyOnWriteTable.BucketType.INSERT, + partitioner.getBucketInfo(2).bucketType); + assertEquals("Update record should have gone to the 1 update partiton", 0, + partitioner.getPartition(new Tuple2<>(updateRecords.get(0).getKey(), + Option.apply(updateRecords.get(0).getCurrentLocation())))); + return partitioner.getInsertBuckets(TEST_PARTITION_PATH); + } - @Test public void testInsertRecords() throws Exception { - HoodieWriteConfig config = makeHoodieClientConfig(); - String commitTime = HoodieTestUtils.makeNewCommitTime(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - // Case 1: - // 10 records for partition 1, 1 record for partition 2. 
- List records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); - records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); - - // Insert new records - List returnedStatuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); + @Test + public void testUpsertPartitioner() throws Exception { + // Inserts + Updates... Check all updates go together & inserts subsplit + List insertBuckets = testUpsertPartitioner(0, 200, 100, + 1024, false); + assertEquals("Total of 2 insert buckets", 2, insertBuckets.size()); + } - // TODO: check the actual files and make sure 11 records, total were written. - assertEquals(2, returnedStatuses.size()); - assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath()); - assertEquals(0, returnedStatuses.get(0).getFailedRecords().size()); - assertEquals(10, returnedStatuses.get(0).getWrittenRecords().size()); - assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath()); - assertEquals(0, returnedStatuses.get(0).getFailedRecords().size()); - assertEquals(1, returnedStatuses.get(1).getWrittenRecords().size()); + @Test + public void testUpsertPartitionerWithSmallInsertHandling() throws Exception { + // Inserts + Updates .. Check updates go together & inserts subsplit, after expanding smallest file + List insertBuckets = testUpsertPartitioner(1000 * 1024, + 400, 100, 800 * 1024, false); + assertEquals("Total of 3 insert buckets", 3, insertBuckets.size()); + assertEquals("First insert bucket must be same as update bucket", 0, + insertBuckets.get(0).bucketNumber); + assertEquals("First insert bucket should have weight 0.5", 0.5, insertBuckets.get(0).weight, + 0.01); - // Case 2: - // 1 record for partition 1, 5 record for partition 2, 1 records for partition 3. 
- records = newHoodieRecords(1, "2016-01-31T03:16:41.415Z"); - records.addAll(newHoodieRecords(5, "2016-02-01T03:16:41.415Z")); - records.addAll(newHoodieRecords(1, "2016-02-02T03:16:41.415Z")); - - // Insert new records - returnedStatuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); - - assertEquals(3, returnedStatuses.size()); - assertEquals("2016/01/31", returnedStatuses.get(0).getPartitionPath()); - assertEquals(1, returnedStatuses.get(0).getWrittenRecords().size()); - - assertEquals("2016/02/01", returnedStatuses.get(1).getPartitionPath()); - assertEquals(5, returnedStatuses.get(1).getWrittenRecords().size()); - - assertEquals("2016/02/02", returnedStatuses.get(2).getPartitionPath()); - assertEquals(1, returnedStatuses.get(2).getWrittenRecords().size()); + // Now with insert split size auto tuned + insertBuckets = testUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, true); + assertEquals("Total of 3 insert buckets", 3, insertBuckets.size()); + assertEquals("First insert bucket must be same as update bucket", 0, + insertBuckets.get(0).bucketNumber); + assertEquals("First insert bucket should have weight 0.5", 200.0 / 2400, + insertBuckets.get(0).weight, 0.01); + } + @After + public void cleanup() { + if (basePath != null) { + new File(basePath).delete(); } - - @Test public void testFileSizeUpsertRecords() throws Exception { - HoodieWriteConfig config = makeHoodieClientConfigBuilder().withStorageConfig( - HoodieStorageConfig.newBuilder().limitFileSize(64 * 1024).parquetBlockSize(64 * 1024) - .parquetPageSize(64 * 1024).build()).build(); - String commitTime = HoodieTestUtils.makeNewCommitTime(); - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - List records = new ArrayList<>(); - // Approx 1150 records are written for block size of 64KB - for (int i = 0; i < 2000; i++) { - String recordStr = 
"{\"_row_key\":\"" + UUID.randomUUID().toString() + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}"; - TestRawTripPayload rowChange = new TestRawTripPayload(recordStr); - records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), - rowChange)); - } - - // Insert new records - HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); - - // Check the updated file - int counts = 0; - for (File file : new File(basePath + "/2016/01/31").listFiles()) { - if (file.getName().endsWith(".parquet") && FSUtils.getCommitTime(file.getName()).equals(commitTime)) { - System.out.println(file.getName() + "-" + file.length()); - counts++; - } - } - assertEquals( - "If the number of records are more than 1150, then there should be a new file", 3, - counts); - } - - - - private List testUpsertPartitioner(int smallFileSize, - int numInserts, - int numUpdates, - int fileSize, - boolean autoSplitInserts) throws Exception { - final String TEST_PARTITION_PATH = "2016/09/26"; - HoodieWriteConfig config = makeHoodieClientConfigBuilder() - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .compactionSmallFileSize(smallFileSize).insertSplitSize(100).autoTuneInsertSplits(autoSplitInserts).build()) - .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1000 * 1024).build()).build(); - - HoodieClientTestUtils.fakeCommitFile(basePath, "001"); - HoodieClientTestUtils.fakeDataFile(basePath, TEST_PARTITION_PATH, "001", "file1", fileSize); - - HoodieTableMetaClient metadata = new HoodieTableMetaClient(FSUtils.getFs(), basePath); - HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metadata); - - HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[]{TEST_PARTITION_PATH}); - List insertRecords = dataGenerator.generateInserts("001", numInserts); - List updateRecords = dataGenerator.generateUpdates("001", numUpdates); - for (HoodieRecord updateRec: 
updateRecords) { - updateRec.setCurrentLocation(new HoodieRecordLocation("001", "file1")); - } - List records = new ArrayList<>(); - records.addAll(insertRecords); - records.addAll(updateRecords); - WorkloadProfile profile = new WorkloadProfile(jsc.parallelize(records)); - HoodieCopyOnWriteTable.UpsertPartitioner partitioner = (HoodieCopyOnWriteTable.UpsertPartitioner) - table.getUpsertPartitioner(profile); - - assertEquals("Should have 3 partitions", 3, partitioner.numPartitions()); - assertEquals("Bucket 0 is UPDATE", HoodieCopyOnWriteTable.BucketType.UPDATE, - partitioner.getBucketInfo(0).bucketType); - assertEquals("Bucket 1 is INSERT", HoodieCopyOnWriteTable.BucketType.INSERT, - partitioner.getBucketInfo(1).bucketType); - assertEquals("Bucket 2 is INSERT", HoodieCopyOnWriteTable.BucketType.INSERT, - partitioner.getBucketInfo(2).bucketType); - assertEquals("Update record should have gone to the 1 update partiton", 0, - partitioner.getPartition(new Tuple2<>(updateRecords.get(0).getKey(), Option.apply(updateRecords.get(0).getCurrentLocation())))); - return partitioner.getInsertBuckets(TEST_PARTITION_PATH); - } - - - @Test - public void testUpsertPartitioner() throws Exception { - // Inserts + Updates... Check all updates go together & inserts subsplit - List insertBuckets = testUpsertPartitioner(0, 200, 100, 1024, false); - assertEquals("Total of 2 insert buckets", 2, insertBuckets.size()); - } - - - @Test - public void testUpsertPartitionerWithSmallInsertHandling() throws Exception { - // Inserts + Updates .. 
Check updates go together & inserts subsplit, after expanding smallest file - List insertBuckets = testUpsertPartitioner(1000 * 1024, 400, 100, 800 * 1024, false); - assertEquals("Total of 3 insert buckets", 3, insertBuckets.size()); - assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber); - assertEquals("First insert bucket should have weight 0.5", 0.5, insertBuckets.get(0).weight, 0.01); - - // Now with insert split size auto tuned - insertBuckets = testUpsertPartitioner(1000 * 1024, 2400, 100, 800 * 1024, true); - assertEquals("Total of 3 insert buckets", 3, insertBuckets.size()); - assertEquals("First insert bucket must be same as update bucket", 0, insertBuckets.get(0).bucketNumber); - assertEquals("First insert bucket should have weight 0.5", 200.0/2400, insertBuckets.get(0).weight, 0.01); - } - - @After - public void cleanup() { - if (basePath != null) { - new File(basePath).delete(); - } - if (jsc != null) { - jsc.stop(); - } + if (jsc != null) { + jsc.stop(); } + } } diff --git a/hoodie-client/src/test/java/com/uber/hoodie/table/TestMergeOnReadTable.java b/hoodie-client/src/test/java/com/uber/hoodie/table/TestMergeOnReadTable.java index 69cd2ce4b..9602a02ff 100644 --- a/hoodie-client/src/test/java/com/uber/hoodie/table/TestMergeOnReadTable.java +++ b/hoodie-client/src/test/java/com/uber/hoodie/table/TestMergeOnReadTable.java @@ -19,6 +19,11 @@ package com.uber.hoodie.table; +import static com.uber.hoodie.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.HoodieWriteClient; import com.uber.hoodie.WriteStatus; import com.uber.hoodie.common.HoodieClientTestUtils; @@ -44,6 +49,14 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.index.HoodieIndex; import com.uber.hoodie.io.compact.HoodieCompactor; import 
com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -60,494 +73,506 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static com.uber.hoodie.common.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - public class TestMergeOnReadTable { - private transient JavaSparkContext jsc = null; - private transient SQLContext sqlContext; - private String basePath = null; - private HoodieCompactor compactor; - private FileSystem fs; - //NOTE : Be careful in using DFS (FileSystem.class) vs LocalFs(RawLocalFileSystem.class) - //The implementation and gurantees of many API's differ, for example check rename(src,dst) - private static MiniDFSCluster dfsCluster; - private static DistributedFileSystem dfs; - private static HdfsTestService hdfsTestService; + private transient JavaSparkContext jsc = null; + private transient SQLContext sqlContext; + private String basePath = null; + private HoodieCompactor compactor; + private FileSystem fs; - @AfterClass - public static void cleanUp() throws Exception { - if (hdfsTestService != null) { - hdfsTestService.stop(); - dfsCluster.shutdown();; - } - FSUtils.setFs(null); - // Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the same JVM - 
FileSystem.closeAll(); - HoodieTestUtils.resetFS(); + //NOTE : Be careful in using DFS (FileSystem.class) vs LocalFs(RawLocalFileSystem.class) + //The implementation and gurantees of many API's differ, for example check rename(src,dst) + private static MiniDFSCluster dfsCluster; + private static DistributedFileSystem dfs; + private static HdfsTestService hdfsTestService; + + @AfterClass + public static void cleanUp() throws Exception { + if (hdfsTestService != null) { + hdfsTestService.stop(); + dfsCluster.shutdown(); + ; + } + FSUtils.setFs(null); + // Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the same JVM + FileSystem.closeAll(); + HoodieTestUtils.resetFS(); + } + + @BeforeClass + public static void setUpDFS() throws IOException { + // Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the same JVM + FileSystem.closeAll(); + if (hdfsTestService == null) { + hdfsTestService = new HdfsTestService(); + dfsCluster = hdfsTestService.start(true); + // Create a temp folder as the base path + dfs = dfsCluster.getFileSystem(); + } + FSUtils.setFs(dfs); + HoodieTestUtils.resetFS(); + } + + @Before + public void init() throws IOException { + this.fs = FSUtils.getFs(); + + // Initialize a local spark env + jsc = new JavaSparkContext( + HoodieClientTestUtils.getSparkConfForTest("TestHoodieMergeOnReadTable")); + jsc.hadoopConfiguration().addResource(FSUtils.getFs().getConf()); + + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + dfs.mkdirs(new Path(basePath)); + FSUtils.setFs(dfs); + HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); + + compactor = new HoodieRealtimeTableCompactor(); + + //SQLContext stuff + sqlContext = new SQLContext(jsc); + } + + @After + public void clean() { + if (basePath != null) { + new File(basePath).delete(); + } + if (jsc != null) { + 
jsc.stop(); + } + } + + @Test + public void testSimpleInsertAndUpdate() throws Exception { + HoodieWriteConfig cfg = getConfig(true); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + /** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + + Optional deltaCommit = + metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); + + Optional commit = + metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + FileStatus[] allFiles = HoodieTestUtils + .listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, + hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); + Stream dataFilesToRead = roView.getLatestDataFiles(); + assertTrue(!dataFilesToRead.findAny().isPresent()); + + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFilesToRead = roView.getLatestDataFiles(); + assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", + dataFilesToRead.findAny().isPresent()); + + /** + * Write 2 (updates) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, 100); + Map recordsMap = new HashMap<>(); + for 
(HoodieRecord rec : records) { + if (!recordsMap.containsKey(rec.getKey())) { + recordsMap.put(rec.getKey(), rec); + } } - @BeforeClass - public static void setUpDFS() throws IOException { - // Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the same JVM - FileSystem.closeAll(); - if (hdfsTestService == null) { - hdfsTestService = new HdfsTestService(); - dfsCluster = hdfsTestService.start(true); - // Create a temp folder as the base path - dfs = dfsCluster.getFileSystem(); - } - FSUtils.setFs(dfs); - HoodieTestUtils.resetFS(); + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp()); + + commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + HoodieCompactor compactor = new HoodieRealtimeTableCompactor(); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(true)); + + compactor.compact(jsc, getConfig(true), table); + + allFiles = HoodieTestUtils.listAllDataFilesInPath(fs, cfg.getBasePath()); + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFilesToRead = roView.getLatestDataFiles(); + assertTrue(dataFilesToRead.findAny().isPresent()); + + // verify that there is a commit + table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(fs, cfg.getBasePath(), true), getConfig(false)); + HoodieTimeline timeline = table.getCompletedCompactionCommitTimeline(); + assertEquals("Expecting a single commit.", 1, + timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); + String latestCompactionCommitTime = 
timeline.lastInstant().get().getTimestamp(); + assertTrue(HoodieTimeline + .compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER)); + + assertEquals("Must contain 200 records", 200, + HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); + } + + // Check if record level metadata is aggregated properly at the end of write. + @Test + public void testMetadataAggregateFromWriteStatus() throws Exception { + HoodieWriteConfig cfg = getConfigBuilder(false) + .withWriteStatusClass(MetadataMergeWriteStatus.class).build(); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + String newCommitTime = "001"; + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + client.startCommit(); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + Map allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus + .mergeMetadataForWriteStatuses(statuses); + assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000")); + // For metadata key InputRecordCount_1506582000, value is 2 for each record. 
So sum of this should be 2 * records.size() + assertEquals(String.valueOf(2 * records.size()), + allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000")); + } + + @Test + public void testSimpleInsertAndDelete() throws Exception { + HoodieWriteConfig cfg = getConfig(true); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + /** + * Write 1 (only inserts, written as parquet file) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + List records = dataGen.generateInserts(newCommitTime, 20); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + + Optional deltaCommit = + metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); + + Optional commit = + metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + FileStatus[] allFiles = HoodieTestUtils + .listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, + hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); + Stream dataFilesToRead = roView.getLatestDataFiles(); + assertTrue(!dataFilesToRead.findAny().isPresent()); + + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFilesToRead = roView.getLatestDataFiles(); + assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", + dataFilesToRead.findAny().isPresent()); + + /** + * Write 2 (only inserts, 
written to .log file) + */ + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateInserts(newCommitTime, 20); + writeRecords = jsc.parallelize(records, 1); + statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + /** + * Write 2 (only deletes, written to .log file) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + + List fewRecordsForDelete = dataGen.generateDeletesFromExistingRecords(records); + + statuses = client.upsert(jsc.parallelize(fewRecordsForDelete, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp()); + + commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + allFiles = HoodieTestUtils.listAllDataFilesInPath(fs, cfg.getBasePath()); + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFilesToRead = roView.getLatestDataFiles(); + assertTrue(dataFilesToRead.findAny().isPresent()); + + List dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils + .getRecordsUsingInputFormat(dataFiles); + //Wrote 40 records and deleted 20 records, so remaining 40-20 = 20 + assertEquals("Must contain 20 records", 20, recordsRead.size()); + } + + @Test + public void testCOWToMORConvertedDatasetRollback() throws Exception { + + //Set TableType to COW + HoodieTestUtils.initTableType(basePath, HoodieTableType.COPY_ON_WRITE); + + HoodieWriteConfig cfg = getConfig(true); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + 
/** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + //verify there are no errors + assertNoWriteErrors(statuses); + + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + + Optional commit = + metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertTrue(commit.isPresent()); + assertEquals("commit should be 001", "001", commit.get().getTimestamp()); + + /** + * Write 2 (updates) + */ + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, records); + + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + // Verify there are no errors + assertNoWriteErrors(statuses); + + //Set TableType to MOR + HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); + + //rollback a COW commit when TableType is MOR + client.rollback(newCommitTime); + + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + FileStatus[] allFiles = HoodieTestUtils + .listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + HoodieTableFileSystemView roView = new HoodieTableFileSystemView(metaClient, + hoodieTable.getCompletedCommitTimeline(), allFiles); + + final String absentCommit = newCommitTime; + assertFalse(roView.getLatestDataFiles().filter(file -> { + if (absentCommit.equals(file.getCommitTime())) { + return true; + } else { + return false; + } + }).findAny().isPresent()); + } + + @Test + public void testRollbackWithDeltaAndCompactionCommit() throws Exception { + + 
HoodieWriteConfig cfg = getConfig(true); + HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); + + // Test delta commit rollback (with all log files) + /** + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + List records = dataGen.generateInserts(newCommitTime, 200); + JavaRDD writeRecords = jsc.parallelize(records, 1); + + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + + Optional deltaCommit = + metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); + + Optional commit = + metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + FileStatus[] allFiles = HoodieTestUtils + .listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, + hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); + Stream dataFilesToRead = roView.getLatestDataFiles(); + assertTrue(!dataFilesToRead.findAny().isPresent()); + + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFilesToRead = roView.getLatestDataFiles(); + assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", + dataFilesToRead.findAny().isPresent()); + + /** + * Write 2 (updates) + */ + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, 200); + + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + 
// Verify there are no errors + assertNoWriteErrors(statuses); + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); + assertTrue(deltaCommit.isPresent()); + assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp()); + + commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); + assertFalse(commit.isPresent()); + + List dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils + .getRecordsUsingInputFormat(dataFiles); + + assertEquals(recordsRead.size(), 200); + + // Test delta commit rollback + client.rollback(newCommitTime); + + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), + allFiles); + dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); + recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles); + + assertEquals(recordsRead.size(), 200); + + //Test compaction commit rollback + /** + * Write 2 (updates) + */ + newCommitTime = "003"; + client.startCommitWithTime(newCommitTime); + + records = dataGen.generateUpdates(newCommitTime, 400); + + statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); + assertNoWriteErrors(statuses); + + HoodieCompactor compactor = new HoodieRealtimeTableCompactor(); + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(true)); + + compactor.compact(jsc, getConfig(true), table); + + allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + hoodieTable = 
HoodieTable.getHoodieTable(metaClient, cfg); + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompactionCommitTimeline(), + allFiles); + + final String compactedCommitTime = metaClient.getActiveTimeline().reload() + .getCommitsAndCompactionsTimeline().lastInstant().get().getTimestamp(); + + assertTrue(roView.getLatestDataFiles().filter(file -> { + if (compactedCommitTime.equals(file.getCommitTime())) { + return true; + } else { + return false; + } + }).findAny().isPresent()); + + client.rollback(compactedCommitTime); + + allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); + metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); + hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); + roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompactionCommitTimeline(), + allFiles); + + assertFalse(roView.getLatestDataFiles().filter(file -> { + if (compactedCommitTime.equals(file.getCommitTime())) { + return true; + } else { + return false; + } + }).findAny().isPresent()); + } + + private HoodieWriteConfig getConfig(Boolean autoCommit) { + return getConfigBuilder(autoCommit).build(); + } + + private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) { + return HoodieWriteConfig.newBuilder().withPath(basePath) + .withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .withAutoCommit(autoCommit) + .withCompactionConfig( + HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024) + .withInlineCompaction(false).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) + .forTable("test-trip-table").withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); + } + + private void assertNoWriteErrors(List statuses) { + // Verify there are no errors + for (WriteStatus status : statuses) { + assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); } - - @Before - 
public void init() throws IOException { - this.fs = FSUtils.getFs(); - - // Initialize a local spark env - jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestHoodieMergeOnReadTable")); - jsc.hadoopConfiguration().addResource(FSUtils.getFs().getConf()); - - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); - dfs.mkdirs(new Path(basePath)); - FSUtils.setFs(dfs); - HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); - - compactor = new HoodieRealtimeTableCompactor(); - - //SQLContext stuff - sqlContext = new SQLContext(jsc); - } - - @After - public void clean() { - if (basePath != null) { - new File(basePath).delete(); - } - if (jsc != null) { - jsc.stop(); - } - } - - @Test - public void testSimpleInsertAndUpdate() throws Exception { - HoodieWriteConfig cfg = getConfig(true); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - /** - * Write 1 (only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); - - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - - Optional deltaCommit = - metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); - - Optional commit = - metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - FileStatus[] allFiles = 
HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, - hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); - Stream dataFilesToRead = roView.getLatestDataFiles(); - assertTrue(!dataFilesToRead.findAny().isPresent()); - - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFilesToRead = roView.getLatestDataFiles(); - assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", - dataFilesToRead.findAny().isPresent()); - - /** - * Write 2 (updates) - */ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, 100); - Map recordsMap = new HashMap<>(); - for (HoodieRecord rec : records) { - if (!recordsMap.containsKey(rec.getKey())) { - recordsMap.put(rec.getKey(), rec); - } - } - - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp()); - - commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - - HoodieCompactor compactor = new HoodieRealtimeTableCompactor(); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(true)); - - compactor.compact(jsc, getConfig(true), table); - - allFiles = HoodieTestUtils.listAllDataFilesInPath(fs, cfg.getBasePath()); - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFilesToRead = roView.getLatestDataFiles(); - 
assertTrue(dataFilesToRead.findAny().isPresent()); - - // verify that there is a commit - table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(fs, cfg.getBasePath(), true), getConfig(false)); - HoodieTimeline timeline = table.getCompletedCompactionCommitTimeline(); - assertEquals("Expecting a single commit.", 1, timeline.findInstantsAfter("000", Integer.MAX_VALUE).countInstants()); - String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp(); - assertTrue(HoodieTimeline - .compareTimestamps("000", latestCompactionCommitTime, HoodieTimeline.LESSER)); - - assertEquals("Must contain 200 records", 200, - HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); - } - - // Check if record level metadata is aggregated properly at the end of write. - @Test - public void testMetadataAggregateFromWriteStatus() throws Exception { - HoodieWriteConfig cfg = getConfigBuilder(false).withWriteStatusClass(MetadataMergeWriteStatus.class).build(); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - String newCommitTime = "001"; - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - client.startCommit(); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - Map allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus .mergeMetadataForWriteStatuses(statuses); - assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000")); - // For metadata key InputRecordCount_1506582000, value is 2 for each record. 
So sum of this should be 2 * records.size() - assertEquals(String.valueOf(2 * records.size()), allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000")); - } - - @Test - public void testSimpleInsertAndDelete() throws Exception { - HoodieWriteConfig cfg = getConfig(true); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - /** - * Write 1 (only inserts, written as parquet file) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); - - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - List records = dataGen.generateInserts(newCommitTime, 20); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - - Optional deltaCommit = - metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); - - Optional commit = - metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, - hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); - Stream dataFilesToRead = roView.getLatestDataFiles(); - assertTrue(!dataFilesToRead.findAny().isPresent()); - - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFilesToRead = roView.getLatestDataFiles(); - assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", - dataFilesToRead.findAny().isPresent()); - - /** - * Write 2 (only inserts, written 
to .log file) - */ - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateInserts(newCommitTime, 20); - writeRecords = jsc.parallelize(records, 1); - statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - /** - * Write 2 (only deletes, written to .log file) - */ - newCommitTime = "004"; - client.startCommitWithTime(newCommitTime); - - List fewRecordsForDelete = dataGen.generateDeletesFromExistingRecords(records); - - statuses = client.upsert(jsc.parallelize(fewRecordsForDelete, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Latest Delta commit should be 004", "004", deltaCommit.get().getTimestamp()); - - commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - allFiles = HoodieTestUtils.listAllDataFilesInPath(fs, cfg.getBasePath()); - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFilesToRead = roView.getLatestDataFiles(); - assertTrue(dataFilesToRead.findAny().isPresent()); - - List dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles); - //Wrote 40 records and deleted 20 records, so remaining 40-20 = 20 - assertEquals("Must contain 20 records", 20, recordsRead.size()); - } - - @Test - public void testCOWToMORConvertedDatasetRollback() throws Exception { - - //Set TableType to COW - HoodieTestUtils.initTableType(basePath, HoodieTableType.COPY_ON_WRITE); - - HoodieWriteConfig cfg = getConfig(true); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - /** - * Write 1 
(only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); - - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - //verify there are no errors - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - - Optional commit = - metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertTrue(commit.isPresent()); - assertEquals("commit should be 001", "001", commit.get().getTimestamp()); - - /** - * Write 2 (updates) - */ - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, records); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no errors - assertNoWriteErrors(statuses); - - //Set TableType to MOR - HoodieTestUtils.initTableType(basePath, HoodieTableType.MERGE_ON_READ); - - //rollback a COW commit when TableType is MOR - client.rollback(newCommitTime); - - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - FileStatus [] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - HoodieTableFileSystemView roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - - final String absentCommit = newCommitTime; - assertFalse(roView.getLatestDataFiles().filter(file -> { - if(absentCommit.equals(file.getCommitTime())) - return true; - else - return false; - }).findAny().isPresent()); - } - - @Test - public void testRollbackWithDeltaAndCompactionCommit() throws Exception { - - HoodieWriteConfig cfg = 
getConfig(true); - HoodieWriteClient client = new HoodieWriteClient(jsc, cfg); - - // Test delta commit rollback (with all log files) - /** - * Write 1 (only inserts) - */ - String newCommitTime = "001"; - client.startCommitWithTime(newCommitTime); - - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - List records = dataGen.generateInserts(newCommitTime, 200); - JavaRDD writeRecords = jsc.parallelize(records, 1); - - List statuses = client.upsert(writeRecords, newCommitTime).collect(); - assertNoWriteErrors(statuses); - - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - - Optional deltaCommit = - metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); - - Optional commit = - metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, - hoodieTable.getCompletedCompactionCommitTimeline(), allFiles); - Stream dataFilesToRead = roView.getLatestDataFiles(); - assertTrue(!dataFilesToRead.findAny().isPresent()); - - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFilesToRead = roView.getLatestDataFiles(); - assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", - dataFilesToRead.findAny().isPresent()); - - /** - * Write 2 (updates) - */ - newCommitTime = "002"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, 200); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - // Verify there are no 
errors - assertNoWriteErrors(statuses); - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp()); - - commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); - assertFalse(commit.isPresent()); - - List dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles); - - assertEquals(recordsRead.size(), 200); - - // Test delta commit rollback - client.rollback(newCommitTime); - - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitTimeline(), allFiles); - dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); - recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles); - - assertEquals(recordsRead.size(), 200); - - - //Test compaction commit rollback - /** - * Write 2 (updates) - */ - newCommitTime = "003"; - client.startCommitWithTime(newCommitTime); - - records = dataGen.generateUpdates(newCommitTime, 400); - - statuses = client.upsert(jsc.parallelize(records, 1), newCommitTime).collect(); - assertNoWriteErrors(statuses); - - HoodieCompactor compactor = new HoodieRealtimeTableCompactor(); - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(true)); - - compactor.compact(jsc, getConfig(true), table); - - allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); 
- roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompactionCommitTimeline(), allFiles); - - final String compactedCommitTime = metaClient.getActiveTimeline().reload().getCommitsAndCompactionsTimeline().lastInstant().get().getTimestamp(); - - assertTrue(roView.getLatestDataFiles().filter(file -> { - if(compactedCommitTime.equals(file.getCommitTime())) - return true; - else - return false; - }).findAny().isPresent()); - - client.rollback(compactedCommitTime); - - allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); - metaClient = new HoodieTableMetaClient(fs, cfg.getBasePath()); - hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg); - roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompactionCommitTimeline(), allFiles); - - assertFalse(roView.getLatestDataFiles().filter(file -> { - if(compactedCommitTime.equals(file.getCommitTime())) - return true; - else - return false; - }).findAny().isPresent()); - } - - private HoodieWriteConfig getConfig(Boolean autoCommit) { - return getConfigBuilder(autoCommit).build(); - } - - private HoodieWriteConfig.Builder getConfigBuilder(Boolean autoCommit) { - return HoodieWriteConfig.newBuilder().withPath(basePath) - .withSchema(TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) - .withAutoCommit(autoCommit) - .withCompactionConfig( - HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024) - .withInlineCompaction(false).build()) - .withStorageConfig(HoodieStorageConfig.newBuilder().limitFileSize(1024 * 1024).build()) - .forTable("test-trip-table").withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()); - } - - private void assertNoWriteErrors(List statuses) { - // Verify there are no errors - for (WriteStatus status : statuses) { - assertFalse("Errors found in write of " + status.getFileId(), status.hasErrors()); - } - } -} \ No newline at end of file + } +} diff --git 
a/hoodie-client/src/test/resources/log4j-surefire.properties b/hoodie-client/src/test/resources/log4j-surefire.properties index 490c6411d..daf8d28c1 100644 --- a/hoodie-client/src/test/resources/log4j-surefire.properties +++ b/hoodie-client/src/test/resources/log4j-surefire.properties @@ -20,7 +20,6 @@ log4j.category.com.uber.hoodie.io=WARN log4j.category.com.uber.hoodie.common=WARN log4j.category.com.uber.hoodie.table.log=WARN log4j.category.org.apache.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. diff --git a/hoodie-common/pom.xml b/hoodie-common/pom.xml index b010ac3ec..db973dfa7 100644 --- a/hoodie-common/pom.xml +++ b/hoodie-common/pom.xml @@ -15,128 +15,130 @@ ~ limitations under the License. --> - - - hoodie - com.uber.hoodie - 0.4.1-SNAPSHOT - - 4.0.0 + + + hoodie + com.uber.hoodie + 0.4.1-SNAPSHOT + + 4.0.0 - hoodie-common + hoodie-common - - - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - 2.5 - - - - test-jar - - - - - - org.apache.rat - apache-rat-plugin - - - org.apache.avro - avro-maven-plugin - - - - ${basedir}/src/main/avro/HoodieCommitMetadata.avsc - ${basedir}/src/main/avro/HoodieSavePointMetadata.avsc - ${basedir}/src/main/avro/HoodieCompactionMetadata.avsc - ${basedir}/src/main/avro/HoodieCleanMetadata.avsc - ${basedir}/src/main/avro/HoodieRollbackMetadata.avsc - - - - - + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-jar-plugin + 2.5 + + + + test-jar + + + + + + org.apache.rat + apache-rat-plugin + + + org.apache.avro + avro-maven-plugin + + + + ${basedir}/src/main/avro/HoodieCommitMetadata.avsc + ${basedir}/src/main/avro/HoodieSavePointMetadata.avsc + ${basedir}/src/main/avro/HoodieCompactionMetadata.avsc + ${basedir}/src/main/avro/HoodieCleanMetadata.avsc + ${basedir}/src/main/avro/HoodieRollbackMetadata.avsc + + + + + - - - org.apache.avro - avro - - - org.apache.hadoop - 
hadoop-client - - - javax.servlet - * - - - - - junit - junit - ${junit.version} - test - - - com.fasterxml.jackson.core - jackson-annotations - - - org.codehaus.jackson - jackson-mapper-asl - - - org.apache.parquet - parquet-avro - ${parquet.version} - - - org.mockito - mockito-all - 1.10.19 - test - - - org.apache.hadoop - hadoop-hdfs - tests - - - org.apache.hadoop - hadoop-common - tests - - - org.codehaus.jackson - jackson-core-asl - 1.9.13 - - - org.apache.commons - commons-lang3 - - - com.esotericsoftware - kryo - test - - - org.apache.avro - avro-mapred - - - org.mortbay.jetty - * - - - - + + + org.apache.avro + avro + + + org.apache.hadoop + hadoop-client + + + javax.servlet + * + + + + + junit + junit + ${junit.version} + test + + + com.fasterxml.jackson.core + jackson-annotations + + + org.codehaus.jackson + jackson-mapper-asl + + + org.apache.parquet + parquet-avro + ${parquet.version} + + + org.mockito + mockito-all + 1.10.19 + test + + + org.apache.hadoop + hadoop-hdfs + tests + + + org.apache.hadoop + hadoop-common + tests + + + org.codehaus.jackson + jackson-core-asl + 1.9.13 + + + org.apache.commons + commons-lang3 + + + com.esotericsoftware + kryo + test + + + org.apache.avro + avro-mapred + + + org.mortbay.jetty + * + + + + diff --git a/hoodie-common/src/main/java/com/uber/hoodie/avro/HoodieAvroWriteSupport.java b/hoodie-common/src/main/java/com/uber/hoodie/avro/HoodieAvroWriteSupport.java index dd3318228..f793c1539 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/avro/HoodieAvroWriteSupport.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/avro/HoodieAvroWriteSupport.java @@ -17,60 +17,59 @@ package com.uber.hoodie.avro; import com.uber.hoodie.common.BloomFilter; - +import java.util.HashMap; import org.apache.avro.Schema; import org.apache.parquet.avro.AvroWriteSupport; import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.schema.MessageType; -import java.util.HashMap; - /** * Wrap AvroWriterSupport for 
plugging in the bloom filter. */ public class HoodieAvroWriteSupport extends AvroWriteSupport { - private BloomFilter bloomFilter; - private String minRecordKey; - private String maxRecordKey; + + private BloomFilter bloomFilter; + private String minRecordKey; + private String maxRecordKey; - public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = - "com.uber.hoodie.bloomfilter"; - public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key"; - public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key"; + public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY = + "com.uber.hoodie.bloomfilter"; + public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key"; + public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key"; - public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) { - super(schema, avroSchema); - this.bloomFilter = bloomFilter; + public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) { + super(schema, avroSchema); + this.bloomFilter = bloomFilter; + } + + @Override + public WriteSupport.FinalizedWriteContext finalizeWrite() { + HashMap extraMetaData = new HashMap<>(); + if (bloomFilter != null) { + extraMetaData + .put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString()); + if (minRecordKey != null && maxRecordKey != null) { + extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey); + extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey); + } + } + return new WriteSupport.FinalizedWriteContext(extraMetaData); + } + + public void add(String recordKey) { + this.bloomFilter.add(recordKey); + if (minRecordKey != null) { + minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? 
minRecordKey : recordKey; + } else { + minRecordKey = recordKey; } - @Override - public WriteSupport.FinalizedWriteContext finalizeWrite() { - HashMap extraMetaData = new HashMap<>(); - if (bloomFilter != null) { - extraMetaData - .put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString()); - if (minRecordKey != null && maxRecordKey != null) { - extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey); - extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey); - } - } - return new WriteSupport.FinalizedWriteContext(extraMetaData); - } - - public void add(String recordKey) { - this.bloomFilter.add(recordKey); - if (minRecordKey != null) { - minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey; - } else { - minRecordKey = recordKey; - } - - if (maxRecordKey != null) { - maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey; - } else { - maxRecordKey = recordKey; - } + if (maxRecordKey != null) { + maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? 
maxRecordKey : recordKey; + } else { + maxRecordKey = recordKey; } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/avro/MercifulJsonConverter.java b/hoodie-common/src/main/java/com/uber/hoodie/avro/MercifulJsonConverter.java index 9efe8408d..b775d9068 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/avro/MercifulJsonConverter.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/avro/MercifulJsonConverter.java @@ -17,151 +17,148 @@ package com.uber.hoodie.avro; import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; - import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.GenericRecordBuilder; import org.codehaus.jackson.map.ObjectMapper; /** - * Marjority of this is copied from - * https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java + * Marjority of this is copied from https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java * Adjusted for expected behavior of our use cases */ public class MercifulJsonConverter { - private final ObjectMapper mapper = new ObjectMapper(); - private final Schema baseSchema; - public MercifulJsonConverter(Schema schema) { - this.baseSchema = schema; + private final ObjectMapper mapper = new ObjectMapper(); + private final Schema baseSchema; + + public MercifulJsonConverter(Schema schema) { + this.baseSchema = schema; + } + + + public GenericRecord convert(String json) throws IOException { + try { + return convert(mapper.readValue(json, Map.class), baseSchema); + } catch (IOException e) { + throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage()); + } + } + + private GenericRecord convert(Map raw, Schema schema) + throws IOException { + GenericRecord result = new 
GenericData.Record(schema); + for (Schema.Field f : schema.getFields()) { + String name = f.name(); + Object rawValue = raw.get(name); + if (rawValue != null) { + result.put(f.pos(), typeConvert(rawValue, name, f.schema())); + } } + return result; + } - public GenericRecord convert(String json) throws IOException { - try { - return convert(mapper.readValue(json, Map.class), baseSchema); - } catch (IOException e) { - throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage()); + private Object typeConvert(Object value, String name, Schema schema) throws IOException { + if (isOptional(schema)) { + if (value == null) { + return null; + } else { + schema = getNonNull(schema); + } + } else if (value == null) { + // Always fail on null for non-nullable schemas + throw new JsonConversionException(null, name, schema); + } + + switch (schema.getType()) { + case BOOLEAN: + if (value instanceof Boolean) { + return (Boolean) value; } - } - - private GenericRecord convert(Map raw, Schema schema) - throws IOException { - GenericRecord result = new GenericData.Record(schema); - for (Schema.Field f : schema.getFields()) { - String name = f.name(); - Object rawValue = raw.get(name); - if (rawValue != null) { - result.put(f.pos(), typeConvert(rawValue, name, f.schema())); - } + break; + case DOUBLE: + if (value instanceof Number) { + return ((Number) value).doubleValue(); } - - return result; - } - - private Object typeConvert(Object value, String name, Schema schema) throws IOException { - if (isOptional(schema)) { - if (value == null) { - return null; - } else { - schema = getNonNull(schema); - } - } else if (value == null) { - // Always fail on null for non-nullable schemas - throw new JsonConversionException(null, name, schema); + break; + case FLOAT: + if (value instanceof Number) { + return ((Number) value).floatValue(); } - - switch (schema.getType()) { - case BOOLEAN: - if (value instanceof Boolean) { - return (Boolean) value; - } - break; - case 
DOUBLE: - if (value instanceof Number) { - return ((Number) value).doubleValue(); - } - break; - case FLOAT: - if (value instanceof Number) { - return ((Number) value).floatValue(); - } - break; - case INT: - if (value instanceof Number) { - return ((Number) value).intValue(); - } - break; - case LONG: - if (value instanceof Number) { - return ((Number) value).longValue(); - } - break; - case STRING: - return value.toString(); - case ENUM: - if (schema.getEnumSymbols().contains(value.toString())) { - return new GenericData.EnumSymbol(schema, value.toString()); - } - throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()), - schema.getFullName(), schema); - case RECORD: - return convert((Map) value, schema); - case ARRAY: - Schema elementSchema = schema.getElementType(); - List listRes = new ArrayList(); - for (Object v : (List) value) { - listRes.add(typeConvert(v, name, elementSchema)); - } - return listRes; - case MAP: - Schema valueSchema = schema.getValueType(); - Map mapRes = new HashMap(); - for (Map.Entry v : ((Map) value).entrySet()) { - mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema)); - } - return mapRes; - default: - throw new IllegalArgumentException( - "JsonConverter cannot handle type: " + schema.getType()); + break; + case INT: + if (value instanceof Number) { + return ((Number) value).intValue(); } - throw new JsonConversionException(value, name, schema); - } - - private boolean isOptional(Schema schema) { - return schema.getType().equals(Schema.Type.UNION) && - schema.getTypes().size() == 2 && - (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) || - schema.getTypes().get(1).getType().equals(Schema.Type.NULL)); - } - - private Schema getNonNull(Schema schema) { - List types = schema.getTypes(); - return types.get(0).getType().equals(Schema.Type.NULL) ? 
types.get(1) : types.get(0); - } - - public static class JsonConversionException extends RuntimeException { - - private Object value; - private String fieldName; - private Schema schema; - - public JsonConversionException(Object value, String fieldName, Schema schema) { - this.value = value; - this.fieldName = fieldName; - this.schema = schema; + break; + case LONG: + if (value instanceof Number) { + return ((Number) value).longValue(); } - - @Override - public String toString() { - return String.format("Type conversion error for field %s, %s for %s", - fieldName, value, schema); + break; + case STRING: + return value.toString(); + case ENUM: + if (schema.getEnumSymbols().contains(value.toString())) { + return new GenericData.EnumSymbol(schema, value.toString()); } + throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()), + schema.getFullName(), schema); + case RECORD: + return convert((Map) value, schema); + case ARRAY: + Schema elementSchema = schema.getElementType(); + List listRes = new ArrayList(); + for (Object v : (List) value) { + listRes.add(typeConvert(v, name, elementSchema)); + } + return listRes; + case MAP: + Schema valueSchema = schema.getValueType(); + Map mapRes = new HashMap(); + for (Map.Entry v : ((Map) value).entrySet()) { + mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema)); + } + return mapRes; + default: + throw new IllegalArgumentException( + "JsonConverter cannot handle type: " + schema.getType()); } + throw new JsonConversionException(value, name, schema); + } + + private boolean isOptional(Schema schema) { + return schema.getType().equals(Schema.Type.UNION) && + schema.getTypes().size() == 2 && + (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) || + schema.getTypes().get(1).getType().equals(Schema.Type.NULL)); + } + + private Schema getNonNull(Schema schema) { + List types = schema.getTypes(); + return types.get(0).getType().equals(Schema.Type.NULL) ? 
types.get(1) : types.get(0); + } + + public static class JsonConversionException extends RuntimeException { + + private Object value; + private String fieldName; + private Schema schema; + + public JsonConversionException(Object value, String fieldName, Schema schema) { + this.value = value; + this.fieldName = fieldName; + this.schema = schema; + } + + @Override + public String toString() { + return String.format("Type conversion error for field %s, %s for %s", + fieldName, value, schema); + } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/BloomFilter.java b/hoodie-common/src/main/java/com/uber/hoodie/common/BloomFilter.java index d81e31df3..ce2249179 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/BloomFilter.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/BloomFilter.java @@ -17,84 +17,86 @@ package com.uber.hoodie.common; import com.uber.hoodie.exception.HoodieIndexException; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import javax.xml.bind.DatatypeConverter; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.hadoop.util.bloom.Key; import org.apache.hadoop.util.hash.Hash; -import javax.xml.bind.DatatypeConverter; - -import java.io.*; -import java.nio.charset.StandardCharsets; - /** * A Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}. */ public class BloomFilter { - /** - * Used in computing the optimal Bloom filter size. This approximately equals 0.480453. - */ - public static final double LOG2_SQUARED = Math.log(2) * Math.log(2); - private org.apache.hadoop.util.bloom.BloomFilter filter = null; + /** + * Used in computing the optimal Bloom filter size. This approximately equals 0.480453. 
+ */ + public static final double LOG2_SQUARED = Math.log(2) * Math.log(2); - public BloomFilter(int numEntries, double errorRate) { - this(numEntries, errorRate, Hash.MURMUR_HASH); + private org.apache.hadoop.util.bloom.BloomFilter filter = null; + + public BloomFilter(int numEntries, double errorRate) { + this(numEntries, errorRate, Hash.MURMUR_HASH); + } + + /** + * Create a new Bloom filter with the given configurations. + */ + public BloomFilter(int numEntries, double errorRate, int hashType) { + // Bit size + int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED)); + // Number of the hash functions + int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries); + // The filter + this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType); + } + + /** + * Create the bloom filter from serialized string. + */ + public BloomFilter(String filterStr) { + this.filter = new org.apache.hadoop.util.bloom.BloomFilter(); + byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr); + DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); + try { + this.filter.readFields(dis); + dis.close(); + } catch (IOException e) { + throw new HoodieIndexException("Could not deserialize BloomFilter instance", e); } + } - /** - * Create a new Bloom filter with the given configurations. - */ - public BloomFilter(int numEntries, double errorRate, int hashType) { - // Bit size - int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED)); - // Number of the hash functions - int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries); - // The filter - this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType); + public void add(String key) { + if (key == null) { + throw new NullPointerException("Key cannot by null"); } + filter.add(new Key(key.getBytes(StandardCharsets.UTF_8))); + } - /** - * Create the bloom filter from serialized string. 
- */ - public BloomFilter(String filterStr) { - this.filter = new org.apache.hadoop.util.bloom.BloomFilter(); - byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr); - DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); - try { - this.filter.readFields(dis); - dis.close(); - } catch (IOException e) { - throw new HoodieIndexException("Could not deserialize BloomFilter instance", e); - } + public boolean mightContain(String key) { + if (key == null) { + throw new NullPointerException("Key cannot by null"); } + return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); + } - public void add(String key) { - if (key == null) { - throw new NullPointerException("Key cannot by null"); - } - filter.add(new Key(key.getBytes(StandardCharsets.UTF_8))); - } - - public boolean mightContain(String key) { - if (key == null) { - throw new NullPointerException("Key cannot by null"); - } - return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8))); - } - - /** - * Serialize the bloom filter as a string. - */ - public String serializeToString() { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(baos); - try { - filter.write(dos); - byte[] bytes = baos.toByteArray(); - dos.close(); - return DatatypeConverter.printBase64Binary(bytes); - } catch (IOException e) { - throw new HoodieIndexException("Could not serialize BloomFilter instance", e); - } + /** + * Serialize the bloom filter as a string. 
+ */ + public String serializeToString() { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + try { + filter.write(dos); + byte[] bytes = baos.toByteArray(); + dos.close(); + return DatatypeConverter.printBase64Binary(bytes); + } catch (IOException e) { + throw new HoodieIndexException("Could not serialize BloomFilter instance", e); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieCleanStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieCleanStat.java index ee3177f1d..d2de837ef 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieCleanStat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieCleanStat.java @@ -18,7 +18,6 @@ package com.uber.hoodie.common; import com.uber.hoodie.common.model.HoodieCleaningPolicy; import com.uber.hoodie.common.table.timeline.HoodieInstant; - import java.io.Serializable; import java.util.List; import java.util.Optional; @@ -27,100 +26,102 @@ import java.util.Optional; * Collects stats about a single partition clean operation */ public class HoodieCleanStat implements Serializable { - // Policy used - private final HoodieCleaningPolicy policy; - // Partition path cleaned - private final String partitionPath; - // The patterns that were generated for the delete operation - private final List deletePathPatterns; - private final List successDeleteFiles; - // Files that could not be deleted - private final List failedDeleteFiles; - // Earliest commit that was retained in this clean - private final String earliestCommitToRetain; - public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, - List deletePathPatterns, List successDeleteFiles, - List failedDeleteFiles, String earliestCommitToRetain) { - this.policy = policy; - this.partitionPath = partitionPath; - this.deletePathPatterns = deletePathPatterns; - this.successDeleteFiles = successDeleteFiles; - this.failedDeleteFiles = 
failedDeleteFiles; - this.earliestCommitToRetain = earliestCommitToRetain; + // Policy used + private final HoodieCleaningPolicy policy; + // Partition path cleaned + private final String partitionPath; + // The patterns that were generated for the delete operation + private final List deletePathPatterns; + private final List successDeleteFiles; + // Files that could not be deleted + private final List failedDeleteFiles; + // Earliest commit that was retained in this clean + private final String earliestCommitToRetain; + + public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath, + List deletePathPatterns, List successDeleteFiles, + List failedDeleteFiles, String earliestCommitToRetain) { + this.policy = policy; + this.partitionPath = partitionPath; + this.deletePathPatterns = deletePathPatterns; + this.successDeleteFiles = successDeleteFiles; + this.failedDeleteFiles = failedDeleteFiles; + this.earliestCommitToRetain = earliestCommitToRetain; + } + + public HoodieCleaningPolicy getPolicy() { + return policy; + } + + public String getPartitionPath() { + return partitionPath; + } + + public List getDeletePathPatterns() { + return deletePathPatterns; + } + + public List getSuccessDeleteFiles() { + return successDeleteFiles; + } + + public List getFailedDeleteFiles() { + return failedDeleteFiles; + } + + public String getEarliestCommitToRetain() { + return earliestCommitToRetain; + } + + public static HoodieCleanStat.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private HoodieCleaningPolicy policy; + private List deletePathPatterns; + private List successDeleteFiles; + private List failedDeleteFiles; + private String partitionPath; + private String earliestCommitToRetain; + + public Builder withPolicy(HoodieCleaningPolicy policy) { + this.policy = policy; + return this; } - public HoodieCleaningPolicy getPolicy() { - return policy; + public Builder withDeletePathPattern(List deletePathPatterns) { + 
this.deletePathPatterns = deletePathPatterns; + return this; } - public String getPartitionPath() { - return partitionPath; + public Builder withSuccessfulDeletes(List successDeleteFiles) { + this.successDeleteFiles = successDeleteFiles; + return this; } - public List getDeletePathPatterns() { - return deletePathPatterns; + public Builder withFailedDeletes(List failedDeleteFiles) { + this.failedDeleteFiles = failedDeleteFiles; + return this; } - public List getSuccessDeleteFiles() { - return successDeleteFiles; + public Builder withPartitionPath(String partitionPath) { + this.partitionPath = partitionPath; + return this; } - public List getFailedDeleteFiles() { - return failedDeleteFiles; + public Builder withEarliestCommitRetained(Optional earliestCommitToRetain) { + this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ? + earliestCommitToRetain.get().getTimestamp() : + "-1"; + return this; } - public String getEarliestCommitToRetain() { - return earliestCommitToRetain; - } - - public static HoodieCleanStat.Builder newBuilder() { - return new Builder(); - } - - public static class Builder { - private HoodieCleaningPolicy policy; - private List deletePathPatterns; - private List successDeleteFiles; - private List failedDeleteFiles; - private String partitionPath; - private String earliestCommitToRetain; - - public Builder withPolicy(HoodieCleaningPolicy policy) { - this.policy = policy; - return this; - } - - public Builder withDeletePathPattern(List deletePathPatterns) { - this.deletePathPatterns = deletePathPatterns; - return this; - } - - public Builder withSuccessfulDeletes(List successDeleteFiles) { - this.successDeleteFiles = successDeleteFiles; - return this; - } - - public Builder withFailedDeletes(List failedDeleteFiles) { - this.failedDeleteFiles= failedDeleteFiles; - return this; - } - - public Builder withPartitionPath(String partitionPath) { - this.partitionPath = partitionPath; - return this; - } - - public Builder 
withEarliestCommitRetained(Optional earliestCommitToRetain) { - this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ? - earliestCommitToRetain.get().getTimestamp() : - "-1"; - return this; - } - - public HoodieCleanStat build() { - return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, - successDeleteFiles, failedDeleteFiles, earliestCommitToRetain); - } + public HoodieCleanStat build() { + return new HoodieCleanStat(policy, partitionPath, deletePathPatterns, + successDeleteFiles, failedDeleteFiles, earliestCommitToRetain); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieJsonPayload.java b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieJsonPayload.java index 85ccfc744..c8d89d96b 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieJsonPayload.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieJsonPayload.java @@ -19,13 +19,6 @@ package com.uber.hoodie.common; import com.uber.hoodie.avro.MercifulJsonConverter; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.exception.HoodieException; - -import org.apache.avro.Schema; -import org.apache.avro.generic.IndexedRecord; -import org.apache.commons.io.IOUtils; -import org.codehaus.jackson.JsonNode; -import org.codehaus.jackson.map.ObjectMapper; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -34,75 +27,85 @@ import java.util.Optional; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; +import org.apache.avro.Schema; +import org.apache.avro.generic.IndexedRecord; +import org.apache.commons.io.IOUtils; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.map.ObjectMapper; public class HoodieJsonPayload implements HoodieRecordPayload { - private byte[] jsonDataCompressed; - private int dataSize; - public HoodieJsonPayload(String json) throws IOException { - 
this.jsonDataCompressed = compressData(json); - this.dataSize = json.length(); + private byte[] jsonDataCompressed; + private int dataSize; + + public HoodieJsonPayload(String json) throws IOException { + this.jsonDataCompressed = compressData(json); + this.dataSize = json.length(); + } + + @Override + public HoodieJsonPayload preCombine(HoodieJsonPayload another) { + return this; + } + + @Override + public Optional combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) + throws IOException { + return getInsertValue(schema); + } + + @Override + public Optional getInsertValue(Schema schema) throws IOException { + MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema); + return Optional.of(jsonConverter.convert(getJsonData())); + } + + private String getJsonData() throws IOException { + return unCompressData(jsonDataCompressed); + } + + private byte[] compressData(String jsonData) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION); + DeflaterOutputStream dos = + new DeflaterOutputStream(baos, deflater, true); + try { + dos.write(jsonData.getBytes()); + } finally { + dos.flush(); + dos.close(); + // Its important to call this. 
+ // Deflater takes off-heap native memory and does not release until GC kicks in + deflater.end(); } + return baos.toByteArray(); + } - @Override public HoodieJsonPayload preCombine(HoodieJsonPayload another) { - return this; + + private String unCompressData(byte[] data) throws IOException { + InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data)); + try { + StringWriter sw = new StringWriter(dataSize); + IOUtils.copy(iis, sw); + return sw.toString(); + } finally { + iis.close(); } + } - @Override public Optional combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException { - return getInsertValue(schema); + private String getFieldFromJsonOrFail(String field) throws IOException { + JsonNode node = new ObjectMapper().readTree(getJsonData()); + if (!node.has(field)) { + throw new HoodieException("Field :" + field + " not found in payload => " + node.toString()); } + return node.get(field).getTextValue(); + } - @Override public Optional getInsertValue(Schema schema) throws IOException { - MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema); - return Optional.of(jsonConverter.convert(getJsonData())); - } + public String getRowKey(String keyColumnField) throws IOException { + return getFieldFromJsonOrFail(keyColumnField); + } - private String getJsonData() throws IOException { - return unCompressData(jsonDataCompressed); - } - - private byte[] compressData(String jsonData) throws IOException { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION); - DeflaterOutputStream dos = - new DeflaterOutputStream(baos, deflater, true); - try { - dos.write(jsonData.getBytes()); - } finally { - dos.flush(); - dos.close(); - // Its important to call this. 
- // Deflater takes off-heap native memory and does not release until GC kicks in - deflater.end(); - } - return baos.toByteArray(); - } - - - private String unCompressData(byte[] data) throws IOException { - InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data)); - try { - StringWriter sw = new StringWriter(dataSize); - IOUtils.copy(iis, sw); - return sw.toString(); - } finally { - iis.close(); - } - } - - private String getFieldFromJsonOrFail(String field) throws IOException { - JsonNode node = new ObjectMapper().readTree(getJsonData()); - if(!node.has(field)) { - throw new HoodieException("Field :" + field + " not found in payload => " + node.toString()); - } - return node.get(field).getTextValue(); - } - - public String getRowKey(String keyColumnField) throws IOException { - return getFieldFromJsonOrFail(keyColumnField); - } - - public String getPartitionPath(String partitionPathField) throws IOException { - return getFieldFromJsonOrFail(partitionPathField); - } + public String getPartitionPath(String partitionPathField) throws IOException { + return getFieldFromJsonOrFail(partitionPathField); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieRollbackStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieRollbackStat.java index e9d271dbc..72afe1c73 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieRollbackStat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/HoodieRollbackStat.java @@ -16,81 +16,82 @@ package com.uber.hoodie.common; -import org.apache.hadoop.fs.FileStatus; - -import java.io.File; import java.io.Serializable; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileStatus; /** * Collects stats about a single partition clean operation */ public class HoodieRollbackStat implements Serializable { - // Partition path - private final String partitionPath; - private final List successDeleteFiles; 
- // Files that could not be deleted - private final List failedDeleteFiles; - // Count of HoodieLogFile to commandBlocks written for a particular rollback - private final Map commandBlocksCount; - public HoodieRollbackStat(String partitionPath, List successDeleteFiles, - List failedDeleteFiles, Map commandBlocksCount) { - this.partitionPath = partitionPath; - this.successDeleteFiles = successDeleteFiles; - this.failedDeleteFiles = failedDeleteFiles; - this.commandBlocksCount = commandBlocksCount; + // Partition path + private final String partitionPath; + private final List successDeleteFiles; + // Files that could not be deleted + private final List failedDeleteFiles; + // Count of HoodieLogFile to commandBlocks written for a particular rollback + private final Map commandBlocksCount; + + public HoodieRollbackStat(String partitionPath, List successDeleteFiles, + List failedDeleteFiles, Map commandBlocksCount) { + this.partitionPath = partitionPath; + this.successDeleteFiles = successDeleteFiles; + this.failedDeleteFiles = failedDeleteFiles; + this.commandBlocksCount = commandBlocksCount; + } + + public Map getCommandBlocksCount() { + return commandBlocksCount; + } + + public String getPartitionPath() { + return partitionPath; + } + + public List getSuccessDeleteFiles() { + return successDeleteFiles; + } + + public List getFailedDeleteFiles() { + return failedDeleteFiles; + } + + public static HoodieRollbackStat.Builder newBuilder() { + return new Builder(); + } + + public static class Builder { + + private List successDeleteFiles; + private List failedDeleteFiles; + private Map commandBlocksCount; + private String partitionPath; + + public Builder withDeletedFileResults(Map deletedFiles) { + //noinspection Convert2MethodRef + successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue()) + .map(s -> s.getKey().getPath().toString()).collect(Collectors.toList()); + failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue()) + 
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList()); + return this; } - public Map getCommandBlocksCount() { - return commandBlocksCount; + public Builder withRollbackBlockAppendResults(Map commandBlocksCount) { + this.commandBlocksCount = commandBlocksCount; + return this; } - public String getPartitionPath() { - return partitionPath; + public Builder withPartitionPath(String partitionPath) { + this.partitionPath = partitionPath; + return this; } - public List getSuccessDeleteFiles() { - return successDeleteFiles; - } - - public List getFailedDeleteFiles() { - return failedDeleteFiles; - } - - public static HoodieRollbackStat.Builder newBuilder() { - return new Builder(); - } - - public static class Builder { - private List successDeleteFiles; - private List failedDeleteFiles; - private Map commandBlocksCount; - private String partitionPath; - - public Builder withDeletedFileResults(Map deletedFiles) { - //noinspection Convert2MethodRef - successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue()) - .map(s -> s.getKey().getPath().toString()).collect(Collectors.toList()); - failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue()) - .map(s -> s.getKey().getPath().toString()).collect(Collectors.toList()); - return this; - } - - public Builder withRollbackBlockAppendResults(Map commandBlocksCount) { - this.commandBlocksCount = commandBlocksCount; - return this; - } - - public Builder withPartitionPath(String partitionPath) { - this.partitionPath = partitionPath; - return this; - } - - public HoodieRollbackStat build() { - return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount); - } + public HoodieRollbackStat build() { + return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, + commandBlocksCount); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/ActionType.java 
b/hoodie-common/src/main/java/com/uber/hoodie/common/model/ActionType.java index 5e8a8c2a4..4a4427696 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/ActionType.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/ActionType.java @@ -17,5 +17,5 @@ package com.uber.hoodie.common.model; public enum ActionType { - commit, savepoint, compaction, clean, rollback; + commit, savepoint, compaction, clean, rollback; } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/CompactionWriteStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/CompactionWriteStat.java index 1ff704bbb..40f7fc363 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/CompactionWriteStat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/CompactionWriteStat.java @@ -17,13 +17,7 @@ package com.uber.hoodie.common.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.uber.hoodie.common.util.FSUtils; import java.io.Serializable; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Collectors; -import org.apache.hadoop.fs.Path; @JsonIgnoreProperties(ignoreUnknown = true) public class CompactionWriteStat implements Serializable { @@ -34,7 +28,8 @@ public class CompactionWriteStat implements Serializable { private long totalLogFiles; private long totalRecordsToBeUpdate; - public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles, long totalLogRecords, + public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles, + long totalLogRecords, long totalRecordsToUpdate) { this.writeStat = writeStat; this.partitionPath = partitionPath; @@ -58,6 +53,7 @@ public class CompactionWriteStat implements Serializable { public long getTotalRecordsToBeUpdate() { return totalRecordsToBeUpdate; } + public HoodieWriteStat getHoodieWriteStat() { return 
writeStat; } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/FileSlice.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/FileSlice.java index be8b6c1c4..b0f4c4182 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/FileSlice.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/FileSlice.java @@ -19,79 +19,75 @@ package com.uber.hoodie.common.model; import java.io.Serializable; -import java.util.List; import java.util.Optional; import java.util.TreeSet; -import java.util.stream.Collectors; import java.util.stream.Stream; /** - * Within a file group, a slice is a combination of data file written at a commit time - * and list of log files, containing changes to the data file from that commit time + * Within a file group, a slice is a combination of data file written at a commit time and list of + * log files, containing changes to the data file from that commit time */ public class FileSlice implements Serializable { - /** - * id of the slice - */ - private String fileId; + /** + * id of the slice + */ + private String fileId; - /** - * Point in the timeline, at which the slice was created - */ - private String baseCommitTime; + /** + * Point in the timeline, at which the slice was created + */ + private String baseCommitTime; - /** - * data file, with the compacted data, for this slice - * - */ - private HoodieDataFile dataFile; + /** + * data file, with the compacted data, for this slice + */ + private HoodieDataFile dataFile; - /** - * List of appendable log files with real time data - * - Sorted with greater log version first - * - Always empty for copy_on_write storage. - */ - private final TreeSet logFiles; + /** + * List of appendable log files with real time data - Sorted with greater log version first - + * Always empty for copy_on_write storage. 
+ */ + private final TreeSet logFiles; - public FileSlice(String baseCommitTime, String fileId) { - this.fileId = fileId; - this.baseCommitTime = baseCommitTime; - this.dataFile = null; - this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator()); - } + public FileSlice(String baseCommitTime, String fileId) { + this.fileId = fileId; + this.baseCommitTime = baseCommitTime; + this.dataFile = null; + this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator()); + } - public void setDataFile(HoodieDataFile dataFile) { - this.dataFile = dataFile; - } + public void setDataFile(HoodieDataFile dataFile) { + this.dataFile = dataFile; + } - public void addLogFile(HoodieLogFile logFile) { - this.logFiles.add(logFile); - } + public void addLogFile(HoodieLogFile logFile) { + this.logFiles.add(logFile); + } - public Stream getLogFiles() { - return logFiles.stream(); - } + public Stream getLogFiles() { + return logFiles.stream(); + } - public String getBaseCommitTime() { - return baseCommitTime; - } + public String getBaseCommitTime() { + return baseCommitTime; + } - public String getFileId() { - return fileId; - } + public String getFileId() { + return fileId; + } - public Optional getDataFile() { - return Optional.ofNullable(dataFile); - } + public Optional getDataFile() { + return Optional.ofNullable(dataFile); + } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("FileSlice {"); - sb.append("baseCommitTime=").append(baseCommitTime); - sb.append(", dataFile='").append(dataFile).append('\''); - sb.append(", logFiles='").append(logFiles).append('\''); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("FileSlice {"); + sb.append("baseCommitTime=").append(baseCommitTime); + sb.append(", dataFile='").append(dataFile).append('\''); + sb.append(", logFiles='").append(logFiles).append('\''); + sb.append('}'); + return sb.toString(); + } } diff 
--git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieArchivedLogFile.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieArchivedLogFile.java index 100cd8382..fb2038f38 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieArchivedLogFile.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieArchivedLogFile.java @@ -23,18 +23,18 @@ import org.apache.hadoop.fs.Path; public class HoodieArchivedLogFile extends HoodieLogFile { - public static final String ARCHIVE_EXTENSION = ".archive"; + public static final String ARCHIVE_EXTENSION = ".archive"; - public HoodieArchivedLogFile(FileStatus fileStatus) { - super(fileStatus); - } + public HoodieArchivedLogFile(FileStatus fileStatus) { + super(fileStatus); + } - public HoodieArchivedLogFile(Path logPath) { - super(logPath); - } + public HoodieArchivedLogFile(Path logPath) { + super(logPath); + } - @Override - public String toString() { - return "HoodieArchivedLogFile {" + super.getPath() + '}'; - } + @Override + public String toString() { + return "HoodieArchivedLogFile {" + super.getPath() + '}'; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieAvroPayload.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieAvroPayload.java index 9fc0b3570..a6c45c737 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieAvroPayload.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieAvroPayload.java @@ -17,40 +17,37 @@ package com.uber.hoodie.common.model; import com.uber.hoodie.common.util.HoodieAvroUtils; - +import java.io.IOException; import java.util.Optional; - import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; -import java.io.IOException; - /** - * This is a payload to wrap a existing Hoodie Avro Record. 
- * Useful to create a HoodieRecord over existing GenericRecords in a hoodie datasets (useful in compactions) - * + * This is a payload to wrap an existing Hoodie Avro Record. Useful to create a HoodieRecord over + * existing GenericRecords in a hoodie dataset (useful in compactions) */ public class HoodieAvroPayload implements HoodieRecordPayload { - private final Optional record; - public HoodieAvroPayload(Optional record) { - this.record = record; - } + private final Optional record; - @Override - public HoodieAvroPayload preCombine(HoodieAvroPayload another) { - return this; - } + public HoodieAvroPayload(Optional record) { + this.record = record; + } - @Override - public Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) - throws IOException { - return getInsertValue(schema); - } + @Override + public HoodieAvroPayload preCombine(HoodieAvroPayload another) { + return this; + } - @Override - public Optional getInsertValue(Schema schema) throws IOException { - return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema)); - } + @Override + public Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) + throws IOException { + return getInsertValue(schema); + } + + @Override + public Optional getInsertValue(Schema schema) throws IOException { + return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema)); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCleaningPolicy.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCleaningPolicy.java index c351ef1b9..4b12b19e6 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCleaningPolicy.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCleaningPolicy.java @@ -17,6 +17,6 @@ package com.uber.hoodie.common.model; public enum HoodieCleaningPolicy { - KEEP_LATEST_FILE_VERSIONS, - KEEP_LATEST_COMMITS + KEEP_LATEST_FILE_VERSIONS, + KEEP_LATEST_COMMITS } diff --git 
a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCommitMetadata.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCommitMetadata.java index 19787ebed..47253637b 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCommitMetadata.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCommitMetadata.java @@ -17,8 +17,13 @@ package com.uber.hoodie.common.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -import com.fasterxml.jackson.databind.DeserializationFeature; +import java.io.IOException; +import java.io.Serializable; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -27,196 +32,195 @@ import org.codehaus.jackson.annotate.JsonMethod; import org.codehaus.jackson.map.DeserializationConfig.Feature; import org.codehaus.jackson.map.ObjectMapper; -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - /** * All the metadata that gets stored along with a commit. 
*/ @JsonIgnoreProperties(ignoreUnknown = true) public class HoodieCommitMetadata implements Serializable { - private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class); - protected Map> partitionToWriteStats; - private Map extraMetadataMap; + private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class); + protected Map> partitionToWriteStats; - public HoodieCommitMetadata() { - extraMetadataMap = new HashMap<>(); - partitionToWriteStats = new HashMap<>(); + private Map extraMetadataMap; + + public HoodieCommitMetadata() { + extraMetadataMap = new HashMap<>(); + partitionToWriteStats = new HashMap<>(); + } + + public void addWriteStat(String partitionPath, HoodieWriteStat stat) { + if (!partitionToWriteStats.containsKey(partitionPath)) { + partitionToWriteStats.put(partitionPath, new ArrayList<>()); } + partitionToWriteStats.get(partitionPath).add(stat); + } - public void addWriteStat(String partitionPath, HoodieWriteStat stat) { - if (!partitionToWriteStats.containsKey(partitionPath)) { - partitionToWriteStats.put(partitionPath, new ArrayList<>()); + public void addMetadata(String metaKey, String value) { + extraMetadataMap.put(metaKey, value); + } + + public List getWriteStats(String partitionPath) { + return partitionToWriteStats.get(partitionPath); + } + + public Map getExtraMetadata() { + return extraMetadataMap; + } + + public Map> getPartitionToWriteStats() { + return partitionToWriteStats; + } + + public String getMetadata(String metaKey) { + return extraMetadataMap.get(metaKey); + } + + public HashMap getFileIdAndRelativePaths() { + HashMap filePaths = new HashMap<>(); + // list all partitions paths + for (Map.Entry> entry : getPartitionToWriteStats().entrySet()) { + for (HoodieWriteStat stat : entry.getValue()) { + filePaths.put(stat.getFileId(), stat.getPath()); + } + } + return filePaths; + } + + public HashMap getFileIdAndFullPaths(String basePath) { + HashMap fullPaths = new HashMap<>(); + for 
(Map.Entry entry : getFileIdAndRelativePaths().entrySet()) { + String fullPath = + (entry.getValue() != null) ? (new Path(basePath, entry.getValue())).toString() : null; + fullPaths.put(entry.getKey(), fullPath); + } + return fullPaths; + } + + public String toJsonString() throws IOException { + if (partitionToWriteStats.containsKey(null)) { + log.info("partition path is null for " + partitionToWriteStats.get(null)); + partitionToWriteStats.remove(null); + } + ObjectMapper mapper = new ObjectMapper(); + mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); + return mapper.defaultPrettyPrintingWriter().writeValueAsString(this); + } + + public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException { + if (jsonStr == null || jsonStr.isEmpty()) { + // For empty commit file (no data or something bad happened). + return new HoodieCommitMetadata(); + } + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); + return mapper.readValue(jsonStr, HoodieCommitMetadata.class); + } + + // Here the functions are named "fetch" instead of "get", to avoid the json conversion. 
+ public long fetchTotalPartitionsWritten() { + return partitionToWriteStats.size(); + } + + public long fetchTotalFilesInsert() { + long totalFilesInsert = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) { + totalFilesInsert++; } - partitionToWriteStats.get(partitionPath).add(stat); + } } + return totalFilesInsert; + } - public void addMetadata(String metaKey, String value) { - extraMetadataMap.put(metaKey, value); - } - - public List getWriteStats(String partitionPath) { - return partitionToWriteStats.get(partitionPath); - } - - public Map getExtraMetadata() { return extraMetadataMap; } - - public Map> getPartitionToWriteStats() { - return partitionToWriteStats; - } - - public String getMetadata(String metaKey) { - return extraMetadataMap.get(metaKey); - } - - public HashMap getFileIdAndRelativePaths() { - HashMap filePaths = new HashMap<>(); - // list all partitions paths - for (Map.Entry> entry: getPartitionToWriteStats().entrySet()) { - for (HoodieWriteStat stat: entry.getValue()) { - filePaths.put(stat.getFileId(), stat.getPath()); - } + public long fetchTotalFilesUpdated() { + long totalFilesUpdated = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) { + totalFilesUpdated++; } - return filePaths; + } } + return totalFilesUpdated; + } - public HashMap getFileIdAndFullPaths(String basePath) { - HashMap fullPaths = new HashMap<>(); - for (Map.Entry entry: getFileIdAndRelativePaths().entrySet()) { - String fullPath = (entry.getValue() != null) ? 
(new Path(basePath, entry.getValue())).toString() : null; - fullPaths.put(entry.getKey(), fullPath); - } return fullPaths; + public long fetchTotalUpdateRecordsWritten() { + long totalUpdateRecordsWritten = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + totalUpdateRecordsWritten += stat.getNumUpdateWrites(); + } } + return totalUpdateRecordsWritten; + } - public String toJsonString() throws IOException { - if(partitionToWriteStats.containsKey(null)) { - log.info("partition path is null for " + partitionToWriteStats.get(null)); - partitionToWriteStats.remove(null); + public long fetchTotalInsertRecordsWritten() { + long totalInsertRecordsWritten = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) { + totalInsertRecordsWritten += stat.getNumWrites(); } - ObjectMapper mapper = new ObjectMapper(); - mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); - return mapper.defaultPrettyPrintingWriter().writeValueAsString(this); + } + } + return totalInsertRecordsWritten; + } + + public long fetchTotalRecordsWritten() { + long totalRecordsWritten = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + totalRecordsWritten += stat.getNumWrites(); + } + } + return totalRecordsWritten; + } + + public long fetchTotalBytesWritten() { + long totalBytesWritten = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + totalBytesWritten += stat.getTotalWriteBytes(); + } + } + return totalBytesWritten; + } + + public long fetchTotalWriteErrors() { + long totalWriteErrors = 0; + for (List stats : partitionToWriteStats.values()) { + for (HoodieWriteStat stat : stats) { + totalWriteErrors += stat.getTotalWriteErrors(); + } + } + return totalWriteErrors; + } + + @Override + public boolean equals(Object o) { + if (this 
== o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; } - public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException { - if (jsonStr == null || jsonStr.isEmpty()) { - // For empty commit file (no data or somethings bad happen). - return new HoodieCommitMetadata(); - } - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); - mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); - return mapper.readValue(jsonStr, HoodieCommitMetadata.class); - } + HoodieCommitMetadata that = (HoodieCommitMetadata) o; - // Here the functions are named "fetch" instead of "get", to get avoid of the json conversion. - public long fetchTotalPartitionsWritten() { - return partitionToWriteStats.size(); - } + return partitionToWriteStats != null ? + partitionToWriteStats.equals(that.partitionToWriteStats) : + that.partitionToWriteStats == null; - public long fetchTotalFilesInsert() { - long totalFilesInsert = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) { - totalFilesInsert ++; - } - } - } - return totalFilesInsert; - } + } - public long fetchTotalFilesUpdated() { - long totalFilesUpdated = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) { - totalFilesUpdated ++; - } - } - } - return totalFilesUpdated; - } + @Override + public int hashCode() { + return partitionToWriteStats != null ? 
partitionToWriteStats.hashCode() : 0; + } - public long fetchTotalUpdateRecordsWritten() { - long totalUpdateRecordsWritten = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - totalUpdateRecordsWritten += stat.getNumUpdateWrites(); - } - } - return totalUpdateRecordsWritten; - } - - public long fetchTotalInsertRecordsWritten() { - long totalInsertRecordsWritten = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) { - totalInsertRecordsWritten += stat.getNumWrites(); - } - } - } - return totalInsertRecordsWritten; - } - - public long fetchTotalRecordsWritten() { - long totalRecordsWritten = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - totalRecordsWritten += stat.getNumWrites(); - } - } - return totalRecordsWritten; - } - - public long fetchTotalBytesWritten() { - long totalBytesWritten = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - totalBytesWritten += stat.getTotalWriteBytes(); - } - } - return totalBytesWritten; - } - - public long fetchTotalWriteErrors() { - long totalWriteErrors = 0; - for (List stats : partitionToWriteStats.values()) { - for (HoodieWriteStat stat : stats) { - totalWriteErrors += stat.getTotalWriteErrors(); - } - } - return totalWriteErrors; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - - HoodieCommitMetadata that = (HoodieCommitMetadata) o; - - return partitionToWriteStats != null ? - partitionToWriteStats.equals(that.partitionToWriteStats) : - that.partitionToWriteStats == null; - - } - - @Override - public int hashCode() { - return partitionToWriteStats != null ? 
partitionToWriteStats.hashCode() : 0; - } - - public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException { - return fromJsonString(new String(bytes, Charset.forName("utf-8"))); - } + public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException { + return fromJsonString(new String(bytes, Charset.forName("utf-8"))); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCompactionMetadata.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCompactionMetadata.java index dc37649a0..043098f36 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCompactionMetadata.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieCompactionMetadata.java @@ -16,15 +16,12 @@ package com.uber.hoodie.common.model; -import com.google.common.collect.Maps; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.function.BinaryOperator; -import java.util.function.Supplier; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.codehaus.jackson.annotate.JsonAutoDetect; @@ -33,9 +30,11 @@ import org.codehaus.jackson.map.DeserializationConfig.Feature; import org.codehaus.jackson.map.ObjectMapper; /** - * Place holder for the compaction specific meta-data, uses all the details used in a normal HoodieCommitMetadata + * Place holder for the compaction specific meta-data, uses all the details used in a normal + * HoodieCommitMetadata */ public class HoodieCompactionMetadata extends HoodieCommitMetadata { + private static volatile Logger log = LogManager.getLogger(HoodieCompactionMetadata.class); protected HashMap> partitionToCompactionWriteStats; @@ -60,7 +59,7 @@ public class HoodieCompactionMetadata extends HoodieCommitMetadata { } public String toJsonString() throws IOException { - 
if(partitionToCompactionWriteStats.containsKey(null)) { + if (partitionToCompactionWriteStats.containsKey(null)) { log.info("partition path is null for " + partitionToCompactionWriteStats.get(null)); partitionToCompactionWriteStats.remove(null); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDataFile.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDataFile.java index adf2f09d0..baa3c755c 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDataFile.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDataFile.java @@ -17,56 +17,54 @@ package com.uber.hoodie.common.model; import com.uber.hoodie.common.util.FSUtils; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; - import java.io.Serializable; import java.util.Comparator; +import org.apache.hadoop.fs.FileStatus; public class HoodieDataFile implements Serializable { - private FileStatus fileStatus; - public HoodieDataFile(FileStatus fileStatus) { - this.fileStatus = fileStatus; - } + private FileStatus fileStatus; - public String getFileId() { - return FSUtils.getFileId(fileStatus.getPath().getName()); - } + public HoodieDataFile(FileStatus fileStatus) { + this.fileStatus = fileStatus; + } - public String getCommitTime() { - return FSUtils.getCommitTime(fileStatus.getPath().getName()); - } + public String getFileId() { + return FSUtils.getFileId(fileStatus.getPath().getName()); + } - public String getPath() { - return fileStatus.getPath().toString(); - } + public String getCommitTime() { + return FSUtils.getCommitTime(fileStatus.getPath().getName()); + } - public String getFileName() { - return fileStatus.getPath().getName(); - } + public String getPath() { + return fileStatus.getPath().toString(); + } - public FileStatus getFileStatus() { - return fileStatus; - } + public String getFileName() { + return fileStatus.getPath().getName(); + } - public static 
Comparator getCommitTimeComparator() { - return (o1, o2) -> { - // reverse the order - return o2.getCommitTime().compareTo(o1.getCommitTime()); - }; - } + public FileStatus getFileStatus() { + return fileStatus; + } - public long getFileSize() { - return fileStatus.getLen(); - } + public static Comparator getCommitTimeComparator() { + return (o1, o2) -> { + // reverse the order + return o2.getCommitTime().compareTo(o1.getCommitTime()); + }; + } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieDataFile {"); - sb.append("fileStatus=").append(fileStatus); - sb.append('}'); - return sb.toString(); - } + public long getFileSize() { + return fileStatus.getLen(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieDataFile {"); + sb.append("fileStatus=").append(fileStatus); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDeltaWriteStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDeltaWriteStat.java index 2f3ee88bd..a801338d5 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDeltaWriteStat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieDeltaWriteStat.java @@ -24,22 +24,22 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; @JsonIgnoreProperties(ignoreUnknown = true) public class HoodieDeltaWriteStat extends HoodieWriteStat { - private int logVersion; - private long logOffset; + private int logVersion; + private long logOffset; - public void setLogVersion(int logVersion) { - this.logVersion = logVersion; - } + public void setLogVersion(int logVersion) { + this.logVersion = logVersion; + } - public int getLogVersion() { - return logVersion; - } + public int getLogVersion() { + return logVersion; + } - public void setLogOffset(long logOffset) { - this.logOffset = logOffset; - } + public void setLogOffset(long logOffset) { 
+ this.logOffset = logOffset; + } - public long getLogOffset() { - return logOffset; - } + public long getLogOffset() { + return logOffset; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileFormat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileFormat.java index 8ef06ba53..497a9a3c8 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileFormat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileFormat.java @@ -17,15 +17,15 @@ package com.uber.hoodie.common.model; public enum HoodieFileFormat { - PARQUET(".parquet"), HOODIE_LOG(".log"); + PARQUET(".parquet"), HOODIE_LOG(".log"); - private final String extension; + private final String extension; - HoodieFileFormat(String extension) { - this.extension = extension; - } + HoodieFileFormat(String extension) { + this.extension = extension; + } - public String getFileExtension() { - return extension; - } + public String getFileExtension() { + return extension; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileGroup.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileGroup.java index 97781850a..d5884fb99 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileGroup.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieFileGroup.java @@ -20,9 +20,6 @@ package com.uber.hoodie.common.model; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; - -import org.apache.commons.lang3.tuple.Pair; - import java.io.Serializable; import java.util.Comparator; import java.util.List; @@ -35,212 +32,184 @@ import java.util.stream.Stream; */ public class HoodieFileGroup implements Serializable { - public static Comparator getReverseCommitTimeComparator() { - return (o1, o2) -> { - // reverse the order - return o2.compareTo(o1); - }; + public static Comparator 
getReverseCommitTimeComparator() { + return (o1, o2) -> { + // reverse the order + return o2.compareTo(o1); + }; + } + + + /** + * Partition containing the file group. + */ + private final String partitionPath; + + /** + * uniquely identifies the file group + */ + private final String id; + + /** + * Slices of files in this group, sorted with greater commit first. + */ + private final TreeMap fileSlices; + + /** + * Timeline, based on which all getter work + */ + private final HoodieTimeline timeline; + + /** + * The last completed instant, that acts as a high watermark for all getters + */ + private final Optional lastInstant; + + public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) { + this.partitionPath = partitionPath; + this.id = id; + this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator()); + this.timeline = timeline; + this.lastInstant = timeline.lastInstant(); + } + + /** + * Add a new datafile into the file group + */ + public void addDataFile(HoodieDataFile dataFile) { + if (!fileSlices.containsKey(dataFile.getCommitTime())) { + fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id)); } + fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile); + } - - /** - * Partition containing the file group. - */ - private final String partitionPath; - - /** - * uniquely identifies the file group - */ - private final String id; - - /** - * Slices of files in this group, sorted with greater commit first. 
- */ - private final TreeMap fileSlices; - - /** - * Timeline, based on which all getter work - */ - private final HoodieTimeline timeline; - - /** - * The last completed instant, that acts as a high watermark for all - * getters - */ - private final Optional lastInstant; - - public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) { - this.partitionPath = partitionPath; - this.id = id; - this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator()); - this.timeline = timeline; - this.lastInstant = timeline.lastInstant(); + /** + * Add a new log file into the group + */ + public void addLogFile(HoodieLogFile logFile) { + if (!fileSlices.containsKey(logFile.getBaseCommitTime())) { + fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id)); } + fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile); + } - /** - * Add a new datafile into the file group - * - * @param dataFile - */ - public void addDataFile(HoodieDataFile dataFile) { - if (!fileSlices.containsKey(dataFile.getCommitTime())) { - fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id)); - } - fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile); + public String getId() { + return id; + } + + public String getPartitionPath() { + return partitionPath; + } + + /** + * A FileSlice is considered committed, if one of the following is true - There is a committed + * data file - There are some log files, that are based off a commit or delta commit + */ + private boolean isFileSliceCommitted(FileSlice slice) { + String maxCommitTime = lastInstant.get().getTimestamp(); + return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) && + HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(), + maxCommitTime, + HoodieTimeline.LESSER_OR_EQUAL); + + } + + /** + * Provides a stream of committed file slices, sorted reverse base commit time. 
+ */ + public Stream getAllFileSlices() { + if (!timeline.empty()) { + return fileSlices.entrySet().stream() + .map(sliceEntry -> sliceEntry.getValue()) + .filter(slice -> isFileSliceCommitted(slice)); } + return Stream.empty(); + } - /** - * Add a new log file into the group - * - * @param logFile - */ - public void addLogFile(HoodieLogFile logFile) { - if (!fileSlices.containsKey(logFile.getBaseCommitTime())) { - fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id)); - } - fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile); + /** + * Gets the latest slice - this can contain either + * + * - just the log files without data file - (or) data file with 0 or more log files + */ + public Optional getLatestFileSlice() { + // there should always be one + return getAllFileSlices().findFirst(); + } + + /** + * Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime + */ + public Optional getLatestFileSliceBeforeOrOn(String maxCommitTime) { + return getAllFileSlices() + .filter(slice -> + HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(), + maxCommitTime, + HoodieTimeline.LESSER_OR_EQUAL)) + .findFirst(); + } + + public Optional getLatestFileSliceInRange(List commitRange) { + return getAllFileSlices() + .filter(slice -> commitRange.contains(slice.getBaseCommitTime())) + .findFirst(); + } + + /** + * Stream of committed data files, sorted reverse commit time + */ + public Stream getAllDataFiles() { + return getAllFileSlices() + .filter(slice -> slice.getDataFile().isPresent()) + .map(slice -> slice.getDataFile().get()); + } + + /** + * Get the latest committed data file + */ + public Optional getLatestDataFile() { + return getAllDataFiles().findFirst(); + } + + /** + * Get the latest data file, that is <= max commit time + */ + public Optional getLatestDataFileBeforeOrOn(String maxCommitTime) { + return getAllDataFiles() + .filter(dataFile -> + HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), 
+ maxCommitTime, + HoodieTimeline.LESSER_OR_EQUAL)) + .findFirst(); + } + + /** + * Get the latest data file, that is contained within the provided commit range. + */ + public Optional getLatestDataFileInRange(List commitRange) { + return getAllDataFiles() + .filter(dataFile -> commitRange.contains(dataFile.getCommitTime())) + .findFirst(); + } + + /** + * Obtain the latest log file (based on latest committed data file), currently being appended to + * + * @return logfile if present, empty if no log file has been opened already. + */ + public Optional getLatestLogFile() { + Optional latestSlice = getLatestFileSlice(); + if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) { + return latestSlice.get().getLogFiles().findFirst(); } + return Optional.empty(); + } - public String getId() { - return id; - } - - public String getPartitionPath() { - return partitionPath; - } - - /** - * A FileSlice is considered committed, if one of the following is true - * - There is a committed data file - * - There are some log files, that are based off a commit or delta commit - * - * @param slice - * @return - */ - private boolean isFileSliceCommitted(FileSlice slice) { - String maxCommitTime = lastInstant.get().getTimestamp(); - return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) && - HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(), - maxCommitTime, - HoodieTimeline.LESSER_OR_EQUAL); - - } - - /** - * Provides a stream of committed file slices, sorted reverse base commit time. 
- * - * @return - */ - public Stream getAllFileSlices() { - if (!timeline.empty()) { - return fileSlices.entrySet().stream() - .map(sliceEntry -> sliceEntry.getValue()) - .filter(slice -> isFileSliceCommitted(slice)); - } - return Stream.empty(); - } - - /** - * Gets the latest slice - this can contain either - * - * - just the log files without data file - * - (or) data file with 0 or more log files - * - * @return - */ - public Optional getLatestFileSlice() { - // there should always be one - return getAllFileSlices().findFirst(); - } - - /** - * Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime - * - * @param maxCommitTime - * @return - */ - public Optional getLatestFileSliceBeforeOrOn(String maxCommitTime) { - return getAllFileSlices() - .filter(slice -> - HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(), - maxCommitTime, - HoodieTimeline.LESSER_OR_EQUAL)) - .findFirst(); - } - - public Optional getLatestFileSliceInRange(List commitRange) { - return getAllFileSlices() - .filter(slice -> commitRange.contains(slice.getBaseCommitTime())) - .findFirst(); - } - - /** - * Stream of committed data files, sorted reverse commit time - * - * @return - */ - public Stream getAllDataFiles() { - return getAllFileSlices() - .filter(slice -> slice.getDataFile().isPresent()) - .map(slice -> slice.getDataFile().get()); - } - - /** - * Get the latest committed data file - * - * @return - */ - public Optional getLatestDataFile() { - return getAllDataFiles().findFirst(); - } - - /** - * Get the latest data file, that is <= max commit time - * - * @param maxCommitTime - * @return - */ - public Optional getLatestDataFileBeforeOrOn(String maxCommitTime) { - return getAllDataFiles() - .filter(dataFile -> - HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), - maxCommitTime, - HoodieTimeline.LESSER_OR_EQUAL)) - .findFirst(); - } - - /** - * Get the latest data file, that is contained within the provided commit range. 
- * - * @param commitRange - * @return - */ - public Optional getLatestDataFileInRange(List commitRange) { - return getAllDataFiles() - .filter(dataFile -> commitRange.contains(dataFile.getCommitTime())) - .findFirst(); - } - - /** - * Obtain the latest log file (based on latest committed data file), - * currently being appended to - * - * @return logfile if present, empty if no log file has been opened already. - */ - public Optional getLatestLogFile() { - Optional latestSlice = getLatestFileSlice(); - if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) { - return latestSlice.get().getLogFiles().findFirst(); - } - return Optional.empty(); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieFileGroup {"); - sb.append("id=").append(id); - sb.append(", fileSlices='").append(fileSlices).append('\''); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieFileGroup {"); + sb.append("id=").append(id); + sb.append(", fileSlices='").append(fileSlices).append('\''); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieKey.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieKey.java index d36400434..f4545809c 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieKey.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieKey.java @@ -17,57 +17,58 @@ package com.uber.hoodie.common.model; import com.google.common.base.Objects; - import java.io.Serializable; /** * HoodieKey consists of * - * - recordKey : a recordKey that acts as primary key for a record - * - partitionPath : path to the partition that contains the record + * - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the + * partition that contains the record */ public class HoodieKey implements 
Serializable { - private final String recordKey; + private final String recordKey; - private final String partitionPath; + private final String partitionPath; - public HoodieKey(String recordKey, String partitionPath) { - this.recordKey = recordKey; - this.partitionPath = partitionPath; + public HoodieKey(String recordKey, String partitionPath) { + this.recordKey = recordKey; + this.partitionPath = partitionPath; + } + + public String getRecordKey() { + return recordKey; + } + + public String getPartitionPath() { + return partitionPath; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } - - public String getRecordKey() { - return recordKey; + if (o == null || getClass() != o.getClass()) { + return false; } + HoodieKey otherKey = (HoodieKey) o; + return Objects.equal(recordKey, otherKey.recordKey) && + Objects.equal(partitionPath, otherKey.partitionPath); + } - public String getPartitionPath() { - return partitionPath; - } + @Override + public int hashCode() { + return Objects.hashCode(recordKey, partitionPath); + } - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - HoodieKey otherKey = (HoodieKey) o; - return Objects.equal(recordKey, otherKey.recordKey) && - Objects.equal(partitionPath, otherKey.partitionPath); - } - - @Override - public int hashCode() { - return Objects.hashCode(recordKey, partitionPath); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieKey {"); - sb.append(" recordKey=").append(recordKey); - sb.append(" partitionPath=").append(partitionPath); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieKey {"); + sb.append(" recordKey=").append(recordKey); + sb.append(" partitionPath=").append(partitionPath); + sb.append('}'); + return sb.toString(); + } } diff --git 
a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieLogFile.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieLogFile.java index d1cb636d0..4e09f5f33 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieLogFile.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieLogFile.java @@ -19,13 +19,13 @@ package com.uber.hoodie.common.model; import com.uber.hoodie.common.util.FSUtils; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import java.io.IOException; import java.io.Serializable; import java.util.Comparator; import java.util.Optional; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; /** * Abstracts a single log file. Contains methods to extract metadata like the fileId, version and @@ -34,73 +34,74 @@ import java.util.Optional; * Also contains logic to roll-over the log file */ public class HoodieLogFile implements Serializable { - public static final String DELTA_EXTENSION = ".log"; - private final Path path; - private Optional fileStatus; + public static final String DELTA_EXTENSION = ".log"; - public HoodieLogFile(FileStatus fileStatus) { - this(fileStatus.getPath()); - this.fileStatus = Optional.of(fileStatus); - } + private final Path path; + private Optional fileStatus; - public HoodieLogFile(Path logPath) { - this.path = logPath; - this.fileStatus = Optional.empty(); - } + public HoodieLogFile(FileStatus fileStatus) { + this(fileStatus.getPath()); + this.fileStatus = Optional.of(fileStatus); + } - public String getFileId() { - return FSUtils.getFileIdFromLogPath(path); - } + public HoodieLogFile(Path logPath) { + this.path = logPath; + this.fileStatus = Optional.empty(); + } - public String getBaseCommitTime() { - return FSUtils.getBaseCommitTimeFromLogPath(path); - } + public String getFileId() { + return FSUtils.getFileIdFromLogPath(path); + } - 
public int getLogVersion() { - return FSUtils.getFileVersionFromLog(path); - } + public String getBaseCommitTime() { + return FSUtils.getBaseCommitTimeFromLogPath(path); + } - public String getFileExtension() { - return FSUtils.getFileExtensionFromLog(path); - } + public int getLogVersion() { + return FSUtils.getFileVersionFromLog(path); + } - public Path getPath() { - return path; - } + public String getFileExtension() { + return FSUtils.getFileExtensionFromLog(path); + } - public String getFileName() { - return path.getName(); - } + public Path getPath() { + return path; + } - public Optional getFileStatus() { - return fileStatus; - } + public String getFileName() { + return path.getName(); + } - public Optional getFileSize() { - return fileStatus.map(FileStatus::getLen); - } + public Optional getFileStatus() { + return fileStatus; + } - public HoodieLogFile rollOver(FileSystem fs) throws IOException { - String fileId = getFileId(); - String baseCommitTime = getBaseCommitTime(); - String extension = "." + FSUtils.getFileExtensionFromLog(path); - int newVersion = FSUtils - .computeNextLogVersion(fs, path.getParent(), fileId, - extension, baseCommitTime); - return new HoodieLogFile(new Path(path.getParent(), - FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion))); - } + public Optional getFileSize() { + return fileStatus.map(FileStatus::getLen); + } - public static Comparator getLogVersionComparator() { - return (o1, o2) -> { - // reverse the order - return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion()); - }; - } + public HoodieLogFile rollOver(FileSystem fs) throws IOException { + String fileId = getFileId(); + String baseCommitTime = getBaseCommitTime(); + String extension = "." 
+ FSUtils.getFileExtensionFromLog(path); + int newVersion = FSUtils + .computeNextLogVersion(fs, path.getParent(), fileId, + extension, baseCommitTime); + return new HoodieLogFile(new Path(path.getParent(), + FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion))); + } - @Override - public String toString() { - return "HoodieLogFile {" + path + '}'; - } + public static Comparator getLogVersionComparator() { + return (o1, o2) -> { + // reverse the order + return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion()); + }; + } + + @Override + public String toString() { + return "HoodieLogFile {" + path + '}'; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodiePartitionMetadata.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodiePartitionMetadata.java index d0ddf16ca..ebe801811 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodiePartitionMetadata.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodiePartitionMetadata.java @@ -17,7 +17,8 @@ package com.uber.hoodie.common.model; import com.uber.hoodie.exception.HoodieException; - +import java.io.IOException; +import java.util.Properties; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -25,117 +26,119 @@ import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; -import java.util.Properties; - /** * The metadata that goes into the meta file in each partition */ public class HoodiePartitionMetadata { - public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata"; - public static final String PARTITION_DEPTH_KEY = "partitionDepth"; - public static final String COMMIT_TIME_KEY = "commitTime"; + public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata"; + public static final String PARTITION_DEPTH_KEY = 
"partitionDepth"; + public static final String COMMIT_TIME_KEY = "commitTime"; - /** - * Contents of the metadata - */ - private final Properties props; + /** + * Contents of the metadata + */ + private final Properties props; - /** - * Path to the partition, about which we have the metadata - */ - private final Path partitionPath; + /** + * Path to the partition, about which we have the metadata + */ + private final Path partitionPath; - private final FileSystem fs; + private final FileSystem fs; - private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class); + private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class); - /** - * Construct metadata from existing partition - */ - public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) { - this.fs = fs; - this.props = new Properties(); - this.partitionPath = partitionPath; + /** + * Construct metadata from existing partition + */ + public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) { + this.fs = fs; + this.props = new Properties(); + this.partitionPath = partitionPath; + } + + /** + * Construct metadata object to be written out. + */ + public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, + Path partitionPath) { + this(fs, partitionPath); + props.setProperty(COMMIT_TIME_KEY, commitTime); + props + .setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth())); + } + + public int getPartitionDepth() { + if (!props.containsKey(PARTITION_DEPTH_KEY)) { + throw new HoodieException("Could not find partitionDepth in partition metafile"); } + return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY)); + } - /** - * Construct metadata object to be written out. 
- */ - public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) { - this(fs, partitionPath); - props.setProperty(COMMIT_TIME_KEY, commitTime); - props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth())); - } + /** + * Write the metadata safely into partition atomically. + */ + public void trySave(int taskPartitionId) { + Path tmpMetaPath = new Path(partitionPath, + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId); + Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); + boolean metafileExists = false; - public int getPartitionDepth() { - if (!props.containsKey(PARTITION_DEPTH_KEY)) { - throw new HoodieException("Could not find partitionDepth in partition metafile"); - } - return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY)); - } - - /** - * Write the metadata safely into partition atomically. - * - * @param taskPartitionId - */ - public void trySave(int taskPartitionId) { - Path tmpMetaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId); - Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); - boolean metafileExists = false; + try { + metafileExists = fs.exists(metaPath); + if (!metafileExists) { + // write to temporary file + FSDataOutputStream os = fs.create(tmpMetaPath, true); + props.store(os, "partition metadata"); + os.hsync(); + os.hflush(); + os.close(); + // move to actual path + fs.rename(tmpMetaPath, metaPath); + } + } catch (IOException ioe) { + log.warn( + "Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " + + + partitionPath, ioe); + } finally { + if (!metafileExists) { try { - metafileExists = fs.exists(metaPath); - if (!metafileExists) { - // write to temporary file - FSDataOutputStream os = fs.create(tmpMetaPath, true); - props.store(os, "partition 
metadata"); - os.hsync(); - os.hflush(); - os.close(); - - // move to actual path - fs.rename(tmpMetaPath, metaPath); - } + // clean up tmp file, if still lying around + if (fs.exists(tmpMetaPath)) { + fs.delete(tmpMetaPath, false); + } } catch (IOException ioe) { - log.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " + - partitionPath, ioe); - } finally { - if (!metafileExists) { - try { - // clean up tmp file, if still lying around - if (fs.exists(tmpMetaPath)) { - fs.delete(tmpMetaPath, false); - } - } catch (IOException ioe) { - log.warn("Error trying to clean up temporary files for " + partitionPath, ioe); - } - } + log.warn("Error trying to clean up temporary files for " + partitionPath, ioe); } + } } + } - /** - * Read out the metadata for this partition - */ - public void readFromFS() { - try { - Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE); - FSDataInputStream is = fs.open(metaFile); - props.load(is); - } catch (IOException ioe) { - throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe); - } + /** + * Read out the metadata for this partition + */ + public void readFromFS() { + try { + Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE); + FSDataInputStream is = fs.open(metaFile); + props.load(is); + } catch (IOException ioe) { + throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, + ioe); } + } - // methods related to partition meta data - public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) { - try { - return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); - } catch (IOException ioe) { - throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe); - } + // methods related to partition meta data + public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) { + try { + return 
fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)); + } catch (IOException ioe) { + throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, + ioe); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecord.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecord.java index 4b05a2e3c..43b023020 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecord.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecord.java @@ -17,7 +17,6 @@ package com.uber.hoodie.common.model; import com.google.common.base.Objects; - import java.io.Serializable; import java.util.Optional; @@ -26,129 +25,131 @@ import java.util.Optional; */ public class HoodieRecord implements Serializable { - public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time"; - public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno"; - public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key"; - public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path"; - public static String FILENAME_METADATA_FIELD = "_hoodie_file_name"; + public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time"; + public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno"; + public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key"; + public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path"; + public static String FILENAME_METADATA_FIELD = "_hoodie_file_name"; - /** - * Identifies the record across the table - */ - private HoodieKey key; + /** + * Identifies the record across the table + */ + private HoodieKey key; - /** - * Actual payload of the record - */ - private T data; + /** + * Actual payload of the record + */ + private T data; - /** - * Current location of record on storage. 
Filled in by looking up index - */ - private HoodieRecordLocation currentLocation; + /** + * Current location of record on storage. Filled in by looking up index + */ + private HoodieRecordLocation currentLocation; - /** - * New location of record on storage, after written - */ - private HoodieRecordLocation newLocation; + /** + * New location of record on storage, after written + */ + private HoodieRecordLocation newLocation; - public HoodieRecord(HoodieKey key, T data) { - this.key = key; - this.data = data; - this.currentLocation = null; - this.newLocation = null; + public HoodieRecord(HoodieKey key, T data) { + this.key = key; + this.data = data; + this.currentLocation = null; + this.newLocation = null; + } + + public HoodieKey getKey() { + return key; + } + + public T getData() { + if (data == null) { + throw new IllegalStateException("Payload already deflated for record."); } + return data; + } - public HoodieKey getKey() { - return key; + /** + * Release the actual payload, to ease memory pressure. To be called after the record has been + * written to storage. Once deflated, cannot be inflated. + */ + public void deflate() { + this.data = null; + } + + + /** + * Sets the current currentLocation of the record. This should happen exactly-once + */ + public HoodieRecord setCurrentLocation(HoodieRecordLocation location) { + assert currentLocation == null; + this.currentLocation = location; + return this; + } + + public HoodieRecordLocation getCurrentLocation() { + return currentLocation; + } + + /** + * Sets the new currentLocation of the record, after being written. This again should happen + * exactly-once. 
+ */ + public HoodieRecord setNewLocation(HoodieRecordLocation location) { + assert newLocation == null; + this.newLocation = location; + return this; + } + + public Optional getNewLocation() { + return Optional.of(this.newLocation); + } + + public boolean isCurrentLocationKnown() { + return this.currentLocation != null; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } - - public T getData() { - if (data == null) { - throw new IllegalStateException("Payload already deflated for record."); - } - return data; + if (o == null || getClass() != o.getClass()) { + return false; } + HoodieRecord that = (HoodieRecord) o; + return Objects.equal(key, that.key) && + Objects.equal(data, that.data) && + Objects.equal(currentLocation, that.currentLocation) && + Objects.equal(newLocation, that.newLocation); + } - /** - * Release the actual payload, to ease memory pressure. To be called after the record - * has been written to storage. Once deflated, cannot be inflated. - */ - public void deflate() { - this.data = null; - } + @Override + public int hashCode() { + return Objects.hashCode(key, data, currentLocation, newLocation); + } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieRecord{"); + sb.append("key=").append(key); + sb.append(", currentLocation='").append(currentLocation).append('\''); + sb.append(", newLocation='").append(newLocation).append('\''); + sb.append('}'); + return sb.toString(); + } - /** - * Sets the current currentLocation of the record. 
This should happen exactly-once - */ - public HoodieRecord setCurrentLocation(HoodieRecordLocation location) { - assert currentLocation == null; - this.currentLocation = location; - return this; - } + public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) { + return commitTime + "_" + partitionId + "_" + recordIndex; + } - public HoodieRecordLocation getCurrentLocation() { - return currentLocation; - } + public String getPartitionPath() { + assert key != null; + return key.getPartitionPath(); + } - /** - * Sets the new currentLocation of the record, after being written. This again should happen - * exactly-once. - */ - public HoodieRecord setNewLocation(HoodieRecordLocation location) { - assert newLocation == null; - this.newLocation = location; - return this; - } - - public Optional getNewLocation() { - return Optional.of(this.newLocation); - } - - public boolean isCurrentLocationKnown() { - return this.currentLocation != null; - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - HoodieRecord that = (HoodieRecord) o; - return Objects.equal(key, that.key) && - Objects.equal(data, that.data) && - Objects.equal(currentLocation, that.currentLocation) && - Objects.equal(newLocation, that.newLocation); - } - - @Override - public int hashCode() { - return Objects.hashCode(key, data, currentLocation, newLocation); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieRecord{"); - sb.append("key=").append(key); - sb.append(", currentLocation='").append(currentLocation).append('\''); - sb.append(", newLocation='").append(newLocation).append('\''); - sb.append('}'); - return sb.toString(); - } - - public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) { - return commitTime + "_" + partitionId + "_" + recordIndex; - } - - public String getPartitionPath() { - 
assert key != null; - return key.getPartitionPath(); - } - - public String getRecordKey() { - assert key != null; - return key.getRecordKey(); - } + public String getRecordKey() { + assert key != null; + return key.getRecordKey(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordLocation.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordLocation.java index c84b028ff..fa5f7a04f 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordLocation.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordLocation.java @@ -17,7 +17,6 @@ package com.uber.hoodie.common.model; import com.google.common.base.Objects; - import java.io.Serializable; /** @@ -26,44 +25,46 @@ import java.io.Serializable; */ public class HoodieRecordLocation implements Serializable { - private final String commitTime; - private final String fileId; + private final String commitTime; + private final String fileId; - public HoodieRecordLocation(String commitTime, String fileId) { - this.commitTime = commitTime; - this.fileId = fileId; - } + public HoodieRecordLocation(String commitTime, String fileId) { + this.commitTime = commitTime; + this.fileId = fileId; + } - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - HoodieRecordLocation otherLoc = (HoodieRecordLocation) o; - return Objects.equal(commitTime, otherLoc.commitTime) && - Objects.equal(fileId, otherLoc.fileId); + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } + if (o == null || getClass() != o.getClass()) { + return false; + } + HoodieRecordLocation otherLoc = (HoodieRecordLocation) o; + return Objects.equal(commitTime, otherLoc.commitTime) && + Objects.equal(fileId, otherLoc.fileId); + } - @Override - public int hashCode() { - return Objects.hashCode(commitTime, fileId); - } + @Override + public int 
hashCode() { + return Objects.hashCode(commitTime, fileId); + } - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieRecordLocation {"); - sb.append("commitTime=").append(commitTime).append(", "); - sb.append("fileId=").append(fileId); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieRecordLocation {"); + sb.append("commitTime=").append(commitTime).append(", "); + sb.append("fileId=").append(fileId); + sb.append('}'); + return sb.toString(); + } - public String getCommitTime() { - return commitTime; - } + public String getCommitTime() { + return commitTime; + } - public String getFileId() { - return fileId; - } + public String getFileId() { + return fileId; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordPayload.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordPayload.java index c2ca79343..c7fe8fff8 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordPayload.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieRecordPayload.java @@ -16,54 +16,55 @@ package com.uber.hoodie.common.model; +import java.io.IOException; +import java.io.Serializable; import java.util.Map; +import java.util.Optional; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; -import java.io.IOException; -import java.io.Serializable; -import java.util.Optional; - /** - * Every Hoodie dataset has an implementation of the HoodieRecordPayload - * This abstracts out callbacks which depend on record specific logic + * Every Hoodie dataset has an implementation of the HoodieRecordPayload This abstracts + * out callbacks which depend on record specific logic */ public interface HoodieRecordPayload extends Serializable { - /** - * When more than one HoodieRecord have the same HoodieKey, this function combines them - * before attempting 
to insert/upsert (if combining turned on in HoodieClientConfig) - */ - T preCombine(T another); - /** - * - * This methods lets you write custom merging/combining logic to produce new values - * as a function of current value on storage and whats contained in this object. - * - * eg: - * 1) You are updating counters, you may want to add counts to currentValue and write back updated counts - * 2) You may be reading DB redo logs, and merge them with current image for a database row on storage - * - * @param currentValue Current value in storage, to merge/combine this payload with - * @param schema Schema used for record - * @return new combined/merged value to be written back to storage. EMPTY to skip writing this record. - */ - Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException; + /** + * When more than one HoodieRecord have the same HoodieKey, this function combines them before + * attempting to insert/upsert (if combining turned on in HoodieClientConfig) + */ + T preCombine(T another); - /** - * Generates an avro record out of the given HoodieRecordPayload, to be written out to storage. - * Called when writing a new value for the given HoodieKey, wherein there is no existing record in - * storage to be combined against. (i.e insert) - * Return EMPTY to skip writing this record. - */ - Optional getInsertValue(Schema schema) throws IOException; + /** + * This methods lets you write custom merging/combining logic to produce new values as a function + * of current value on storage and whats contained in this object. 
+ * + * eg: 1) You are updating counters, you may want to add counts to currentValue and write back + * updated counts 2) You may be reading DB redo logs, and merge them with current image for a + * database row on storage + * + * @param currentValue Current value in storage, to merge/combine this payload with + * @param schema Schema used for record + * @return new combined/merged value to be written back to storage. EMPTY to skip writing this + * record. + */ + Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) + throws IOException; - /** - * This method can be used to extract some metadata from HoodieRecordPayload. The metadata is passed - * to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute - * some aggregate metrics using the metadata in the context of a write success or failure. - */ - default Optional> getMetadata() { - return Optional.empty(); - } + /** + * Generates an avro record out of the given HoodieRecordPayload, to be written out to storage. + * Called when writing a new value for the given HoodieKey, wherein there is no existing record in + * storage to be combined against. (i.e insert) Return EMPTY to skip writing this record. + */ + Optional getInsertValue(Schema schema) throws IOException; + + /** + * This method can be used to extract some metadata from HoodieRecordPayload. The metadata is + * passed to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to + * compute some aggregate metrics using the metadata in the context of a write success or + * failure. 
+ */ + default Optional> getMetadata() { + return Optional.empty(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieTableType.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieTableType.java index fb91bc1ca..00564d627 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieTableType.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieTableType.java @@ -31,5 +31,5 @@ package com.uber.hoodie.common.model; * SIMPLE_LSM - A simple 2 level LSM tree. */ public enum HoodieTableType { - COPY_ON_WRITE, MERGE_ON_READ + COPY_ON_WRITE, MERGE_ON_READ } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java index a56338cc4..b69aed36c 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/model/HoodieWriteStat.java @@ -17,8 +17,6 @@ package com.uber.hoodie.common.model; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.uber.hoodie.common.util.FSUtils; - import java.io.Serializable; /** @@ -27,143 +25,153 @@ import java.io.Serializable; @JsonIgnoreProperties(ignoreUnknown = true) public class HoodieWriteStat implements Serializable { - public static final String NULL_COMMIT = "null"; + public static final String NULL_COMMIT = "null"; - /** - * Id of the file being written - */ - private String fileId; + /** + * Id of the file being written + */ + private String fileId; - /** - * Relative path to the file from the base path - */ - private String path; + /** + * Relative path to the file from the base path + */ + private String path; - /** - * The previous version of the file. (null if this is the first version. i.e insert) - */ - private String prevCommit; + /** + * The previous version of the file. (null if this is the first version. 
i.e insert) + */ + private String prevCommit; - /** - * Total number of records written for this file. - * - for updates, its the entire number of records in the file - * - for inserts, its the actual number of records inserted. - */ - private long numWrites; + /** + * Total number of records written for this file. - for updates, its the entire number of records + * in the file - for inserts, its the actual number of records inserted. + */ + private long numWrites; - /** - * Total number of records deleted. - */ - private long numDeletes; + /** + * Total number of records deleted. + */ + private long numDeletes; - /** - * Total number of records actually changed. (0 for inserts) - */ - private long numUpdateWrites; + /** + * Total number of records actually changed. (0 for inserts) + */ + private long numUpdateWrites; - /** - * Total size of file written - */ - private long totalWriteBytes; + /** + * Total size of file written + */ + private long totalWriteBytes; - /** - * Total number of records, that were n't able to be written due to errors. - */ - private long totalWriteErrors; + /** + * Total number of records, that were n't able to be written due to errors. 
+ */ + private long totalWriteErrors; - public HoodieWriteStat() { - // called by jackson json lib + public HoodieWriteStat() { + // called by jackson json lib + } + + public void setFileId(String fileId) { + this.fileId = fileId; + } + + public void setPath(String path) { + this.path = path; + } + + public void setPrevCommit(String prevCommit) { + this.prevCommit = prevCommit; + } + + public void setNumWrites(long numWrites) { + this.numWrites = numWrites; + } + + public void setNumDeletes(long numDeletes) { + this.numDeletes = numDeletes; + } + + public void setNumUpdateWrites(long numUpdateWrites) { + this.numUpdateWrites = numUpdateWrites; + } + + public long getTotalWriteBytes() { + return totalWriteBytes; + } + + public void setTotalWriteBytes(long totalWriteBytes) { + this.totalWriteBytes = totalWriteBytes; + } + + public long getTotalWriteErrors() { + return totalWriteErrors; + } + + public void setTotalWriteErrors(long totalWriteErrors) { + this.totalWriteErrors = totalWriteErrors; + } + + public String getPrevCommit() { + return prevCommit; + } + + public long getNumWrites() { + return numWrites; + } + + public long getNumDeletes() { + return numDeletes; + } + + public long getNumUpdateWrites() { + return numUpdateWrites; + } + + public String getFileId() { + return fileId; + } + + public String getPath() { + return path; + } + + + @Override + public String toString() { + return new StringBuilder() + .append("HoodieWriteStat {") + .append("path=" + path) + .append(", prevCommit='" + prevCommit + '\'') + .append(", numWrites=" + numWrites) + .append(", numDeletes=" + numDeletes) + .append(", numUpdateWrites=" + numUpdateWrites) + .append(", numWriteBytes=" + totalWriteBytes) + .append('}') + .toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; } - public void setFileId(String fileId) { - this.fileId = fileId; + HoodieWriteStat that = 
(HoodieWriteStat) o; + if (!path.equals(that.path)) { + return false; } + return prevCommit.equals(that.prevCommit); - public void setPath(String path) { this.path = path; } + } - public void setPrevCommit(String prevCommit) { - this.prevCommit = prevCommit; - } - - public void setNumWrites(long numWrites) { - this.numWrites = numWrites; - } - - public void setNumDeletes(long numDeletes) { - this.numDeletes = numDeletes; - } - - public void setNumUpdateWrites(long numUpdateWrites) { - this.numUpdateWrites = numUpdateWrites; - } - - public long getTotalWriteBytes() { - return totalWriteBytes; - } - - public void setTotalWriteBytes(long totalWriteBytes) { - this.totalWriteBytes = totalWriteBytes; - } - - public long getTotalWriteErrors() { return totalWriteErrors; } - - public void setTotalWriteErrors(long totalWriteErrors) { this.totalWriteErrors = totalWriteErrors; } - - public String getPrevCommit() { - return prevCommit; - } - - public long getNumWrites() { - return numWrites; - } - - public long getNumDeletes() { - return numDeletes; - } - - public long getNumUpdateWrites() { - return numUpdateWrites; - } - - public String getFileId() { - return fileId; - } - - public String getPath() { return path; } - - - @Override - public String toString() { - return new StringBuilder() - .append("HoodieWriteStat {") - .append("path=" + path) - .append(", prevCommit='" + prevCommit + '\'') - .append(", numWrites=" + numWrites) - .append(", numDeletes=" + numDeletes) - .append(", numUpdateWrites=" + numUpdateWrites) - .append(", numWriteBytes=" + totalWriteBytes) - .append('}') - .toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - - HoodieWriteStat that = (HoodieWriteStat) o; - if (!path.equals(that.path)) - return false; - return prevCommit.equals(that.prevCommit); - - } - - @Override - public int hashCode() { - int result = path.hashCode(); - result = 31 * result 
+ prevCommit.hashCode(); - return result; - } + @Override + public int hashCode() { + int result = path.hashCode(); + result = 31 * result + prevCommit.hashCode(); + return result; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableConfig.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableConfig.java index 48cb75ca3..8cc6c18c6 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableConfig.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableConfig.java @@ -19,8 +19,11 @@ package com.uber.hoodie.common.table; import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieFileFormat; import com.uber.hoodie.common.model.HoodieTableType; -import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIOException; +import java.io.IOException; +import java.io.Serializable; +import java.util.Date; +import java.util.Properties; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -28,144 +31,134 @@ import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; -import java.io.Serializable; -import java.util.Date; -import java.util.Properties; - /** * Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc - * Configurations are loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path - * and never changes during the lifetime of a hoodie dataset. + * Configurations are loaded from hoodie.properties, these properties are usually set during + * initializing a path as hoodie base path and never changes during the lifetime of a hoodie + * dataset. 
* * @see HoodieTableMetaClient * @since 0.3.0 */ public class HoodieTableConfig implements Serializable { - private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class); - public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties"; - public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name"; - public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type"; - public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME = - "hoodie.table.ro.file.format"; - public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME = - "hoodie.table.rt.file.format"; - public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class"; + private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class); - public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE; - public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET; - public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG; - public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName(); - private Properties props; + public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties"; + public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name"; + public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type"; + public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME = + "hoodie.table.ro.file.format"; + public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME = + "hoodie.table.rt.file.format"; + public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class"; - public HoodieTableConfig(FileSystem fs, String metaPath) { - Properties props = new Properties(); - Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); - log.info("Loading dataset properties from " + propertyPath); - try { - try (FSDataInputStream 
inputStream = fs.open(propertyPath)) { - props.load(inputStream); - } - } catch (IOException e) { - throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e); - } - this.props = props; + public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE; + public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET; + public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG; + public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName(); + private Properties props; + + public HoodieTableConfig(FileSystem fs, String metaPath) { + Properties props = new Properties(); + Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); + log.info("Loading dataset properties from " + propertyPath); + try { + try (FSDataInputStream inputStream = fs.open(propertyPath)) { + props.load(inputStream); + } + } catch (IOException e) { + throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e); } + this.props = props; + } - /** - * For serailizing and de-serializing - * @deprecated - */ - public HoodieTableConfig() { + /** + * For serailizing and de-serializing + * + * @deprecated + */ + public HoodieTableConfig() { + } + + /** + * Initialize the hoodie meta directory and any necessary files inside the meta (including the + * hoodie.properties) + */ + public static void createHoodieProperties(FileSystem fs, Path metadataFolder, + Properties properties) throws IOException { + if (!fs.exists(metadataFolder)) { + fs.mkdirs(metadataFolder); } - - /** - * Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties) - * - * @param metadataFolder - * @param properties - * @throws IOException - */ - public static void createHoodieProperties(FileSystem fs, Path metadataFolder, - Properties properties) throws IOException { - if (!fs.exists(metadataFolder)) { - 
fs.mkdirs(metadataFolder); - } - Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE); - FSDataOutputStream outputStream = fs.create(propertyPath); - try { - if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) { - throw new IllegalArgumentException( - HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified"); - } - if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) { - properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name()); - } - if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ.name() - && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) { - properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS); - } - properties - .store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); - } finally { - outputStream.close(); - } + Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE); + FSDataOutputStream outputStream = fs.create(propertyPath); + try { + if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) { + throw new IllegalArgumentException( + HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified"); + } + if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) { + properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name()); + } + if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ + .name() + && !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) { + properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS); + } + properties + .store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); + } finally { + outputStream.close(); } + } - /** - * Read the table type from the table properties and if not found, return the default - * - * @return - */ - public HoodieTableType getTableType() { - if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) { - return 
HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME)); - } - return DEFAULT_TABLE_TYPE; + /** + * Read the table type from the table properties and if not found, return the default + */ + public HoodieTableType getTableType() { + if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) { + return HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME)); } + return DEFAULT_TABLE_TYPE; + } - /** - * Read the payload class for HoodieRecords from the table properties - * - * @return - */ - public String getPayloadClass() { - return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS); - } + /** + * Read the payload class for HoodieRecords from the table properties + */ + public String getPayloadClass() { + return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS); + } - /** - * Read the table name - * - * @return - */ - public String getTableName() { - return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME); - } + /** + * Read the table name + */ + public String getTableName() { + return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME); + } - /** - * Get the Read Optimized Storage Format - * - * @return HoodieFileFormat for the Read Optimized Storage format - */ - public HoodieFileFormat getROFileFormat() { - if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) { - return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME)); - } - return DEFAULT_RO_FILE_FORMAT; + /** + * Get the Read Optimized Storage Format + * + * @return HoodieFileFormat for the Read Optimized Storage format + */ + public HoodieFileFormat getROFileFormat() { + if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) { + return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME)); } + return DEFAULT_RO_FILE_FORMAT; + } - /** - * Get the Read Optimized Storage Format - * - * @return HoodieFileFormat for the Read Optimized Storage format - */ - public HoodieFileFormat getRTFileFormat() { 
- if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) { - return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME)); - } - return DEFAULT_RT_FILE_FORMAT; + /** + * Get the Read Optimized Storage Format + * + * @return HoodieFileFormat for the Read Optimized Storage format + */ + public HoodieFileFormat getRTFileFormat() { + if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) { + return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME)); } + return DEFAULT_RT_FILE_FORMAT; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java index b1dd0296c..d012d9799 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTableMetaClient.java @@ -21,6 +21,11 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.DatasetNotFoundException; +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.util.Objects; +import java.util.Properties; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -28,225 +33,205 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.Externalizable; -import java.io.File; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectInputStream; -import java.io.ObjectOutput; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.util.Objects; -import java.util.Properties; - /** - * HoodieTableMetaClient allows to access meta-data about a hoodie table - * It returns meta-data about commits, 
savepoints, compactions, cleanups as a HoodieTimeline - * Create an instance of the HoodieTableMetaClient with FileSystem and basePath to start getting the meta-data. - *

- * All the timelines are computed lazily, once computed the timeline is cached and never refreshed. - * Use the HoodieTimeline.reload() to refresh timelines. + * HoodieTableMetaClient allows to access meta-data about a hoodie table It returns + * meta-data about commits, savepoints, compactions, cleanups as a HoodieTimeline + * Create an instance of the HoodieTableMetaClient with FileSystem and basePath to + * start getting the meta-data.

All the timelines are computed lazily, once computed the + * timeline is cached and never refreshed. Use the HoodieTimeline.reload() to refresh + * timelines. * * @see HoodieTimeline * @since 0.3.0 */ public class HoodieTableMetaClient implements Serializable { - private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class); - public static String METAFOLDER_NAME = ".hoodie"; - private String basePath; - private transient FileSystem fs; - private String metaPath; - private HoodieTableType tableType; - private HoodieTableConfig tableConfig; - private HoodieActiveTimeline activeTimeline; - private HoodieArchivedTimeline archivedTimeline; + private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class); + public static String METAFOLDER_NAME = ".hoodie"; - public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException { - // Do not load any timeline by default - this(fs, basePath, false); + private String basePath; + private transient FileSystem fs; + private String metaPath; + private HoodieTableType tableType; + private HoodieTableConfig tableConfig; + private HoodieActiveTimeline activeTimeline; + private HoodieArchivedTimeline archivedTimeline; + + public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException { + // Do not load any timeline by default + this(fs, basePath, false); + } + + public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad) + throws DatasetNotFoundException { + log.info("Loading HoodieTableMetaClient from " + basePath); + this.basePath = basePath; + this.fs = fs; + Path basePathDir = new Path(this.basePath); + this.metaPath = basePath + File.separator + METAFOLDER_NAME; + Path metaPathDir = new Path(this.metaPath); + DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir); + this.tableConfig = new HoodieTableConfig(fs, metaPath); + this.tableType = 
tableConfig.getTableType(); + log.info("Finished Loading Table of type " + tableType + " from " + basePath); + if (loadActiveTimelineOnLoad) { + log.info("Loading Active commit timeline for " + basePath); + getActiveTimeline(); } + } - public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad) - throws DatasetNotFoundException { - log.info("Loading HoodieTableMetaClient from " + basePath); - this.basePath = basePath; - this.fs = fs; - Path basePathDir = new Path(this.basePath); - this.metaPath = basePath + File.separator + METAFOLDER_NAME; - Path metaPathDir = new Path(this.metaPath); - DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir); - this.tableConfig = new HoodieTableConfig(fs, metaPath); - this.tableType = tableConfig.getTableType(); - log.info("Finished Loading Table of type " + tableType + " from " + basePath); - if (loadActiveTimelineOnLoad) { - log.info("Loading Active commit timeline for " + basePath); - getActiveTimeline(); - } - } + /** + * For serailizing and de-serializing + * + * @deprecated + */ + public HoodieTableMetaClient() { + } - /** - * For serailizing and de-serializing - * - * @deprecated - */ - public HoodieTableMetaClient() { - } + /** + * This method is only used when this object is deserialized in a spark executor. + * + * @deprecated + */ + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + this.fs = FSUtils.getFs(); + } - /** - * This method is only used when this object is deserialized in a spark executor. 
- * - * @deprecated - */ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - this.fs = FSUtils.getFs(); - } + private void writeObject(java.io.ObjectOutputStream out) + throws IOException { + out.defaultWriteObject(); + } - private void writeObject(java.io.ObjectOutputStream out) - throws IOException { - out.defaultWriteObject(); - } + /** + * @return Base path + */ + public String getBasePath() { + return basePath; + } - /** - * @return Base path - */ - public String getBasePath() { - return basePath; - } + /** + * @return Hoodie Table Type + */ + public HoodieTableType getTableType() { + return tableType; + } - /** - * @return Hoodie Table Type - */ - public HoodieTableType getTableType() { - return tableType; - } + /** + * @return Meta path + */ + public String getMetaPath() { + return metaPath; + } - /** - * @return Meta path - */ - public String getMetaPath() { - return metaPath; - } + /** + * @return Table Config + */ + public HoodieTableConfig getTableConfig() { + return tableConfig; + } - /** - * @return Table Config - */ - public HoodieTableConfig getTableConfig() { - return tableConfig; - } + /** + * Get the FS implementation for this table + */ + public FileSystem getFs() { + return fs; + } - /** - * Get the FS implementation for this table - * @return - */ - public FileSystem getFs() { - return fs; + /** + * Get the active instants as a timeline + * + * @return Active instants timeline + */ + public synchronized HoodieActiveTimeline getActiveTimeline() { + if (activeTimeline == null) { + activeTimeline = new HoodieActiveTimeline(fs, metaPath); } + return activeTimeline; + } - /** - * Get the active instants as a timeline - * - * @return Active instants timeline - * @throws IOException - */ - public synchronized HoodieActiveTimeline getActiveTimeline() { - if (activeTimeline == null) { - activeTimeline = new HoodieActiveTimeline(fs, metaPath); - } - return activeTimeline; + 
/** + * Get the archived commits as a timeline. This is costly operation, as all data from the archived + * files are read. This should not be used, unless for historical debugging purposes + * + * @return Active commit timeline + */ + public synchronized HoodieArchivedTimeline getArchivedTimeline() { + if (archivedTimeline == null) { + archivedTimeline = new HoodieArchivedTimeline(fs, metaPath); } + return archivedTimeline; + } - /** - * Get the archived commits as a timeline. This is costly operation, as all data from the - * archived files are read. This should not be used, unless for historical debugging purposes - * - * @return Active commit timeline - * @throws IOException - */ - public synchronized HoodieArchivedTimeline getArchivedTimeline() { - if (archivedTimeline == null) { - archivedTimeline = new HoodieArchivedTimeline(fs, metaPath); - } - return archivedTimeline; + /** + * Helper method to initialize a given path, as a given storage type and table name + */ + public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath, + HoodieTableType tableType, String tableName, String payloadClassName) throws IOException { + Properties properties = new Properties(); + properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName); + properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name()); + if (tableType == HoodieTableType.MERGE_ON_READ) { + properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName); } + return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties); + } - /** - * Helper method to initialize a given path, as a given storage type and table name - * - * @param fs - * @param basePath - * @param tableType - * @param tableName - * @return - * @throws IOException - */ - public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath, HoodieTableType tableType, String tableName, String payloadClassName) throws 
IOException { - Properties properties = new Properties(); - properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName); - properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name()); - if(tableType == HoodieTableType.MERGE_ON_READ) { - properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName); - } - return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties); + /** + * Helper method to initialize a given path as a hoodie dataset with configs passed in as as + * Properties + * + * @return Instance of HoodieTableMetaClient + */ + public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs, + String basePath, Properties props) throws IOException { + log.info("Initializing " + basePath + " as hoodie dataset " + basePath); + Path basePathDir = new Path(basePath); + if (!fs.exists(basePathDir)) { + fs.mkdirs(basePathDir); } + Path metaPathDir = new Path(basePath, METAFOLDER_NAME); + if (!fs.exists(metaPathDir)) { + fs.mkdirs(metaPathDir); + } + HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props); + HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); + log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() + + " from " + basePath); + return metaClient; + } - /** - * Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties - * - * @param fs - * @param basePath - * @param props - * @return Instance of HoodieTableMetaClient - * @throws IOException - */ - public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs, - String basePath, Properties props) throws IOException { - log.info("Initializing " + basePath + " as hoodie dataset " + basePath); - Path basePathDir = new Path(basePath); - if (!fs.exists(basePathDir)) { - fs.mkdirs(basePathDir); - } - Path metaPathDir = new Path(basePath, METAFOLDER_NAME); - if 
(!fs.exists(metaPathDir)) { - fs.mkdirs(metaPathDir); - } - HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props); - HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath); - log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType() - + " from " + basePath); - return metaClient; - } + // HELPER METHODS TO CREATE META FILE NAMES + public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter) + throws IOException { + return fs.listStatus(metaPath, nameFilter); + } - // HELPER METHODS TO CREATE META FILE NAMES - public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter) - throws IOException { - return fs.listStatus(metaPath, nameFilter); + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } + if (o == null || getClass() != o.getClass()) { + return false; + } + HoodieTableMetaClient that = (HoodieTableMetaClient) o; + return Objects.equals(basePath, that.basePath) && tableType == that.tableType; + } - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - HoodieTableMetaClient that = (HoodieTableMetaClient) o; - return Objects.equals(basePath, that.basePath) && tableType == that.tableType; - } + @Override + public int hashCode() { + return Objects.hash(basePath, tableType); + } - @Override - public int hashCode() { - return Objects.hash(basePath, tableType); - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{"); - sb.append("basePath='").append(basePath).append('\''); - sb.append(", metaPath='").append(metaPath).append('\''); - sb.append(", tableType=").append(tableType); - sb.append('}'); - return sb.toString(); - } + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{"); + 
sb.append("basePath='").append(basePath).append('\''); + sb.append(", metaPath='").append(metaPath).append('\''); + sb.append(", tableType=").append(tableType); + sb.append('}'); + return sb.toString(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTimeline.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTimeline.java index 93377c3eb..e2001a2c7 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTimeline.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/HoodieTimeline.java @@ -18,19 +18,16 @@ package com.uber.hoodie.common.table; import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; -import com.uber.hoodie.common.util.FSUtils; - import java.io.Serializable; import java.util.Optional; import java.util.function.BiPredicate; import java.util.stream.Stream; /** - * HoodieTimeline is a view of meta-data instants in the hoodie dataset. - * Instants are specific points in time represented as HoodieInstant. - *

- * Timelines are immutable once created and operations create new instance of - * timelines which filter on the instants and this can be chained. + * HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific + * points in time represented as HoodieInstant.

Timelines are immutable once created and + * operations create new instance of timelines which filter on the instants and this can be + * chained. * * @see com.uber.hoodie.common.table.HoodieTableMetaClient * @see HoodieDefaultTimeline @@ -38,205 +35,195 @@ import java.util.stream.Stream; * @since 0.3.0 */ public interface HoodieTimeline extends Serializable { - String COMMIT_ACTION = "commit"; - String DELTA_COMMIT_ACTION = "deltacommit"; - String CLEAN_ACTION = "clean"; - String ROLLBACK_ACTION = "rollback"; - String SAVEPOINT_ACTION = "savepoint"; - String COMPACTION_ACTION = "compaction"; - String INFLIGHT_EXTENSION = ".inflight"; - String COMMIT_EXTENSION = "." + COMMIT_ACTION; - String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION; - String CLEAN_EXTENSION = "." + CLEAN_ACTION; - String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION; - String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION; - String COMPACTION_EXTENSION = "." + COMPACTION_ACTION; - //this is to preserve backwards compatibility on commit in-flight filenames - String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION; - String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION; - String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION; - String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION; - String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION; - String INFLIGHT_COMPACTION_EXTENSION = "." 
+ COMPACTION_ACTION + INFLIGHT_EXTENSION; + String COMMIT_ACTION = "commit"; + String DELTA_COMMIT_ACTION = "deltacommit"; + String CLEAN_ACTION = "clean"; + String ROLLBACK_ACTION = "rollback"; + String SAVEPOINT_ACTION = "savepoint"; + String COMPACTION_ACTION = "compaction"; + String INFLIGHT_EXTENSION = ".inflight"; - /** - * Filter this timeline to just include the in-flights - * - * @return New instance of HoodieTimeline with just in-flights - */ - HoodieTimeline filterInflights(); + String COMMIT_EXTENSION = "." + COMMIT_ACTION; + String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION; + String CLEAN_EXTENSION = "." + CLEAN_ACTION; + String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION; + String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION; + String COMPACTION_EXTENSION = "." + COMPACTION_ACTION; + //this is to preserve backwards compatibility on commit in-flight filenames + String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION; + String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION; + String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION; + String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION; + String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION; + String INFLIGHT_COMPACTION_EXTENSION = "." 
+ COMPACTION_ACTION + INFLIGHT_EXTENSION; - /** - * Filter this timeline to just include the completed instants - * - * @return New instance of HoodieTimeline with just completed instants - */ - HoodieTimeline filterCompletedInstants(); + /** + * Filter this timeline to just include the in-flights + * + * @return New instance of HoodieTimeline with just in-flights + */ + HoodieTimeline filterInflights(); + + /** + * Filter this timeline to just include the completed instants + * + * @return New instance of HoodieTimeline with just completed instants + */ + HoodieTimeline filterCompletedInstants(); - /** - * Create a new Timeline with instants after startTs and before or on endTs - * - * @param startTs - * @param endTs - */ - HoodieTimeline findInstantsInRange(String startTs, String endTs); + /** + * Create a new Timeline with instants after startTs and before or on endTs + */ + HoodieTimeline findInstantsInRange(String startTs, String endTs); - /** - * Create a new Timeline with all the instants after startTs - * - * @param commitTime - * @param numCommits - */ - HoodieTimeline findInstantsAfter(String commitTime, int numCommits); + /** + * Create a new Timeline with all the instants after startTs + */ + HoodieTimeline findInstantsAfter(String commitTime, int numCommits); - /** - * If the timeline has any instants - * - * @return true if timeline is empty - */ - boolean empty(); + /** + * If the timeline has any instants + * + * @return true if timeline is empty + */ + boolean empty(); - /** - * @return total number of completed instants - */ - int countInstants(); + /** + * @return total number of completed instants + */ + int countInstants(); - /** - * @return first completed instant if available - */ - Optional firstInstant(); + /** + * @return first completed instant if available + */ + Optional firstInstant(); - /** - * @param n - * @return nth completed instant from the first completed instant - */ - Optional nthInstant(int n); + /** + * @return nth completed 
instant from the first completed instant + */ + Optional nthInstant(int n); - /** - * @return last completed instant if available - */ - Optional lastInstant(); + /** + * @return last completed instant if available + */ + Optional lastInstant(); - /** - * @param n - * @return nth completed instant going back from the last completed instant - */ - Optional nthFromLastInstant(int n); + /** + * @return nth completed instant going back from the last completed instant + */ + Optional nthFromLastInstant(int n); - /** - * @return true if the passed instant is present as a completed instant on the timeline - */ - boolean containsInstant(HoodieInstant instant); + /** + * @return true if the passed instant is present as a completed instant on the timeline + */ + boolean containsInstant(HoodieInstant instant); - /** - * @return true if the passed instant is present as a completed instant on the timeline or - * if the instant is before the first completed instant in the timeline - */ - boolean containsOrBeforeTimelineStarts(String ts); + /** + * @return true if the passed instant is present as a completed instant on the timeline or if the + * instant is before the first completed instant in the timeline + */ + boolean containsOrBeforeTimelineStarts(String ts); - /** - * @return Get the stream of completed instants - */ - Stream getInstants(); + /** + * @return Get the stream of completed instants + */ + Stream getInstants(); - /** - * @return true if the passed in instant is before the first completed instant in the timeline - */ - boolean isBeforeTimelineStarts(String ts); + /** + * @return true if the passed in instant is before the first completed instant in the timeline + */ + boolean isBeforeTimelineStarts(String ts); - /** - * Read the completed instant details - * - * @param instant - * @return - */ - Optional getInstantDetails(HoodieInstant instant); + /** + * Read the completed instant details + */ + Optional getInstantDetails(HoodieInstant instant); - /** - * Helper 
methods to compare instants - **/ - BiPredicate GREATER_OR_EQUAL = - (commit1, commit2) -> commit1.compareTo(commit2) >= 0; - BiPredicate GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0; - BiPredicate LESSER_OR_EQUAL = - (commit1, commit2) -> commit1.compareTo(commit2) <= 0; - BiPredicate LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0; + /** + * Helper methods to compare instants + **/ + BiPredicate GREATER_OR_EQUAL = + (commit1, commit2) -> commit1.compareTo(commit2) >= 0; + BiPredicate GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0; + BiPredicate LESSER_OR_EQUAL = + (commit1, commit2) -> commit1.compareTo(commit2) <= 0; + BiPredicate LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0; - static boolean compareTimestamps(String commit1, String commit2, - BiPredicate predicateToApply) { - return predicateToApply.test(commit1, commit2); - } + static boolean compareTimestamps(String commit1, String commit2, + BiPredicate predicateToApply) { + return predicateToApply.test(commit1, commit2); + } - static HoodieInstant getCompletedInstant(final HoodieInstant instant) { - return new HoodieInstant(false, instant.getAction(), instant.getTimestamp()); - } + static HoodieInstant getCompletedInstant(final HoodieInstant instant) { + return new HoodieInstant(false, instant.getAction(), instant.getTimestamp()); + } - static HoodieInstant getInflightInstant(final HoodieInstant instant) { - return new HoodieInstant(true, instant.getAction(), instant.getTimestamp()); - } + static HoodieInstant getInflightInstant(final HoodieInstant instant) { + return new HoodieInstant(true, instant.getAction(), instant.getTimestamp()); + } - static String makeCommitFileName(String commitTime) { - return commitTime + HoodieTimeline.COMMIT_EXTENSION; - } + static String makeCommitFileName(String commitTime) { + return commitTime + HoodieTimeline.COMMIT_EXTENSION; + } - static String makeInflightCommitFileName(String commitTime) { - return 
commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION; - } + static String makeInflightCommitFileName(String commitTime) { + return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION; + } - static String makeCleanerFileName(String instant) { - return instant + HoodieTimeline.CLEAN_EXTENSION; - } + static String makeCleanerFileName(String instant) { + return instant + HoodieTimeline.CLEAN_EXTENSION; + } - static String makeInflightCleanerFileName(String instant) { - return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION; - } + static String makeInflightCleanerFileName(String instant) { + return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION; + } - static String makeRollbackFileName(String instant) { - return instant + HoodieTimeline.ROLLBACK_EXTENSION; - } + static String makeRollbackFileName(String instant) { + return instant + HoodieTimeline.ROLLBACK_EXTENSION; + } - static String makeInflightRollbackFileName(String instant) { - return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION; - } + static String makeInflightRollbackFileName(String instant) { + return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION; + } - static String makeInflightSavePointFileName(String commitTime) { - return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION; - } + static String makeInflightSavePointFileName(String commitTime) { + return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION; + } - static String makeSavePointFileName(String commitTime) { - return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION; - } + static String makeSavePointFileName(String commitTime) { + return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION; + } - static String makeInflightCompactionFileName(String commitTime) { - return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION; - } + static String makeInflightCompactionFileName(String commitTime) { + return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION; + } - static String makeCompactionFileName(String 
commitTime) { - return commitTime + HoodieTimeline.COMPACTION_EXTENSION; - } + static String makeCompactionFileName(String commitTime) { + return commitTime + HoodieTimeline.COMPACTION_EXTENSION; + } - static String makeInflightDeltaFileName(String commitTime) { - return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION; - } + static String makeInflightDeltaFileName(String commitTime) { + return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION; + } - static String makeDeltaFileName(String commitTime) { - return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION; - } + static String makeDeltaFileName(String commitTime) { + return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION; + } - static String getCommitFromCommitFile(String commitFileName) { - return commitFileName.split("\\.")[0]; - } + static String getCommitFromCommitFile(String commitFileName) { + return commitFileName.split("\\.")[0]; + } - static String makeFileNameAsComplete(String fileName) { - return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, ""); - } + static String makeFileNameAsComplete(String fileName) { + return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, ""); + } - static String makeFileNameAsInflight(String fileName) { - return fileName + HoodieTimeline.INFLIGHT_EXTENSION; - } + static String makeFileNameAsInflight(String fileName) { + return fileName + HoodieTimeline.INFLIGHT_EXTENSION; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/TableFileSystemView.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/TableFileSystemView.java index ee4e373b5..d9ffae790 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/TableFileSystemView.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/TableFileSystemView.java @@ -19,13 +19,7 @@ package com.uber.hoodie.common.table; import com.uber.hoodie.common.model.FileSlice; import com.uber.hoodie.common.model.HoodieDataFile; import 
com.uber.hoodie.common.model.HoodieFileGroup; -import com.uber.hoodie.common.model.HoodieLogFile; -import org.apache.hadoop.fs.FileStatus; - -import java.io.IOException; import java.util.List; -import java.util.Map; -import java.util.function.Predicate; import java.util.stream.Stream; /** @@ -35,70 +29,70 @@ import java.util.stream.Stream; */ public interface TableFileSystemView { - /** - * ReadOptimizedView - methods to provide a view of columnar data files only. - */ - interface ReadOptimizedView { - /** - * Stream all the latest data files in the given partition - */ - Stream getLatestDataFiles(String partitionPath); - - /** - * Stream all the latest data files, in the file system view - */ - Stream getLatestDataFiles(); - - /** - * Stream all the latest version data files in the given partition with precondition that - * commitTime(file) before maxCommitTime - */ - Stream getLatestDataFilesBeforeOrOn(String partitionPath, - String maxCommitTime); - - /** - * Stream all the latest data files pass - */ - Stream getLatestDataFilesInRange(List commitsToReturn); - - /** - * Stream all the data file versions grouped by FileId for a given partition - */ - Stream getAllDataFiles(String partitionPath); - } + /** + * ReadOptimizedView - methods to provide a view of columnar data files only. + */ + interface ReadOptimizedView { /** - * RealtimeView - methods to access a combination of columnar data files + log files with real time data. 
+ * Stream all the latest data files in the given partition */ - interface RealtimeView { - /** - * Stream all the latest file slices in the given partition - */ - Stream getLatestFileSlices(String partitionPath); - - /** - * Stream all the latest file slices in the given partition with precondition that - * commitTime(file) before maxCommitTime - */ - Stream getLatestFileSlicesBeforeOrOn(String partitionPath, - String maxCommitTime); - - /** - * Stream all the latest file slices, in the given range - */ - Stream getLatestFileSliceInRange(List commitsToReturn); - - /** - * Stream all the file slices for a given partition, latest or not. - */ - Stream getAllFileSlices(String partitionPath); - } + Stream getLatestDataFiles(String partitionPath); /** - * Stream all the file groups for a given partition - * - * @param partitionPath - * @return + * Stream all the latest data files, in the file system view */ - Stream getAllFileGroups(String partitionPath); + Stream getLatestDataFiles(); + + /** + * Stream all the latest version data files in the given partition with precondition that + * commitTime(file) before maxCommitTime + */ + Stream getLatestDataFilesBeforeOrOn(String partitionPath, + String maxCommitTime); + + /** + * Stream all the latest data files pass + */ + Stream getLatestDataFilesInRange(List commitsToReturn); + + /** + * Stream all the data file versions grouped by FileId for a given partition + */ + Stream getAllDataFiles(String partitionPath); + } + + /** + * RealtimeView - methods to access a combination of columnar data files + log files with real + * time data. 
+ */ + interface RealtimeView { + + /** + * Stream all the latest file slices in the given partition + */ + Stream getLatestFileSlices(String partitionPath); + + /** + * Stream all the latest file slices in the given partition with precondition that + * commitTime(file) before maxCommitTime + */ + Stream getLatestFileSlicesBeforeOrOn(String partitionPath, + String maxCommitTime); + + /** + * Stream all the latest file slices, in the given range + */ + Stream getLatestFileSliceInRange(List commitsToReturn); + + /** + * Stream all the file slices for a given partition, latest or not. + */ + Stream getAllFileSlices(String partitionPath); + } + + /** + * Stream all the file groups for a given partition + */ + Stream getAllFileGroups(String partitionPath); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java index 2060db1a1..667199233 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java @@ -16,6 +16,9 @@ package com.uber.hoodie.common.table.log; +import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK; +import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME; + import com.google.common.collect.Maps; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieLogFile; @@ -29,14 +32,6 @@ import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock; import com.uber.hoodie.common.table.log.block.HoodieLogBlock; import com.uber.hoodie.common.util.ReflectionUtils; import com.uber.hoodie.exception.HoodieIOException; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; 
-import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.io.IOException; import java.util.ArrayDeque; import java.util.Arrays; @@ -48,17 +43,22 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; - -import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK; -import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** - * Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged - * list of records which will be used as a lookup table when merging the base columnar file - * with the redo log file. - * + * Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of + * records which will be used as a lookup table when merging the base columnar file with the redo + * log file. 
*/ -public class HoodieCompactedLogRecordScanner implements Iterable> { +public class HoodieCompactedLogRecordScanner implements + Iterable> { + private final static Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class); // Final list of compacted/merged records to iterate @@ -80,10 +80,10 @@ public class HoodieCompactedLogRecordScanner implements Iterable lastBlocks = new ArrayDeque<>(); public HoodieCompactedLogRecordScanner(FileSystem fs, String basePath, List logFilePaths, - Schema readerSchema, String latestInstantTime) { + Schema readerSchema, String latestInstantTime) { this.readerSchema = readerSchema; this.latestInstantTime = latestInstantTime; - this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath); + this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath); // load class from the payload fully qualified class name this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass(); @@ -91,18 +91,18 @@ public class HoodieCompactedLogRecordScanner implements Iterable> records = Maps.newHashMap(); // iterate over the paths Iterator logFilePathsItr = logFilePaths.iterator(); - while(logFilePathsItr.hasNext()) { + while (logFilePathsItr.hasNext()) { HoodieLogFile logFile = new HoodieLogFile(new Path(logFilePathsItr.next())); log.info("Scanning log file " + logFile.getPath()); totalLogFiles.incrementAndGet(); try { // Use the HoodieLogFormatReader to iterate through the blocks in the log file HoodieLogFormatReader reader = new HoodieLogFormatReader(fs, logFile, readerSchema, true); - while(reader.hasNext()) { + while (reader.hasNext()) { HoodieLogBlock r = reader.next(); String blockInstantTime = r.getLogMetadata().get(INSTANT_TIME); - if(!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime, - HoodieTimeline.LESSER_OR_EQUAL)) { + if (!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime, + HoodieTimeline.LESSER_OR_EQUAL)) { //hit a block with instant time 
greater than should be processed, stop processing further break; } @@ -117,7 +117,7 @@ public class HoodieCompactedLogRecordScanner implements Iterable> loadRecordsFromBlock(HoodieAvroDataBlock dataBlock) { - Map> recordsFromLastBlock = Maps.newHashMap(); + private Map> loadRecordsFromBlock( + HoodieAvroDataBlock dataBlock) { + Map> recordsFromLastBlock = Maps + .newHashMap(); List recs = dataBlock.getRecords(); totalLogRecords.addAndGet(recs.size()); recs.forEach(rec -> { String key = ((GenericRecord) rec).get(HoodieRecord.RECORD_KEY_METADATA_FIELD) - .toString(); + .toString(); String partitionPath = - ((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD) - .toString(); + ((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD) + .toString(); HoodieRecord hoodieRecord = new HoodieRecord<>( - new HoodieKey(key, partitionPath), - ReflectionUtils.loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class)); + new HoodieKey(key, partitionPath), + ReflectionUtils + .loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class)); if (recordsFromLastBlock.containsKey(key)) { // Merge and store the merged record HoodieRecordPayload combinedValue = recordsFromLastBlock.get(key).getData() - .preCombine(hoodieRecord.getData()); + .preCombine(hoodieRecord.getData()); recordsFromLastBlock - .put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), - combinedValue)); + .put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), + combinedValue)); } else { // Put the record as is recordsFromLastBlock.put(key, hoodieRecord); @@ -211,12 +214,9 @@ public class HoodieCompactedLogRecordScanner implements Iterable> records, - Deque lastBlocks) { + Deque lastBlocks) { while (!lastBlocks.isEmpty()) { HoodieLogBlock lastBlock = lastBlocks.pop(); switch (lastBlock.getBlockType()) { @@ -234,19 +234,16 @@ public class HoodieCompactedLogRecordScanner implements Iterable> 
records, - Map> recordsFromLastBlock) { + Map> recordsFromLastBlock) { recordsFromLastBlock.forEach((key, hoodieRecord) -> { if (records.containsKey(key)) { // Merge and store the merged record HoodieRecordPayload combinedValue = records.get(key).getData() - .preCombine(hoodieRecord.getData()); + .preCombine(hoodieRecord.getData()); records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), - combinedValue)); + combinedValue)); } else { // Put the record as is records.put(key, hoodieRecord); diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java index bae8aa126..2979bb619 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormat.java @@ -29,31 +29,36 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; /** - * File Format for Hoodie Log Files. - * The File Format consists of blocks each seperated with a MAGIC sync marker. - * A Block can either be a Data block, Command block or Delete Block. - * Data Block - Contains log records serialized as Avro Binary Format - * Command Block - Specific commands like RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block - * Delete Block - List of keys to delete - tombstone for keys + * File Format for Hoodie Log Files. The File Format consists of blocks each seperated with a MAGIC + * sync marker. A Block can either be a Data block, Command block or Delete Block. Data Block - + * Contains log records serialized as Avro Binary Format Command Block - Specific commands like + * RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of keys + * to delete - tombstone for keys */ public interface HoodieLogFormat { + /** - * Magic 4 bytes we put at the start of every block in the log file. 
Sync marker. - * We could make this file specific (generate a random 4 byte magic and stick it in the file header), but this I think is suffice for now - PR + * Magic 4 bytes we put at the start of every block in the log file. Sync marker. We could make + * this file specific (generate a random 4 byte magic and stick it in the file header), but this I + * think is suffice for now - PR */ - byte [] MAGIC = new byte [] {'H', 'U', 'D', 'I'}; + byte[] MAGIC = new byte[]{'H', 'U', 'D', 'I'}; /** * Writer interface to allow appending block to this file format */ interface Writer extends Closeable { - /** @return the path to this {@link HoodieLogFormat} */ + + /** + * @return the path to this {@link HoodieLogFormat} + */ HoodieLogFile getLogFile(); /** * Append Block returns a new Writer if the log is rolled */ Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException; + long getCurrentSize() throws IOException; } @@ -61,7 +66,10 @@ public interface HoodieLogFormat { * Reader interface which is an Iterator of HoodieLogBlock */ interface Reader extends Closeable, Iterator { - /** @return the path to this {@link HoodieLogFormat} */ + + /** + * @return the path to this {@link HoodieLogFormat} + */ HoodieLogFile getLogFile(); } @@ -70,6 +78,7 @@ public interface HoodieLogFormat { * Builder class to construct the default log format writer */ class WriterBuilder { + private final static Logger log = LogManager.getLogger(WriterBuilder.class); // Default max log file size 512 MB public static final long DEFAULT_SIZE_THRESHOLD = 512 * 1024 * 1024L; @@ -187,7 +196,8 @@ public interface HoodieLogFormat { return new WriterBuilder(); } - static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata) + static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, + boolean readMetadata) throws IOException { return new HoodieLogFormatReader(fs, logFile, readerSchema, 
readMetadata); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatReader.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatReader.java index d6a513a66..4168e27cf 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatReader.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatReader.java @@ -17,7 +17,6 @@ package com.uber.hoodie.common.table.log; import com.google.common.base.Preconditions; - import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock; import com.uber.hoodie.common.table.log.block.HoodieCommandBlock; @@ -38,11 +37,12 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; /** - * Scans a log file and provides block level iterator on the log file - * Loads the entire block contents in memory - * Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found) + * Scans a log file and provides block level iterator on the log file Loads the entire block + * contents in memory Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one + * is found) */ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { + private static final int DEFAULT_BUFFER_SIZE = 4096; private final static Logger log = LogManager.getLogger(HoodieLogFormatReader.class); @@ -53,14 +53,16 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { private HoodieLogBlock nextBlock = null; private boolean readMetadata = true; - HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, boolean readMetadata) throws IOException { + HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, + boolean readMetadata) throws IOException { this.inputStream = fs.open(logFile.getPath(), bufferSize); this.logFile = logFile; 
this.readerSchema = readerSchema; this.readMetadata = readMetadata; } - HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata) throws IOException { + HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, + boolean readMetadata) throws IOException { this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, readMetadata); } @@ -83,7 +85,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { // Skip blocksize in the stream and we should either find a sync marker (start of the next block) or EOF // If we did not find either of it, then this block is a corrupted block. boolean isCorrupted = isBlockCorrupt(blocksize); - if(isCorrupted) { + if (isCorrupted) { return createCorruptBlock(); } @@ -140,7 +142,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { } private long scanForNextAvailableBlockOffset() throws IOException { - while(true) { + while (true) { long currentPos = inputStream.getPos(); try { boolean isEOF = readMagic(); @@ -191,7 +193,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader { @Override public HoodieLogBlock next() { - if(nextBlock == null) { + if (nextBlock == null) { // may be hasNext is not called hasNext(); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatWriter.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatWriter.java index 277829e3e..26a0845e2 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatWriter.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieLogFormatWriter.java @@ -16,8 +16,6 @@ package com.uber.hoodie.common.table.log; -import com.google.common.base.Preconditions; - import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer; import com.uber.hoodie.common.table.log.HoodieLogFormat.WriterBuilder; @@ -35,8 
+33,8 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; /** - * HoodieLogFormatWriter can be used to append blocks to a log file - * Use HoodieLogFormat.WriterBuilder to construct + * HoodieLogFormatWriter can be used to append blocks to a log file Use + * HoodieLogFormat.WriterBuilder to construct */ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer { @@ -58,7 +56,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer { * @param sizeThreshold */ HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, - Short replication, Long sizeThreshold) + Short replication, Long sizeThreshold) throws IOException, InterruptedException { this.fs = fs; this.logFile = logFile; @@ -157,8 +155,9 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer { } public long getCurrentSize() throws IOException { - if(output == null) { - throw new IllegalStateException("Cannot get current size as the underlying stream has been closed already"); + if (output == null) { + throw new IllegalStateException( + "Cannot get current size as the underlying stream has been closed already"); } return output.getPos(); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieAvroDataBlock.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieAvroDataBlock.java index 9019689b7..d2f73ef1b 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieAvroDataBlock.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieAvroDataBlock.java @@ -18,6 +18,14 @@ package com.uber.hoodie.common.table.log.block; import com.uber.hoodie.common.util.HoodieAvroUtils; import com.uber.hoodie.exception.HoodieIOException; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; 
+import java.util.List; +import java.util.Map; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; @@ -27,30 +35,18 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - /** - * DataBlock contains a list of records serialized using Avro. - * The Datablock contains - * 1. Compressed Writer Schema length - * 2. Compressed Writer Schema content - * 3. Total number of records in the block - * 4. Size of a record - * 5. Actual avro serialized content of the record + * DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Compressed + * Writer Schema length 2. Compressed Writer Schema content 3. Total number of records in the block + * 4. Size of a record 5. Actual avro serialized content of the record */ public class HoodieAvroDataBlock extends HoodieLogBlock { private List records; private Schema schema; - public HoodieAvroDataBlock(List records, Schema schema, Map metadata) { + public HoodieAvroDataBlock(List records, Schema schema, + Map metadata) { super(metadata); this.records = records; this.schema = schema; @@ -76,7 +72,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock { DataOutputStream output = new DataOutputStream(baos); // 1. 
Write out metadata - if(super.getLogMetadata() != null) { + if (super.getLogMetadata() != null) { output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata())); } @@ -117,12 +113,13 @@ public class HoodieAvroDataBlock extends HoodieLogBlock { return HoodieLogBlockType.AVRO_DATA_BLOCK; } - public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata) throws IOException { + public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata) + throws IOException { DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content)); Map metadata = null; // 1. Read the metadata written out, if applicable - if(readMetadata) { + if (readMetadata) { metadata = HoodieLogBlock.getLogMetadata(dis); } // 1. Read the schema written out @@ -131,7 +128,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock { dis.readFully(compressedSchema, 0, schemaLength); Schema writerSchema = new Schema.Parser().parse(HoodieAvroUtils.decompress(compressedSchema)); - if(readerSchema == null) { + if (readerSchema == null) { readerSchema = writerSchema; } @@ -141,7 +138,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock { List records = new ArrayList<>(totalRecords); // 3. 
Read the content - for(int i=0;i metadata) { + public HoodieCommandBlock(HoodieCommandBlockTypeEnum type, + Map metadata) { super(metadata); this.type = type; } @@ -46,7 +47,7 @@ public class HoodieCommandBlock extends HoodieLogBlock { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(baos); - if(super.getLogMetadata() != null) { + if (super.getLogMetadata() != null) { output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata())); } output.writeInt(type.ordinal()); @@ -66,7 +67,7 @@ public class HoodieCommandBlock extends HoodieLogBlock { public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException { DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content)); Map metadata = null; - if(readMetadata) { + if (readMetadata) { metadata = HoodieLogBlock.getLogMetadata(dis); } int ordinal = dis.readInt(); diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieCorruptBlock.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieCorruptBlock.java index 1e79a6241..3858ae54e 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieCorruptBlock.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieCorruptBlock.java @@ -44,7 +44,7 @@ public class HoodieCorruptBlock extends HoodieLogBlock { public byte[] getBytes() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(baos); - if(super.getLogMetadata() != null) { + if (super.getLogMetadata() != null) { output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata())); } output.write(corruptedBytes); @@ -60,20 +60,21 @@ public class HoodieCorruptBlock extends HoodieLogBlock { return corruptedBytes; } - public static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata) throws IOException { + public 
static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata) + throws IOException { DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content)); Map metadata = null; int bytesRemaining = blockSize; - if(readMetadata) { + if (readMetadata) { try { //attempt to read metadata metadata = HoodieLogBlock.getLogMetadata(dis); bytesRemaining = blockSize - HoodieLogBlock.getLogMetadataBytes(metadata).length; - } catch(IOException e) { + } catch (IOException e) { // unable to read metadata, possibly corrupted metadata = null; } } - byte [] corruptedBytes = new byte[bytesRemaining]; + byte[] corruptedBytes = new byte[bytesRemaining]; dis.readFully(corruptedBytes); return new HoodieCorruptBlock(corruptedBytes, metadata); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieDeleteBlock.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieDeleteBlock.java index c1256a4f6..485bfdcc0 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieDeleteBlock.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieDeleteBlock.java @@ -16,8 +16,6 @@ package com.uber.hoodie.common.table.log.block; -import org.apache.commons.lang3.StringUtils; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -25,6 +23,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.nio.charset.Charset; import java.util.Map; +import org.apache.commons.lang3.StringUtils; /** * Delete block contains a list of keys to be deleted from scanning the blocks so far @@ -46,10 +45,10 @@ public class HoodieDeleteBlock extends HoodieLogBlock { public byte[] getBytes() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(baos); - if(super.getLogMetadata() != null) { + if (super.getLogMetadata() != null) { 
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata())); } - byte [] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8")); + byte[] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8")); output.writeInt(bytesToWrite.length); output.write(bytesToWrite); return baos.toByteArray(); @@ -67,11 +66,11 @@ public class HoodieDeleteBlock extends HoodieLogBlock { public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException { DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content)); Map metadata = null; - if(readMetadata) { + if (readMetadata) { metadata = HoodieLogBlock.getLogMetadata(dis); } int dataLength = dis.readInt(); - byte [] data = new byte[dataLength]; + byte[] data = new byte[dataLength]; dis.readFully(data); return new HoodieDeleteBlock(new String(data).split(","), metadata); } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java index cf4f90859..817016100 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java @@ -18,7 +18,6 @@ package com.uber.hoodie.common.table.log.block; import com.google.common.collect.Maps; import com.uber.hoodie.exception.HoodieException; - import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -34,6 +33,7 @@ public abstract class HoodieLogBlock { public byte[] getBytes() throws IOException { throw new HoodieException("No implementation was provided"); } + public HoodieLogBlockType getBlockType() { throw new HoodieException("No implementation was provided"); } @@ -42,8 +42,8 @@ public abstract class HoodieLogBlock { private Map logMetadata; /** - * Type of the log block - * WARNING: 
This enum is serialized as the ordinal. Only add new enums at the end. + * Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at + * the end. */ public enum HoodieLogBlockType { COMMAND_BLOCK, @@ -53,8 +53,8 @@ public abstract class HoodieLogBlock { } /** - * Metadata abstraction for a HoodieLogBlock - * WARNING : This enum is serialized as the ordinal. Only add new enums at the end. + * Metadata abstraction for a HoodieLogBlock WARNING : This enum is serialized as the ordinal. + * Only add new enums at the end. */ public enum LogMetadataType { INSTANT_TIME, @@ -70,21 +70,17 @@ public abstract class HoodieLogBlock { } /** - * Convert log metadata to bytes - * 1. Write size of metadata - * 2. Write enum ordinal - * 3. Write actual bytes - * @param metadata - * @return - * @throws IOException + * Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual + * bytes */ - public static byte [] getLogMetadataBytes(Map metadata) throws IOException { + public static byte[] getLogMetadataBytes(Map metadata) + throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream output = new DataOutputStream(baos); output.writeInt(metadata.size()); - for(Map.Entry entry : metadata.entrySet()) { + for (Map.Entry entry : metadata.entrySet()) { output.writeInt(entry.getKey().ordinal()); - byte [] bytes = entry.getValue().getBytes(); + byte[] bytes = entry.getValue().getBytes(); output.writeInt(bytes.length); output.write(bytes); } @@ -92,13 +88,10 @@ public abstract class HoodieLogBlock { } /** - * Convert bytes to LogMetadata, follow the same order as - * {@link HoodieLogBlock#getLogMetadataBytes} - * @param dis - * @return - * @throws IOException + * Convert bytes to LogMetadata, follow the same order as {@link HoodieLogBlock#getLogMetadataBytes} */ - public static Map getLogMetadata(DataInputStream dis) throws IOException { + public static Map getLogMetadata(DataInputStream 
dis) + throws IOException { Map metadata = Maps.newHashMap(); // 1. Read the metadata written out @@ -113,7 +106,7 @@ public abstract class HoodieLogBlock { metadataCount--; } return metadata; - } catch(EOFException eof) { + } catch (EOFException eof) { throw new IOException("Could not read metadata fields ", eof); } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieActiveTimeline.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieActiveTimeline.java index 3d739f156..6848d4a21 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieActiveTimeline.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieActiveTimeline.java @@ -22,7 +22,18 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; +import java.io.IOException; +import java.io.Serializable; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Comparator; import java.util.Date; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -31,297 +42,273 @@ import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.io.IOException; -import java.io.Serializable; -import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Comparator; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import java.util.stream.Stream; - /** - * Represents the Active Timeline for the 
HoodieDataset. Instants for the last 12 hours (configurable) - * is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline - * that allows for creation of instants on the timeline. - *

- * The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload() - * so that they can chain multiple mutations to the timeline and then call reload() once. - *

- * This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized. + * Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours + * (configurable) is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special + * timeline that allows for creation of instants on the timeline.

The timeline is not + * automatically reloaded on any mutation operation, clients have to manually call reload() so that + * they can chain multiple mutations to the timeline and then call reload() once.

This class + * can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized. */ public class HoodieActiveTimeline extends HoodieDefaultTimeline { - public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss"); + + public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss"); - private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class); - private String metaPath; - private transient FileSystem fs; + private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class); + private String metaPath; + private transient FileSystem fs; - /** - * Returns next commit time in the {@link #COMMIT_FORMATTER} format. - * @return - */ - public static String createNewCommitTime() { - return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date()); + /** + * Returns next commit time in the {@link #COMMIT_FORMATTER} format. + */ + public static String createNewCommitTime() { + return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date()); + } + + protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) { + // Filter all the filter in the metapath and include only the extensions passed and + // convert them into HoodieInstant + try { + this.instants = + Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> { + // Include only the meta files with extensions that needs to be included + String extension = FSUtils.getFileExtension(path.getName()); + return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension)); + })).sorted(Comparator.comparing( + // Sort the meta-data by the instant time (first part of the file name) + fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName()))) + // create HoodieInstantMarkers from FileStatus, which extracts properties + .map(HoodieInstant::new).collect(Collectors.toList()); + log.info("Loaded instants " + 
instants); + } catch (IOException e) { + throw new HoodieIOException("Failed to scan metadata", e); } + this.fs = fs; + this.metaPath = metaPath; + // multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 + this.details = (Function> & Serializable) this::getInstantDetails; + } - protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) { - // Filter all the filter in the metapath and include only the extensions passed and - // convert them into HoodieInstant - try { - this.instants = - Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> { - // Include only the meta files with extensions that needs to be included - String extension = FSUtils.getFileExtension(path.getName()); - return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension)); - })).sorted(Comparator.comparing( - // Sort the meta-data by the instant time (first part of the file name) - fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName()))) - // create HoodieInstantMarkers from FileStatus, which extracts properties - .map(HoodieInstant::new).collect(Collectors.toList()); - log.info("Loaded instants " + instants); - } catch (IOException e) { - throw new HoodieIOException("Failed to scan metadata", e); - } - this.fs = fs; - this.metaPath = metaPath; - // multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 - this.details = (Function> & Serializable) this::getInstantDetails; + public HoodieActiveTimeline(FileSystem fs, String metaPath) { + this(fs, metaPath, + new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION, + INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION, + INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION, + CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION}); + } + + /** + * For serialization and de-serialization 
only. + * + * @deprecated + */ + public HoodieActiveTimeline() { + } + + /** + * This method is only used when this object is deserialized in a spark executor. + * + * @deprecated + */ + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + this.fs = FSUtils.getFs(); + } + + /** + * Get all instants (commits, delta commits, compactions) that produce new data, in the active + * timeline * + */ + public HoodieTimeline getCommitsAndCompactionsTimeline() { + return getTimelineOfActions( + Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION)); + } + + /** + * Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result + * in actions, in the active timeline * + */ + public HoodieTimeline getAllCommitsTimeline() { + return getTimelineOfActions( + Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, + SAVEPOINT_ACTION, ROLLBACK_ACTION)); + } + + /** + * Get only pure commits (inflight and completed) in the active timeline + */ + public HoodieTimeline getCommitTimeline() { + return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION)); + } + + /** + * Get only the delta commits (inflight and completed) in the active timeline + */ + public HoodieTimeline getDeltaCommitTimeline() { + return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION), + (Function> & Serializable) this::getInstantDetails); + } + + /** + * Get only the commits (inflight and completed) in the compaction timeline + */ + public HoodieTimeline getCompactionTimeline() { + return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION), + (Function> & Serializable) this::getInstantDetails); + } + + /** + * Get a timeline of a specific set of actions. 
useful to create a merged timeline of multiple + * actions + * + * @param actions actions allowed in the timeline + */ + public HoodieTimeline getTimelineOfActions(Set actions) { + return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())), + (Function> & Serializable) this::getInstantDetails); + } + + + /** + * Get only the cleaner action (inflight and completed) in the active timeline + */ + public HoodieTimeline getCleanerTimeline() { + return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION), + (Function> & Serializable) this::getInstantDetails); + } + + /** + * Get only the rollback action (inflight and completed) in the active timeline + */ + public HoodieTimeline getRollbackTimeline() { + return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION), + (Function> & Serializable) this::getInstantDetails); + } + + /** + * Get only the save point action (inflight and completed) in the active timeline + */ + public HoodieTimeline getSavePointTimeline() { + return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION), + (Function> & Serializable) this::getInstantDetails); + } + + + protected Stream filterInstantsByAction(String action) { + return instants.stream().filter(s -> s.getAction().equals(action)); + } + + public void createInflight(HoodieInstant instant) { + log.info("Creating a new in-flight instant " + instant); + // Create the in-flight file + createFileInMetaPath(instant.getFileName(), Optional.empty()); + } + + public void saveAsComplete(HoodieInstant instant, Optional data) { + log.info("Marking instant complete " + instant); + Preconditions.checkArgument(instant.isInflight(), + "Could not mark an already completed instant as complete again " + instant); + moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data); + log.info("Completed " + instant); + } + + public void revertToInflight(HoodieInstant instant) { + log.info("Reverting instant to inflight 
" + instant); + moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant)); + log.info("Reverted " + instant + " to inflight"); + } + + public void deleteInflight(HoodieInstant instant) { + log.info("Deleting in-flight " + instant); + Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName()); + try { + boolean result = fs.delete(inFlightCommitFilePath, false); + if (result) { + log.info("Removed in-flight " + instant); + } else { + throw new HoodieIOException("Could not delete in-flight instant " + instant); + } + } catch (IOException e) { + throw new HoodieIOException( + "Could not remove inflight commit " + inFlightCommitFilePath, e); } + } - public HoodieActiveTimeline(FileSystem fs, String metaPath) { - this(fs, metaPath, - new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION, - INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION, - INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION, - CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION}); + @Override + public Optional getInstantDetails(HoodieInstant instant) { + Path detailPath = new Path(metaPath, instant.getFileName()); + return readDataFromPath(detailPath); + } + + protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed, + Optional data) { + Path commitFilePath = new Path(metaPath, completed.getFileName()); + try { + // open a new file and write the commit metadata in + Path inflightCommitFile = new Path(metaPath, inflight.getFileName()); + createFileInMetaPath(inflight.getFileName(), data); + boolean success = fs.rename(inflightCommitFile, commitFilePath); + if (!success) { + throw new HoodieIOException( + "Could not rename " + inflightCommitFile + " to " + commitFilePath); + } + } catch (IOException e) { + throw new HoodieIOException("Could not complete " + inflight, e); } + } - /** - * For serialization and de-serialization only. 
- * @deprecated - */ - public HoodieActiveTimeline() { - } - - /** - * This method is only used when this object is deserialized in a spark executor. - * - * @deprecated - */ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - this.fs = FSUtils.getFs(); - } - - /** - * Get all instants (commits, delta commits, compactions) that produce new data, in the active timeline - ** - * @return - */ - public HoodieTimeline getCommitsAndCompactionsTimeline() { - return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION)); - } - - /** - * Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result in actions, in the active timeline - ** - * @return - */ - public HoodieTimeline getAllCommitsTimeline() { - return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION)); - } - - /** - * Get only pure commits (inflight and completed) in the active timeline - * - * @return - */ - public HoodieTimeline getCommitTimeline() { - return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION)); - } - - /** - * Get only the delta commits (inflight and completed) in the active timeline - * - * @return - */ - public HoodieTimeline getDeltaCommitTimeline() { - return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION), - (Function> & Serializable) this::getInstantDetails); - } - - /** - * Get only the commits (inflight and completed) in the compaction timeline - * - * @return - */ - public HoodieTimeline getCompactionTimeline() { - return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION), - (Function> & Serializable) this::getInstantDetails); - } - - /** - * Get a timeline of a specific set of actions. 
useful to create a merged timeline of multiple actions - * - * @param actions actions allowed in the timeline - * @return - */ - public HoodieTimeline getTimelineOfActions(Set actions) { - return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())), - (Function> & Serializable) this::getInstantDetails); - } - - - /** - * Get only the cleaner action (inflight and completed) in the active timeline - * - * @return - */ - public HoodieTimeline getCleanerTimeline() { - return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION), - (Function> & Serializable) this::getInstantDetails); - } - - /** - * Get only the rollback action (inflight and completed) in the active timeline - * - * @return - */ - public HoodieTimeline getRollbackTimeline() { - return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION), - (Function> & Serializable) this::getInstantDetails); - } - - /** - * Get only the save point action (inflight and completed) in the active timeline - * - * @return - */ - public HoodieTimeline getSavePointTimeline() { - return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION), - (Function> & Serializable) this::getInstantDetails); - } - - - protected Stream filterInstantsByAction(String action) { - return instants.stream().filter(s -> s.getAction().equals(action)); - } - - public void createInflight(HoodieInstant instant) { - log.info("Creating a new in-flight instant " + instant); - // Create the in-flight file - createFileInMetaPath(instant.getFileName(), Optional.empty()); - } - - public void saveAsComplete(HoodieInstant instant, Optional data) { - log.info("Marking instant complete " + instant); - Preconditions.checkArgument(instant.isInflight(), - "Could not mark an already completed instant as complete again " + instant); - moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data); - log.info("Completed " + instant); - } - - public void 
revertToInflight(HoodieInstant instant) { - log.info("Reverting instant to inflight " + instant); - moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant)); - log.info("Reverted " + instant + " to inflight"); - } - - public void deleteInflight(HoodieInstant instant) { - log.info("Deleting in-flight " + instant); - Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName()); - try { - boolean result = fs.delete(inFlightCommitFilePath, false); - if (result) { - log.info("Removed in-flight " + instant); - } else { - throw new HoodieIOException("Could not delete in-flight instant " + instant); - } - } catch (IOException e) { - throw new HoodieIOException( - "Could not remove inflight commit " + inFlightCommitFilePath, e); - } - } - - @Override - public Optional getInstantDetails(HoodieInstant instant) { - Path detailPath = new Path(metaPath, instant.getFileName()); - return readDataFromPath(detailPath); - } - - protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed, - Optional data) { + protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) { + Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName()); + try { + if (!fs.exists(inFlightCommitFilePath)) { Path commitFilePath = new Path(metaPath, completed.getFileName()); - try { - // open a new file and write the commit metadata in - Path inflightCommitFile = new Path(metaPath, inflight.getFileName()); - createFileInMetaPath(inflight.getFileName(), data); - boolean success = fs.rename(inflightCommitFile, commitFilePath); - if (!success) { - throw new HoodieIOException( - "Could not rename " + inflightCommitFile + " to " + commitFilePath); - } - } catch (IOException e) { - throw new HoodieIOException("Could not complete " + inflight, e); + boolean success = fs.rename(commitFilePath, inFlightCommitFilePath); + if (!success) { + throw new HoodieIOException( + "Could not rename " + commitFilePath + " to " + 
inFlightCommitFilePath); } + } + } catch (IOException e) { + throw new HoodieIOException("Could not complete revert " + completed, e); } + } - protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) { - Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName()); - try { - if (!fs.exists(inFlightCommitFilePath)) { - Path commitFilePath = new Path(metaPath, completed.getFileName()); - boolean success = fs.rename(commitFilePath, inFlightCommitFilePath); - if (!success) { - throw new HoodieIOException( - "Could not rename " + commitFilePath + " to " + inFlightCommitFilePath); - } - } - } catch (IOException e) { - throw new HoodieIOException("Could not complete revert " + completed, e); + public void saveToInflight(HoodieInstant instant, Optional content) { + createFileInMetaPath(instant.getFileName(), content); + } + + protected void createFileInMetaPath(String filename, Optional content) { + Path fullPath = new Path(metaPath, filename); + try { + if (!content.isPresent()) { + if (fs.createNewFile(fullPath)) { + log.info("Created a new file in meta path: " + fullPath); + return; } + } else { + FSDataOutputStream fsout = fs.create(fullPath, true); + fsout.write(content.get()); + fsout.close(); + return; + } + throw new HoodieIOException("Failed to create file " + fullPath); + } catch (IOException e) { + throw new HoodieIOException("Failed to create file " + fullPath, e); } + } - public void saveToInflight(HoodieInstant instant, Optional content) { - createFileInMetaPath(instant.getFileName(), content); + protected Optional readDataFromPath(Path detailPath) { + try (FSDataInputStream is = fs.open(detailPath)) { + return Optional.of(IOUtils.toByteArray(is)); + } catch (IOException e) { + throw new HoodieIOException("Could not read commit details from " + detailPath, e); } + } - protected void createFileInMetaPath(String filename, Optional content) { - Path fullPath = new Path(metaPath, filename); - try { - if 
(!content.isPresent()) { - if (fs.createNewFile(fullPath)) { - log.info("Created a new file in meta path: " + fullPath); - return; - } - } else { - FSDataOutputStream fsout = fs.create(fullPath, true); - fsout.write(content.get()); - fsout.close(); - return; - } - throw new HoodieIOException("Failed to create file " + fullPath); - } catch (IOException e) { - throw new HoodieIOException("Failed to create file " + fullPath, e); - } - } - - protected Optional readDataFromPath(Path detailPath) { - try (FSDataInputStream is = fs.open(detailPath)) { - return Optional.of(IOUtils.toByteArray(is)); - } catch (IOException e) { - throw new HoodieIOException("Could not read commit details from " + detailPath, e); - } - } - - public HoodieActiveTimeline reload() { - return new HoodieActiveTimeline(fs, metaPath); - } + public HoodieActiveTimeline reload() { + return new HoodieActiveTimeline(fs, metaPath); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieArchivedTimeline.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieArchivedTimeline.java index 458cf6eb8..37e5e9414 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieArchivedTimeline.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieArchivedTimeline.java @@ -19,13 +19,6 @@ package com.uber.hoodie.common.table.timeline; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.io.IOException; import java.io.Serializable; import java.util.Arrays; @@ -34,79 +27,85 @@ import java.util.Map; import java.util.Optional; import java.util.function.Function; import 
java.util.stream.Collectors; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** - * Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable) - * is in the ActiveTimeline and the rest are in ArchivedTimeline. - *

- * Instants are read from the archive file during initialization and never refreshed. To refresh, clients - * need to call reload() - *

- * This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized. + * Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours + * (configurable) is in the ActiveTimeline and the rest are in ArchivedTimeline.

Instants + * are read from the archive file during initialization and never refreshed. To refresh, clients + * need to call reload()

This class can be serialized and de-serialized and on + * de-serialization the FileSystem is re-initialized. */ public class HoodieArchivedTimeline extends HoodieDefaultTimeline { - private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits"; - private transient FileSystem fs; - private String metaPath; - private Map readCommits = new HashMap<>(); - private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class); + private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits"; + private transient FileSystem fs; + private String metaPath; + private Map readCommits = new HashMap<>(); - public HoodieArchivedTimeline(FileSystem fs, String metaPath) { - // Read back the commits to make sure - Path archiveLogPath = getArchiveLogPath(metaPath); - try (SequenceFile.Reader reader = - new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) { - Text key = new Text(); - Text val = new Text(); - while (reader.next(key, val)) { - // TODO - limit the number of commits loaded in memory. this could get very large. 
- // This is okay because only tooling will load the archived commit timeline today - readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength())); - } - this.instants = readCommits.keySet().stream().map( - s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect( - Collectors.toList()); - } catch (IOException e) { - throw new HoodieIOException( - "Could not load archived commit timeline from path " + archiveLogPath, e); - } - // multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 - this.details = (Function> & Serializable) this::getInstantDetails; - this.fs = fs; - this.metaPath = metaPath; + private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class); + + public HoodieArchivedTimeline(FileSystem fs, String metaPath) { + // Read back the commits to make sure + Path archiveLogPath = getArchiveLogPath(metaPath); + try (SequenceFile.Reader reader = + new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) { + Text key = new Text(); + Text val = new Text(); + while (reader.next(key, val)) { + // TODO - limit the number of commits loaded in memory. this could get very large. 
+ // This is okay because only tooling will load the archived commit timeline today + readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength())); + } + this.instants = readCommits.keySet().stream().map( + s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect( + Collectors.toList()); + } catch (IOException e) { + throw new HoodieIOException( + "Could not load archived commit timeline from path " + archiveLogPath, e); } + // multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 + this.details = (Function> & Serializable) this::getInstantDetails; + this.fs = fs; + this.metaPath = metaPath; + } - /** - * For serialization and de-serialization only. - * @deprecated - */ - public HoodieArchivedTimeline() { - } + /** + * For serialization and de-serialization only. + * + * @deprecated + */ + public HoodieArchivedTimeline() { + } - /** - * This method is only used when this object is deserialized in a spark executor. - * - * @deprecated - */ - private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { - in.defaultReadObject(); - this.fs = FSUtils.getFs(); - } + /** + * This method is only used when this object is deserialized in a spark executor. 
+ * + * @deprecated + */ + private void readObject(java.io.ObjectInputStream in) + throws IOException, ClassNotFoundException { + in.defaultReadObject(); + this.fs = FSUtils.getFs(); + } - public static Path getArchiveLogPath(String metaPath) { - return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE); - } + public static Path getArchiveLogPath(String metaPath) { + return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE); + } - @Override - public Optional getInstantDetails(HoodieInstant instant) { - return Optional.ofNullable(readCommits.get(instant.getTimestamp())); - } + @Override + public Optional getInstantDetails(HoodieInstant instant) { + return Optional.ofNullable(readCommits.get(instant.getTimestamp())); + } - public HoodieArchivedTimeline reload() { - return new HoodieArchivedTimeline(fs, metaPath); - } + public HoodieArchivedTimeline reload() { + return new HoodieArchivedTimeline(fs, metaPath); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieDefaultTimeline.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieDefaultTimeline.java index e250640c6..3a0240239 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieDefaultTimeline.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieDefaultTimeline.java @@ -17,135 +17,136 @@ package com.uber.hoodie.common.table.timeline; import com.uber.hoodie.common.table.HoodieTimeline; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.util.List; import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** - * HoodieDefaultTimeline is a default implementation of the HoodieTimeline. - * It provides methods to inspect a List[HoodieInstant]. Function to get the details of the instant - * is passed in as a lamdba. 
+ * HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to + * inspect a List[HoodieInstant]. Function to get the details of the instant is passed in as a + * lamdba. * * @see HoodieTimeline */ public class HoodieDefaultTimeline implements HoodieTimeline { - private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class); - protected Function> details; - protected List instants; + private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class); - public HoodieDefaultTimeline(Stream instants, - Function> details) { - this.instants = instants.collect(Collectors.toList()); - this.details = details; - } + protected Function> details; + protected List instants; - /** - * For serailizing and de-serializing - * - * @deprecated - */ - public HoodieDefaultTimeline() { - } + public HoodieDefaultTimeline(Stream instants, + Function> details) { + this.instants = instants.collect(Collectors.toList()); + this.details = details; + } - public HoodieTimeline filterInflights() { - return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight), - details); - } + /** + * For serailizing and de-serializing + * + * @deprecated + */ + public HoodieDefaultTimeline() { + } - public HoodieTimeline filterCompletedInstants() { - return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details); - } + public HoodieTimeline filterInflights() { + return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight), + details); + } - @Override - public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) { - return new HoodieDefaultTimeline(instants.stream().filter( - s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) && - HoodieTimeline.compareTimestamps( + public HoodieTimeline filterCompletedInstants() { + return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), 
details); + } + + @Override + public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) { + return new HoodieDefaultTimeline(instants.stream().filter( + s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) && + HoodieTimeline.compareTimestamps( s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details); - } + } - @Override - public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) { - return new HoodieDefaultTimeline( - instants.stream().filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)) - .limit(numCommits), details); - } + @Override + public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) { + return new HoodieDefaultTimeline( + instants.stream() + .filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER)) + .limit(numCommits), details); + } - @Override - public boolean empty() { - return !instants.stream().findFirst().isPresent(); - } + @Override + public boolean empty() { + return !instants.stream().findFirst().isPresent(); + } - @Override - public int countInstants() { - return new Long(instants.stream().count()).intValue(); - } + @Override + public int countInstants() { + return new Long(instants.stream().count()).intValue(); + } - @Override - public Optional firstInstant() { - return instants.stream().findFirst(); - } + @Override + public Optional firstInstant() { + return instants.stream().findFirst(); + } - @Override - public Optional nthInstant(int n) { - if (empty() || n >= countInstants()) { - return Optional.empty(); - } - return Optional.of(instants.get(n)); + @Override + public Optional nthInstant(int n) { + if (empty() || n >= countInstants()) { + return Optional.empty(); } + return Optional.of(instants.get(n)); + } - @Override - public Optional lastInstant() { - return empty() ? Optional.empty() : nthInstant(countInstants() - 1); - } + @Override + public Optional lastInstant() { + return empty() ? 
Optional.empty() : nthInstant(countInstants() - 1); + } - @Override - public Optional nthFromLastInstant(int n) { - if (countInstants() < n + 1) { - return Optional.empty(); - } - return nthInstant(countInstants() - 1 - n); + @Override + public Optional nthFromLastInstant(int n) { + if (countInstants() < n + 1) { + return Optional.empty(); } + return nthInstant(countInstants() - 1 - n); + } - @Override - public boolean containsInstant(HoodieInstant instant) { - return instants.stream().anyMatch(s -> s.equals(instant)); - } + @Override + public boolean containsInstant(HoodieInstant instant) { + return instants.stream().anyMatch(s -> s.equals(instant)); + } - @Override - public boolean containsOrBeforeTimelineStarts(String instant) { - return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant)) - || isBeforeTimelineStarts(instant); - } + @Override + public boolean containsOrBeforeTimelineStarts(String instant) { + return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant)) + || isBeforeTimelineStarts(instant); + } - @Override - public Stream getInstants() { - return instants.stream(); - } + @Override + public Stream getInstants() { + return instants.stream(); + } - @Override - public boolean isBeforeTimelineStarts(String instant) { - Optional firstCommit = firstInstant(); - return firstCommit.isPresent() && - HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER); - } + @Override + public boolean isBeforeTimelineStarts(String instant) { + Optional firstCommit = firstInstant(); + return firstCommit.isPresent() && + HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER); + } - @Override - public Optional getInstantDetails(HoodieInstant instant) { - return details.apply(instant); - } + @Override + public Optional getInstantDetails(HoodieInstant instant) { + return details.apply(instant); + } - @Override - public String toString() { - return this.getClass().getName() + ": " + 
instants.stream().map(Object::toString) - .collect(Collectors.joining(",")); - } + @Override + public String toString() { + return this.getClass().getName() + ": " + instants.stream().map(Object::toString) + .collect(Collectors.joining(",")); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieInstant.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieInstant.java index 584105dee..bf27b7db2 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieInstant.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/timeline/HoodieInstant.java @@ -16,118 +16,117 @@ package com.uber.hoodie.common.table.timeline; -import com.google.common.io.Files; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.util.FSUtils; -import org.apache.hadoop.fs.FileStatus; - import java.io.Serializable; import java.util.Objects; +import org.apache.hadoop.fs.FileStatus; /** - * A Hoodie Instant represents a action done on a hoodie dataset. - * All actions start with a inflight instant and then create a completed instant after done. + * A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight + * instant and then create a completed instant after done. * * @see HoodieTimeline */ public class HoodieInstant implements Serializable { - private boolean isInflight = false; - private String action; - private String timestamp; - /** - * Load the instant from the meta FileStatus - * @param fileStatus - */ - public HoodieInstant(FileStatus fileStatus) { - // First read the instant timestamp. 
[==>20170101193025<==].commit - String fileName = fileStatus.getPath().getName(); - String fileExtension = FSUtils.getFileExtension(fileName); - timestamp = fileName.replace(fileExtension, ""); + private boolean isInflight = false; + private String action; + private String timestamp; - // Next read the action for this marker - action = fileExtension.replaceFirst(".", ""); - if(action.equals("inflight")) { - // This is to support backwards compatibility on how in-flight commit files were written - // General rule is inflight extension is ..inflight, but for commit it is .inflight - action = "commit"; - isInflight = true; - } else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) { - isInflight = true; - action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, ""); - } + /** + * Load the instant from the meta FileStatus + */ + public HoodieInstant(FileStatus fileStatus) { + // First read the instant timestamp. [==>20170101193025<==].commit + String fileName = fileStatus.getPath().getName(); + String fileExtension = FSUtils.getFileExtension(fileName); + timestamp = fileName.replace(fileExtension, ""); + + // Next read the action for this marker + action = fileExtension.replaceFirst(".", ""); + if (action.equals("inflight")) { + // This is to support backwards compatibility on how in-flight commit files were written + // General rule is inflight extension is ..inflight, but for commit it is .inflight + action = "commit"; + isInflight = true; + } else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) { + isInflight = true; + action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, ""); } + } - public HoodieInstant(boolean isInflight, String action, String timestamp) { - this.isInflight = isInflight; - this.action = action; - this.timestamp = timestamp; - } + public HoodieInstant(boolean isInflight, String action, String timestamp) { + this.isInflight = isInflight; + this.action = action; + this.timestamp = timestamp; + } - public boolean isInflight() { - 
return isInflight; - } + public boolean isInflight() { + return isInflight; + } - public String getAction() { - return action; - } + public String getAction() { + return action; + } - public String getTimestamp() { - return timestamp; - } + public String getTimestamp() { + return timestamp; + } - /** - * Get the filename for this instant - * @return - */ - public String getFileName() { - if (HoodieTimeline.COMMIT_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightCommitFileName(timestamp) : - HoodieTimeline.makeCommitFileName(timestamp); - } else if (HoodieTimeline.CLEAN_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightCleanerFileName(timestamp) : - HoodieTimeline.makeCleanerFileName(timestamp); - } else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightRollbackFileName(timestamp) : - HoodieTimeline.makeRollbackFileName(timestamp); - } else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightSavePointFileName(timestamp) : - HoodieTimeline.makeSavePointFileName(timestamp); - } else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightCompactionFileName(timestamp) : - HoodieTimeline.makeCompactionFileName(timestamp); - } else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) { - return isInflight ? - HoodieTimeline.makeInflightDeltaFileName(timestamp) : - HoodieTimeline.makeDeltaFileName(timestamp); - } - throw new IllegalArgumentException("Cannot get file name for unknown action " + action); + /** + * Get the filename for this instant + */ + public String getFileName() { + if (HoodieTimeline.COMMIT_ACTION.equals(action)) { + return isInflight ? + HoodieTimeline.makeInflightCommitFileName(timestamp) : + HoodieTimeline.makeCommitFileName(timestamp); + } else if (HoodieTimeline.CLEAN_ACTION.equals(action)) { + return isInflight ? 
+ HoodieTimeline.makeInflightCleanerFileName(timestamp) : + HoodieTimeline.makeCleanerFileName(timestamp); + } else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) { + return isInflight ? + HoodieTimeline.makeInflightRollbackFileName(timestamp) : + HoodieTimeline.makeRollbackFileName(timestamp); + } else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) { + return isInflight ? + HoodieTimeline.makeInflightSavePointFileName(timestamp) : + HoodieTimeline.makeSavePointFileName(timestamp); + } else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) { + return isInflight ? + HoodieTimeline.makeInflightCompactionFileName(timestamp) : + HoodieTimeline.makeCompactionFileName(timestamp); + } else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) { + return isInflight ? + HoodieTimeline.makeInflightDeltaFileName(timestamp) : + HoodieTimeline.makeDeltaFileName(timestamp); } + throw new IllegalArgumentException("Cannot get file name for unknown action " + action); + } - @Override - public boolean equals(Object o) { - if (this == o) - return true; - if (o == null || getClass() != o.getClass()) - return false; - HoodieInstant that = (HoodieInstant) o; - return isInflight == that.isInflight && - Objects.equals(action, that.action) && - Objects.equals(timestamp, that.timestamp); + @Override + public boolean equals(Object o) { + if (this == o) { + return true; } + if (o == null || getClass() != o.getClass()) { + return false; + } + HoodieInstant that = (HoodieInstant) o; + return isInflight == that.isInflight && + Objects.equals(action, that.action) && + Objects.equals(timestamp, that.timestamp); + } - @Override - public int hashCode() { - return Objects.hash(isInflight, action, timestamp); - } + @Override + public int hashCode() { + return Objects.hash(isInflight, action, timestamp); + } - @Override - public String toString() { - return "[" + ((isInflight) ? 
"==>" : "") + timestamp + "__" + action + "]"; - } + @Override + public String toString() { + return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]"; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemView.java b/hoodie-common/src/main/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemView.java index 6f1d63c44..afd2c89dc 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemView.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemView.java @@ -19,18 +19,12 @@ package com.uber.hoodie.common.table.view; import com.uber.hoodie.common.model.FileSlice; import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodieFileGroup; -import com.uber.hoodie.common.table.HoodieTableMetaClient; -import com.uber.hoodie.common.table.TableFileSystemView; -import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.model.HoodieLogFile; +import com.uber.hoodie.common.table.HoodieTableMetaClient; +import com.uber.hoodie.common.table.HoodieTimeline; +import com.uber.hoodie.common.table.TableFileSystemView; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; - -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -44,6 +38,10 @@ import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; /** * Common abstract implementation for multiple TableFileSystemView Implementations. 
2 possible @@ -54,8 +52,9 @@ import java.util.stream.Stream; * @see TableFileSystemView * @since 0.3.0 */ -public class HoodieTableFileSystemView implements TableFileSystemView, TableFileSystemView.ReadOptimizedView, - TableFileSystemView.RealtimeView, Serializable { +public class HoodieTableFileSystemView implements TableFileSystemView, + TableFileSystemView.ReadOptimizedView, + TableFileSystemView.RealtimeView, Serializable { protected HoodieTableMetaClient metaClient; protected transient FileSystem fs; @@ -69,12 +68,9 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile /** * Create a file system view, as of the given timeline - * - * @param metaClient - * @param visibleActiveTimeline */ public HoodieTableFileSystemView(HoodieTableMetaClient metaClient, - HoodieTimeline visibleActiveTimeline) { + HoodieTimeline visibleActiveTimeline) { this.metaClient = metaClient; this.fs = metaClient.getFs(); this.visibleActiveTimeline = visibleActiveTimeline; @@ -85,14 +81,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile /** * Create a file system view, as of the given timeline, with the provided file statuses. 
- * - * @param metaClient - * @param visibleActiveTimeline - * @param fileStatuses */ public HoodieTableFileSystemView(HoodieTableMetaClient metaClient, - HoodieTimeline visibleActiveTimeline, - FileStatus[] fileStatuses) { + HoodieTimeline visibleActiveTimeline, + FileStatus[] fileStatuses) { this(metaClient, visibleActiveTimeline); addFilesToView(fileStatuses); } @@ -104,44 +96,44 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile * @deprecated */ private void readObject(java.io.ObjectInputStream in) - throws IOException, ClassNotFoundException { + throws IOException, ClassNotFoundException { in.defaultReadObject(); this.fs = FSUtils.getFs(); } private void writeObject(java.io.ObjectOutputStream out) - throws IOException { + throws IOException { out.defaultWriteObject(); } /** * Adds the provided statuses into the file system view, and also caches it inside this object. - * - * @param statuses - * @return */ private List addFilesToView(FileStatus[] statuses) { - Map, List> dataFiles = convertFileStatusesToDataFiles(statuses) - .collect(Collectors.groupingBy((dataFile) -> { - String partitionPathStr = FSUtils.getRelativePartitionPath( - new Path(metaClient.getBasePath()), - dataFile.getFileStatus().getPath().getParent()); - return Pair.of(partitionPathStr , dataFile.getFileId()); - })); - Map, List> logFiles = convertFileStatusesToLogFiles(statuses) - .collect(Collectors.groupingBy((logFile) -> { - String partitionPathStr = FSUtils.getRelativePartitionPath( - new Path(metaClient.getBasePath()), - logFile.getPath().getParent()); - return Pair.of(partitionPathStr , logFile.getFileId()); - })); + Map, List> dataFiles = convertFileStatusesToDataFiles( + statuses) + .collect(Collectors.groupingBy((dataFile) -> { + String partitionPathStr = FSUtils.getRelativePartitionPath( + new Path(metaClient.getBasePath()), + dataFile.getFileStatus().getPath().getParent()); + return Pair.of(partitionPathStr, dataFile.getFileId()); + })); + Map, List> 
logFiles = convertFileStatusesToLogFiles( + statuses) + .collect(Collectors.groupingBy((logFile) -> { + String partitionPathStr = FSUtils.getRelativePartitionPath( + new Path(metaClient.getBasePath()), + logFile.getPath().getParent()); + return Pair.of(partitionPathStr, logFile.getFileId()); + })); Set> fileIdSet = new HashSet<>(dataFiles.keySet()); fileIdSet.addAll(logFiles.keySet()); List fileGroups = new ArrayList<>(); fileIdSet.forEach(pair -> { - HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(), visibleActiveTimeline); + HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(), + visibleActiveTimeline); if (dataFiles.containsKey(pair)) { dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile)); } @@ -165,90 +157,93 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile private Stream convertFileStatusesToDataFiles(FileStatus[] statuses) { Predicate roFilePredicate = fileStatus -> - fileStatus.getPath().getName().contains(metaClient.getTableConfig().getROFileFormat().getFileExtension()); + fileStatus.getPath().getName() + .contains(metaClient.getTableConfig().getROFileFormat().getFileExtension()); return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new); } private Stream convertFileStatusesToLogFiles(FileStatus[] statuses) { Predicate rtFilePredicate = fileStatus -> - fileStatus.getPath().getName().contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension()); + fileStatus.getPath().getName() + .contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension()); return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new); } @Override public Stream getLatestDataFiles(final String partitionPath) { return getAllFileGroups(partitionPath) - .map(fileGroup -> fileGroup.getLatestDataFile()) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestDataFile()) + 
.filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getLatestDataFiles() { return fileGroupMap.values().stream() - .map(fileGroup -> fileGroup.getLatestDataFile()) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestDataFile()) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getLatestDataFilesBeforeOrOn(String partitionPath, - String maxCommitTime) { + String maxCommitTime) { return getAllFileGroups(partitionPath) - .map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime)) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime)) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getLatestDataFilesInRange(List commitsToReturn) { - return fileGroupMap.values().stream() - .map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn)) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + return fileGroupMap.values().stream() + .map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn)) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getAllDataFiles(String partitionPath) { return getAllFileGroups(partitionPath) - .map(fileGroup -> fileGroup.getAllDataFiles()) - .flatMap(dataFileList -> dataFileList); + .map(fileGroup -> fileGroup.getAllDataFiles()) + .flatMap(dataFileList -> dataFileList); } @Override public Stream getLatestFileSlices(String partitionPath) { return getAllFileGroups(partitionPath) - .map(fileGroup -> fileGroup.getLatestFileSlice()) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestFileSlice()) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override - public Stream 
getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) { + public Stream getLatestFileSlicesBeforeOrOn(String partitionPath, + String maxCommitTime) { return getAllFileGroups(partitionPath) - .map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime)) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime)) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getLatestFileSliceInRange(List commitsToReturn) { return fileGroupMap.values().stream() - .map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn)) - .filter(dataFileOpt -> dataFileOpt.isPresent()) - .map(Optional::get); + .map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn)) + .filter(dataFileOpt -> dataFileOpt.isPresent()) + .map(Optional::get); } @Override public Stream getAllFileSlices(String partitionPath) { return getAllFileGroups(partitionPath) - .map(group -> group.getAllFileSlices()) - .flatMap(sliceList -> sliceList); + .map(group -> group.getAllFileSlices()) + .flatMap(sliceList -> sliceList); } /** - * Given a partition path, obtain all filegroups within that. All methods, that work at the partition level - * go through this. + * Given a partition path, obtain all filegroups within that. All methods, that work at the + * partition level go through this. 
*/ @Override public Stream getAllFileGroups(String partitionPathStr) { @@ -266,7 +261,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile return fileGroups.stream(); } catch (IOException e) { throw new HoodieIOException( - "Failed to list data files in partition " + partitionPathStr, e); + "Failed to list data files in partition " + partitionPathStr, e); } } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/AvroUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/AvroUtils.java index 3d1fad843..ae0dbd3f0 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/AvroUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/AvroUtils.java @@ -19,7 +19,6 @@ package com.uber.hoodie.common.util; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; - import com.uber.hoodie.avro.model.HoodieCleanMetadata; import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata; import com.uber.hoodie.avro.model.HoodieRollbackMetadata; @@ -32,7 +31,11 @@ import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.exception.HoodieIOException; - +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Optional; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; @@ -50,146 +53,140 @@ import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Optional; - public class AvroUtils { - public static List> loadFromFiles(FileSystem fs, - List deltaFilePaths, Schema 
expectedSchema) { - List> loadedRecords = Lists.newArrayList(); - deltaFilePaths.forEach(s -> { - List> records = loadFromFile(fs, s, expectedSchema); - loadedRecords.addAll(records); - }); - return loadedRecords; + public static List> loadFromFiles(FileSystem fs, + List deltaFilePaths, Schema expectedSchema) { + List> loadedRecords = Lists.newArrayList(); + deltaFilePaths.forEach(s -> { + List> records = loadFromFile(fs, s, expectedSchema); + loadedRecords.addAll(records); + }); + return loadedRecords; + } + + public static List> loadFromFile(FileSystem fs, + String deltaFilePath, Schema expectedSchema) { + List> loadedRecords = Lists.newArrayList(); + Path path = new Path(deltaFilePath); + try { + SeekableInput input = new FsInput(path, fs.getConf()); + GenericDatumReader reader = new GenericDatumReader<>(); + // Set the expected schema to be the current schema to account for schema evolution + reader.setExpected(expectedSchema); + + FileReader fileReader = DataFileReader.openReader(input, reader); + for (GenericRecord deltaRecord : fileReader) { + String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + String partitionPath = + deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); + loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath), + new HoodieAvroPayload(Optional.of(deltaRecord)))); + } + fileReader.close(); // also closes underlying FsInput + } catch (IOException e) { + throw new HoodieIOException("Could not read avro records from path " + deltaFilePath, + e); } + return loadedRecords; + } - public static List> loadFromFile(FileSystem fs, - String deltaFilePath, Schema expectedSchema) { - List> loadedRecords = Lists.newArrayList(); - Path path = new Path(deltaFilePath); - try { - SeekableInput input = new FsInput(path, fs.getConf()); - GenericDatumReader reader = new GenericDatumReader<>(); - // Set the expected schema to be the current schema to account for schema evolution - 
reader.setExpected(expectedSchema); - - FileReader fileReader = DataFileReader.openReader(input, reader); - for (GenericRecord deltaRecord : fileReader) { - String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - String partitionPath = - deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath), - new HoodieAvroPayload(Optional.of(deltaRecord)))); - } - fileReader.close(); // also closes underlying FsInput - } catch (IOException e) { - throw new HoodieIOException("Could not read avro records from path " + deltaFilePath, - e); - } - return loadedRecords; + public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, + Optional durationInMs, List cleanStats) { + ImmutableMap.Builder partitionMetadataBuilder = + ImmutableMap.builder(); + int totalDeleted = 0; + String earliestCommitToRetain = null; + for (HoodieCleanStat stat : cleanStats) { + HoodieCleanPartitionMetadata metadata = + new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(), + stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), + stat.getDeletePathPatterns()); + partitionMetadataBuilder.put(stat.getPartitionPath(), metadata); + totalDeleted += stat.getSuccessDeleteFiles().size(); + if (earliestCommitToRetain == null) { + // This will be the same for all partitions + earliestCommitToRetain = stat.getEarliestCommitToRetain(); + } } + return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), + totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build()); + } - public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime, - Optional durationInMs, List cleanStats) { - ImmutableMap.Builder partitionMetadataBuilder = - ImmutableMap.builder(); - int totalDeleted = 0; - String earliestCommitToRetain = null; - for (HoodieCleanStat stat : cleanStats) { - HoodieCleanPartitionMetadata metadata = - new 
HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(), - stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(), - stat.getDeletePathPatterns()); - partitionMetadataBuilder.put(stat.getPartitionPath(), metadata); - totalDeleted += stat.getSuccessDeleteFiles().size(); - if (earliestCommitToRetain == null) { - // This will be the same for all partitions - earliestCommitToRetain = stat.getEarliestCommitToRetain(); - } - } - return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L), - totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build()); + public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, + Optional durationInMs, List commits, List stats) { + ImmutableMap.Builder partitionMetadataBuilder = + ImmutableMap.builder(); + int totalDeleted = 0; + for (HoodieRollbackStat stat : stats) { + HoodieRollbackPartitionMetadata metadata = + new HoodieRollbackPartitionMetadata(stat.getPartitionPath(), + stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles()); + partitionMetadataBuilder.put(stat.getPartitionPath(), metadata); + totalDeleted += stat.getSuccessDeleteFiles().size(); } + return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L), + totalDeleted, commits, partitionMetadataBuilder.build()); + } - public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, - Optional durationInMs, List commits, List stats) { - ImmutableMap.Builder partitionMetadataBuilder = - ImmutableMap.builder(); - int totalDeleted = 0; - for (HoodieRollbackStat stat : stats) { - HoodieRollbackPartitionMetadata metadata = - new HoodieRollbackPartitionMetadata(stat.getPartitionPath(), - stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles()); - partitionMetadataBuilder.put(stat.getPartitionPath(), metadata); - totalDeleted += stat.getSuccessDeleteFiles().size(); - } - return new HoodieRollbackMetadata(startRollbackTime, 
durationInMs.orElseGet(() -> -1L), - totalDeleted, commits, partitionMetadataBuilder.build()); - } - - public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment, - Map> latestFiles) { - ImmutableMap.Builder partitionMetadataBuilder = - ImmutableMap.builder(); - for (Map.Entry> stat : latestFiles.entrySet()) { - HoodieSavepointPartitionMetadata metadata = - new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue()); - partitionMetadataBuilder.put(stat.getKey(), metadata); - } - return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment, - partitionMetadataBuilder.build()); + public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment, + Map> latestFiles) { + ImmutableMap.Builder partitionMetadataBuilder = + ImmutableMap.builder(); + for (Map.Entry> stat : latestFiles.entrySet()) { + HoodieSavepointPartitionMetadata metadata = + new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue()); + partitionMetadataBuilder.put(stat.getKey(), metadata); } + return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment, + partitionMetadataBuilder.build()); + } - public static Optional serializeCleanMetadata(HoodieCleanMetadata metadata) - throws IOException { - return serializeAvroMetadata(metadata, HoodieCleanMetadata.class); - } + public static Optional serializeCleanMetadata(HoodieCleanMetadata metadata) + throws IOException { + return serializeAvroMetadata(metadata, HoodieCleanMetadata.class); + } - public static Optional serializeSavepointMetadata(HoodieSavepointMetadata metadata) - throws IOException { - return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class); - } + public static Optional serializeSavepointMetadata(HoodieSavepointMetadata metadata) + throws IOException { + return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class); + } - public static Optional serializeRollbackMetadata( - HoodieRollbackMetadata rollbackMetadata) 
throws IOException { - return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class); - } + public static Optional serializeRollbackMetadata( + HoodieRollbackMetadata rollbackMetadata) throws IOException { + return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class); + } - public static Optional serializeAvroMetadata(T metadata, - Class clazz) throws IOException { - DatumWriter datumWriter = new SpecificDatumWriter<>(clazz); - DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - fileWriter.create(metadata.getSchema(), baos); - fileWriter.append(metadata); - fileWriter.flush(); - return Optional.of(baos.toByteArray()); - } + public static Optional serializeAvroMetadata(T metadata, + Class clazz) throws IOException { + DatumWriter datumWriter = new SpecificDatumWriter<>(clazz); + DataFileWriter fileWriter = new DataFileWriter<>(datumWriter); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + fileWriter.create(metadata.getSchema(), baos); + fileWriter.append(metadata); + fileWriter.flush(); + return Optional.of(baos.toByteArray()); + } - public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes) - throws IOException { - return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class); - } + public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes) + throws IOException { + return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class); + } - public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes) - throws IOException { - return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class); - } + public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes) + throws IOException { + return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class); + } - public static T deserializeAvroMetadata(byte[] bytes, - Class clazz) throws IOException { - DatumReader 
reader = new SpecificDatumReader<>(clazz); - FileReader fileReader = - DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader); - Preconditions - .checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz); - return fileReader.next(); - } + public static T deserializeAvroMetadata(byte[] bytes, + Class clazz) throws IOException { + DatumReader reader = new SpecificDatumReader<>(clazz); + FileReader fileReader = + DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader); + Preconditions + .checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz); + return fileReader.next(); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java index e4c97f75b..daecf6237 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/FSUtils.java @@ -23,16 +23,6 @@ import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.exception.InvalidHoodiePathException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -43,319 +33,339 @@ import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Stream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import 
org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; /** * Utility functions related to accessing the file storage */ public class FSUtils { - private static final Logger LOG = LogManager.getLogger(FSUtils.class); - // Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1 - private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)"); - private static final String LOG_FILE_PREFIX = "."; - private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10; - private static final long MIN_CLEAN_TO_KEEP = 10; - private static final long MIN_ROLLBACK_TO_KEEP = 10; - private static FileSystem fs; + private static final Logger LOG = LogManager.getLogger(FSUtils.class); + // Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1 + private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)"); + private static final String LOG_FILE_PREFIX = "."; + private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10; + private static final long MIN_CLEAN_TO_KEEP = 10; + private static final long MIN_ROLLBACK_TO_KEEP = 10; + private static FileSystem fs; - /** - * Only to be used for testing. - */ - @VisibleForTesting - public static void setFs(FileSystem fs) { - FSUtils.fs = fs; + /** + * Only to be used for testing. 
+ */ + @VisibleForTesting + public static void setFs(FileSystem fs) { + FSUtils.fs = fs; + } + + + public static FileSystem getFs() { + if (fs != null) { + return fs; } - - - public static FileSystem getFs() { - if (fs != null) { - return fs; - } - Configuration conf = new Configuration(); - conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); - conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); - FileSystem fs; - try { - fs = FileSystem.get(conf); - } catch (IOException e) { - throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), - e); - } - LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]", - conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString())); - - return fs; + Configuration conf = new Configuration(); + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); + conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); + FileSystem fs; + try { + fs = FileSystem.get(conf); + } catch (IOException e) { + throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(), + e); } + LOG.info( + String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]", + conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString())); - public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) { - return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime); + return fs; + } + + public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) { + return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime); + } + + public static String maskWithoutFileId(String commitTime, int taskPartitionId) { + return String.format("*_%s_%s.parquet", taskPartitionId, commitTime); + } + + public static String maskWithoutTaskPartitionId(String commitTime, 
String fileId) { + return String.format("%s_*_%s.parquet", fileId, commitTime); + } + + public static String maskWithOnlyCommitTime(String commitTime) { + return String.format("*_*_%s.parquet", commitTime); + } + + public static String getCommitFromCommitFile(String commitFileName) { + return commitFileName.split("\\.")[0]; + } + + public static String getCommitTime(String fullFileName) { + return fullFileName.split("_")[2].split("\\.")[0]; + } + + public static long getFileSize(FileSystem fs, Path path) throws IOException { + return fs.getFileStatus(path).getLen(); + } + + public static String getFileId(String fullFileName) { + return fullFileName.split("_")[0]; + } + + + /** + * Gets all partition paths assuming date partitioning (year, month, day) three levels down. + */ + public static List getAllFoldersThreeLevelsDown(FileSystem fs, String basePath) + throws IOException { + List datePartitions = new ArrayList<>(); + FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*")); + for (FileStatus status : folders) { + Path path = status.getPath(); + datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(), + path.getParent().getName(), path.getName())); } + return datePartitions; + } - public static String maskWithoutFileId(String commitTime, int taskPartitionId) { - return String.format("*_%s_%s.parquet", taskPartitionId, commitTime); + public static String getRelativePartitionPath(Path basePath, Path partitionPath) { + String partitionFullPath = partitionPath.toString(); + int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName()); + return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1); + } + + /** + * Obtain all the partition paths, that are present in this table, denoted by presence of {@link + * com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE} + */ + public static List getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) + 
throws IOException { + List partitions = new ArrayList<>(); + Path basePath = new Path(basePathStr); + RemoteIterator allFiles = fs.listFiles(new Path(basePathStr), true); + while (allFiles.hasNext()) { + Path filePath = allFiles.next().getPath(); + if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { + partitions.add(getRelativePartitionPath(basePath, filePath.getParent())); + } } + return partitions; + } - public static String maskWithoutTaskPartitionId(String commitTime, String fileId) { - return String.format("%s_*_%s.parquet", fileId, commitTime); + public static List getAllPartitionPaths(FileSystem fs, String basePathStr, + boolean assumeDatePartitioning) + throws IOException { + if (assumeDatePartitioning) { + return getAllFoldersThreeLevelsDown(fs, basePathStr); + } else { + return getAllFoldersWithPartitionMetaFile(fs, basePathStr); } + } - public static String maskWithOnlyCommitTime(String commitTime) { - return String.format("*_*_%s.parquet", commitTime); + public static String getFileExtension(String fullName) { + Preconditions.checkNotNull(fullName); + String fileName = (new File(fullName)).getName(); + int dotIndex = fileName.indexOf('.'); + return dotIndex == -1 ? "" : fileName.substring(dotIndex); + } + + public static String getInstantTime(String name) { + return name.replace(getFileExtension(name), ""); + } + + + /** + * Get the file extension from the log file + */ + public static String getFileExtensionFromLog(Path logPath) { + Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName()); + if (!matcher.find()) { + throw new InvalidHoodiePathException(logPath, "LogFile"); } + return matcher.group(3); + } - public static String getCommitFromCommitFile(String commitFileName) { - return commitFileName.split("\\.")[0]; + /** + * Get the first part of the file name in the log file. That will be the fileId. Log file do not + * have commitTime in the file name. 
+ */ + public static String getFileIdFromLogPath(Path path) { + Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); + if (!matcher.find()) { + throw new InvalidHoodiePathException(path, "LogFile"); } + return matcher.group(1); + } - public static String getCommitTime(String fullFileName) { - return fullFileName.split("_")[2].split("\\.")[0]; + /** + * Get the first part of the file name in the log file. That will be the fileId. Log file do not + * have commitTime in the file name. + */ + public static String getBaseCommitTimeFromLogPath(Path path) { + Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); + if (!matcher.find()) { + throw new InvalidHoodiePathException(path, "LogFile"); } + return matcher.group(2); + } - public static long getFileSize(FileSystem fs, Path path) throws IOException { - return fs.getFileStatus(path).getLen(); + /** + * Get the last part of the file name in the log file and convert to int. + */ + public static int getFileVersionFromLog(Path logPath) { + Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName()); + if (!matcher.find()) { + throw new InvalidHoodiePathException(logPath, "LogFile"); } + return Integer.parseInt(matcher.group(4)); + } - public static String getFileId(String fullFileName) { - return fullFileName.split("_")[0]; + public static String makeLogFileName(String fileId, String logFileExtension, + String baseCommitTime, int version) { + return LOG_FILE_PREFIX + String + .format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version); + } + + public static String maskWithoutLogVersion(String commitTime, String fileId, + String logFileExtension) { + return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension); + } + + + /** + * Get the latest log file written from the list of log files passed in + */ + public static Optional getLatestLogFile(Stream logFiles) { + return logFiles.sorted(Comparator + .comparing(s -> s.getLogVersion(), + 
Comparator.reverseOrder())).findFirst(); + } + + /** + * Get all the log files for the passed in FileId in the partition path + */ + public static Stream getAllLogFiles(FileSystem fs, Path partitionPath, + final String fileId, final String logFileExtension, final String baseCommitTime) + throws IOException { + return Arrays.stream(fs.listStatus(partitionPath, + path -> path.getName().startsWith("." + fileId) && path.getName() + .contains(logFileExtension))) + .map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime)); + } + + /** + * Get the latest log version for the fileId in the partition path + */ + public static Optional getLatestLogVersion(FileSystem fs, Path partitionPath, + final String fileId, final String logFileExtension, final String baseCommitTime) + throws IOException { + Optional latestLogFile = + getLatestLogFile( + getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime)); + if (latestLogFile.isPresent()) { + return Optional.of(latestLogFile.get().getLogVersion()); } + return Optional.empty(); + } + public static int getCurrentLogVersion(FileSystem fs, Path partitionPath, + final String fileId, final String logFileExtension, final String baseCommitTime) + throws IOException { + Optional currentVersion = + getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime); + // handle potential overflow + return (currentVersion.isPresent()) ? currentVersion.get() : 1; + } - /** - * Gets all partition paths assuming date partitioning (year, month, day) three levels down. 
- */ - public static List getAllFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException { - List datePartitions = new ArrayList<>(); - FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*")); - for (FileStatus status : folders) { - Path path = status.getPath(); - datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(), - path.getParent().getName(), path.getName())); - } - return datePartitions; + /** + * computes the next log version for the specified fileId in the partition path + */ + public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId, + final String logFileExtension, final String baseCommitTime) throws IOException { + Optional currentVersion = + getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime); + // handle potential overflow + return (currentVersion.isPresent()) ? currentVersion.get() + 1 : 1; + } + + public static int getDefaultBufferSize(final FileSystem fs) { + return fs.getConf().getInt("io.file.buffer.size", 4096); + } + + public static Short getDefaultReplication(FileSystem fs, Path path) { + return fs.getDefaultReplication(path); + } + + public static Long getDefaultBlockSize(FileSystem fs, Path path) { + return fs.getDefaultBlockSize(path); + } + + /** + * When a file was opened and the task died without closing the stream, another task executor + * cannot open because the existing lease will be active. We will try to recover the lease, from + * HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But if + * the client dies, this should be instant. 
+ */ + public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p) + throws IOException, InterruptedException { + LOG.info("Recover lease on dfs file " + p); + // initiate the recovery + boolean recovered = false; + for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) { + LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p); + recovered = dfs.recoverLease(p); + if (recovered) { + break; + } + // Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover under default settings + Thread.sleep(1000); } + return recovered; - public static String getRelativePartitionPath(Path basePath, Path partitionPath) { - String partitionFullPath = partitionPath.toString(); - int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName()); - return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1); + } + + public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath, + Stream instants) { + //TODO - this should be archived when archival is made general for all meta-data + // skip MIN_CLEAN_TO_KEEP and delete rest + instants.skip(MIN_CLEAN_TO_KEEP).map(s -> { + try { + return fs.delete(new Path(metaPath, s.getFileName()), false); + } catch (IOException e) { + throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(), + e); + } + }); + } + + public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath, + Stream instants) { + //TODO - this should be archived when archival is made general for all meta-data + // skip MIN_ROLLBACK_TO_KEEP and delete rest + instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> { + try { + return fs.delete(new Path(metaPath, s.getFileName()), false); + } catch (IOException e) { + throw new HoodieIOException( + "Could not delete rollback meta files " + s.getFileName(), e); + } + }); + } + + public static void createPathIfNotExists(FileSystem fs, Path partitionPath) 
throws IOException { + if (!fs.exists(partitionPath)) { + fs.mkdirs(partitionPath); } + } - /** - * Obtain all the partition paths, that are present in this table, denoted by presence of {@link - * com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE} - */ - public static List getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr) - throws IOException { - List partitions = new ArrayList<>(); - Path basePath = new Path(basePathStr); - RemoteIterator allFiles = fs.listFiles(new Path(basePathStr), true); - while (allFiles.hasNext()) { - Path filePath = allFiles.next().getPath(); - if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) { - partitions.add(getRelativePartitionPath(basePath, filePath.getParent())); - } - } - return partitions; - } - - public static List getAllPartitionPaths(FileSystem fs, String basePathStr, boolean assumeDatePartitioning) - throws IOException { - if (assumeDatePartitioning) { - return getAllFoldersThreeLevelsDown(fs, basePathStr); - } else { - return getAllFoldersWithPartitionMetaFile(fs, basePathStr); - } - } - - public static String getFileExtension(String fullName) { - Preconditions.checkNotNull(fullName); - String fileName = (new File(fullName)).getName(); - int dotIndex = fileName.indexOf('.'); - return dotIndex == -1 ? "" : fileName.substring(dotIndex); - } - - public static String getInstantTime(String name) { - return name.replace(getFileExtension(name), ""); - } - - - /** - * Get the file extension from the log file - */ - public static String getFileExtensionFromLog(Path logPath) { - Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName()); - if (!matcher.find()) { - throw new InvalidHoodiePathException(logPath, "LogFile"); - } - return matcher.group(3); - } - - /** - * Get the first part of the file name in the log file. That will be the fileId. Log file do not - * have commitTime in the file name. 
- */ - public static String getFileIdFromLogPath(Path path) { - Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); - if (!matcher.find()) { - throw new InvalidHoodiePathException(path, "LogFile"); - } - return matcher.group(1); - } - - /** - * Get the first part of the file name in the log file. That will be the fileId. Log file do not - * have commitTime in the file name. - */ - public static String getBaseCommitTimeFromLogPath(Path path) { - Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName()); - if (!matcher.find()) { - throw new InvalidHoodiePathException(path, "LogFile"); - } - return matcher.group(2); - } - - /** - * Get the last part of the file name in the log file and convert to int. - */ - public static int getFileVersionFromLog(Path logPath) { - Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName()); - if (!matcher.find()) { - throw new InvalidHoodiePathException(logPath, "LogFile"); - } - return Integer.parseInt(matcher.group(4)); - } - - public static String makeLogFileName(String fileId, String logFileExtension, - String baseCommitTime, int version) { - return LOG_FILE_PREFIX + String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version); - } - - public static String maskWithoutLogVersion(String commitTime, String fileId, String logFileExtension) { - return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension); - } - - - /** - * Get the latest log file written from the list of log files passed in - */ - public static Optional getLatestLogFile(Stream logFiles) { - return logFiles.sorted(Comparator - .comparing(s -> s.getLogVersion(), - Comparator.reverseOrder())).findFirst(); - } - - /** - * Get all the log files for the passed in FileId in the partition path - */ - public static Stream getAllLogFiles(FileSystem fs, Path partitionPath, - final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException { - return 
Arrays.stream(fs.listStatus(partitionPath, - path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension))) - .map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime)); - } - - /** - * Get the latest log version for the fileId in the partition path - */ - public static Optional getLatestLogVersion(FileSystem fs, Path partitionPath, - final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException { - Optional latestLogFile = - getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime)); - if (latestLogFile.isPresent()) { - return Optional.of(latestLogFile.get().getLogVersion()); - } - return Optional.empty(); - } - - public static int getCurrentLogVersion(FileSystem fs, Path partitionPath, - final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException { - Optional currentVersion = - getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime); - // handle potential overflow - return (currentVersion.isPresent()) ? currentVersion.get() : 1; - } - - /** - * computes the next log version for the specified fileId in the partition path - */ - public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId, - final String logFileExtension, final String baseCommitTime) throws IOException { - Optional currentVersion = - getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime); - // handle potential overflow - return (currentVersion.isPresent()) ? 
currentVersion.get() + 1 : 1; - } - - public static int getDefaultBufferSize(final FileSystem fs) { - return fs.getConf().getInt("io.file.buffer.size", 4096); - } - - public static Short getDefaultReplication(FileSystem fs, Path path) { - return fs.getDefaultReplication(path); - } - - public static Long getDefaultBlockSize(FileSystem fs, Path path) { - return fs.getDefaultBlockSize(path); - } - - /** - * When a file was opened and the task died without closing the stream, another task executor - * cannot open because the existing lease will be active. We will try to recover the lease, from - * HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But - * if the client dies, this should be instant. - */ - public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p) - throws IOException, InterruptedException { - LOG.info("Recover lease on dfs file " + p); - // initiate the recovery - boolean recovered = false; - for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) { - LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p); - recovered = dfs.recoverLease(p); - if (recovered) - break; - // Sleep for 1 second before trying again. 
Typically it takes about 2-3 seconds to recover under default settings - Thread.sleep(1000); - } - return recovered; - - } - - public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath, - Stream instants) { - //TODO - this should be archived when archival is made general for all meta-data - // skip MIN_CLEAN_TO_KEEP and delete rest - instants.skip(MIN_CLEAN_TO_KEEP).map(s -> { - try { - return fs.delete(new Path(metaPath, s.getFileName()), false); - } catch (IOException e) { - throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(), - e); - } - }); - } - - public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath, - Stream instants) { - //TODO - this should be archived when archival is made general for all meta-data - // skip MIN_ROLLBACK_TO_KEEP and delete rest - instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> { - try { - return fs.delete(new Path(metaPath, s.getFileName()), false); - } catch (IOException e) { - throw new HoodieIOException( - "Could not delete rollback meta files " + s.getFileName(), e); - } - }); - } - - public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException { - if(!fs.exists(partitionPath)) { - fs.mkdirs(partitionPath); - } - } - - public static Long getSizeInMB(long sizeInBytes) { - return sizeInBytes / (1024 * 1024); - } + public static Long getSizeInMB(long sizeInBytes) { + return sizeInBytes / (1024 * 1024); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java index 8323bc5ca..519ce7b60 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java @@ -17,156 +17,167 @@ package com.uber.hoodie.common.util; import com.uber.hoodie.common.model.HoodieRecord; - import com.uber.hoodie.exception.HoodieIOException; 
import com.uber.hoodie.exception.SchemaCompatabilityException; import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; import org.apache.avro.Schema; -import org.apache.avro.generic.*; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - /** * Helper class to do common stuff across Avro. */ public class HoodieAvroUtils { - // All metadata fields are optional strings. - private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList( - Schema.create(Schema.Type.NULL), - Schema.create(Schema.Type.STRING))); + // All metadata fields are optional strings. 
+ private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList( + Schema.create(Schema.Type.NULL), + Schema.create(Schema.Type.STRING))); - private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema(); + private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema(); - /** - * Convert a given avro record to bytes - */ - public static byte[] avroToBytes(GenericRecord record) throws IOException { - GenericDatumWriter writer = - new GenericDatumWriter<>(record.getSchema()); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); - writer.write(record, encoder); - encoder.flush(); - out.close(); - return out.toByteArray(); + /** + * Convert a given avro record to bytes + */ + public static byte[] avroToBytes(GenericRecord record) throws IOException { + GenericDatumWriter writer = + new GenericDatumWriter<>(record.getSchema()); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null); + writer.write(record, encoder); + encoder.flush(); + out.close(); + return out.toByteArray(); + } + + /** + * Convert serialized bytes back into avro record + */ + public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException { + Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); + GenericDatumReader reader = new GenericDatumReader(schema); + return reader.read(null, decoder); + } + + + /** + * Adds the Hoodie metadata fields to the given schema + */ + public static Schema addMetadataFields(Schema schema) { + List parentFields = new ArrayList<>(); + + Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + Schema.Field recordKeyField = new 
Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + + parentFields.add(commitTimeField); + parentFields.add(commitSeqnoField); + parentFields.add(recordKeyField); + parentFields.add(partitionPathField); + parentFields.add(fileNameField); + for (Schema.Field field : schema.getFields()) { + parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null)); } - /** - * Convert serialized bytes back into avro record - */ - public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException { - Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null); - GenericDatumReader reader = new GenericDatumReader(schema); - return reader.read(null, decoder); + Schema mergedSchema = Schema + .createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false); + mergedSchema.setFields(parentFields); + return mergedSchema; + } + + private static Schema initRecordKeySchema() { + Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, + METADATA_FIELD_SCHEMA, "", null); + Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false); + recordKeySchema.setFields(Arrays.asList(recordKeyField)); + return recordKeySchema; + } + + public static Schema getRecordKeySchema() { + return RECORD_KEY_SCHEMA; + } + + public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, + String partitionPath, String fileName) { + record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName); + record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath); + record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey); + return record; + } + + /** + * Adds the Hoodie commit 
metadata into the provided Generic Record. + */ + public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, + String commitSeqno) { + record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime); + record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno); + return record; + } + + + /** + * Given a avro record with a given schema, rewrites it into the new schema + */ + public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) { + GenericRecord newRecord = new GenericData.Record(newSchema); + for (Schema.Field f : record.getSchema().getFields()) { + newRecord.put(f.name(), record.get(f.name())); } - - - /** - * Adds the Hoodie metadata fields to the given schema - */ - public static Schema addMetadataFields(Schema schema) { - List parentFields = new ArrayList<>(); - - Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - - parentFields.add(commitTimeField); - parentFields.add(commitSeqnoField); - parentFields.add(recordKeyField); - parentFields.add(partitionPathField); - parentFields.add(fileNameField); - for (Schema.Field field : schema.getFields()) { - parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null)); - } - - Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false); - mergedSchema.setFields(parentFields); - return mergedSchema; + if (!new 
GenericData().validate(newSchema, newRecord)) { + throw new SchemaCompatabilityException( + "Unable to validate the rewritten record " + record + " against schema " + + newSchema); } + return newRecord; + } - private static Schema initRecordKeySchema() { - Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null); - Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false); - recordKeySchema.setFields(Arrays.asList(recordKeyField)); - return recordKeySchema; + public static byte[] compress(String text) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + OutputStream out = new DeflaterOutputStream(baos); + out.write(text.getBytes("UTF-8")); + out.close(); + } catch (IOException e) { + throw new HoodieIOException("IOException while compressing text " + text, e); } + return baos.toByteArray(); + } - public static Schema getRecordKeySchema() { - return RECORD_KEY_SCHEMA; - } - - public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath, String fileName) { - record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName); - record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath); - record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey); - return record; - } - - /** - * Adds the Hoodie commit metadata into the provided Generic Record. 
- */ - public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) { - record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime); - record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno); - return record; - } - - - /** - * Given a avro record with a given schema, rewrites it into the new schema - */ - public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) { - GenericRecord newRecord = new GenericData.Record(newSchema); - for (Schema.Field f : record.getSchema().getFields()) { - newRecord.put(f.name(), record.get(f.name())); - } - if (!new GenericData().validate(newSchema, newRecord)) { - throw new SchemaCompatabilityException( - "Unable to validate the rewritten record " + record + " against schema " - + newSchema); - } - return newRecord; - } - - public static byte[] compress(String text) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - OutputStream out = new DeflaterOutputStream(baos); - out.write(text.getBytes("UTF-8")); - out.close(); - } catch (IOException e) { - throw new HoodieIOException("IOException while compressing text " + text, e); - } - return baos.toByteArray(); - } - - public static String decompress(byte[] bytes) { - InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes)); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - byte[] buffer = new byte[8192]; - int len; - while((len = in.read(buffer))>0) - baos.write(buffer, 0, len); - return new String(baos.toByteArray(), "UTF-8"); - } catch (IOException e) { - throw new HoodieIOException("IOException while decompressing text", e); - } + public static String decompress(byte[] bytes) { + InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + byte[] buffer = new byte[8192]; + int len; + while ((len = in.read(buffer)) > 0) { + baos.write(buffer, 0, len); + } + 
return new String(baos.toByteArray(), "UTF-8"); + } catch (IOException e) { + throw new HoodieIOException("IOException while decompressing text", e); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/NumericUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/NumericUtils.java index 7828c1e73..a15ae1ec1 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/NumericUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/NumericUtils.java @@ -17,10 +17,13 @@ package com.uber.hoodie.common.util; public class NumericUtils { - public static String humanReadableByteCount(double bytes) { - if (bytes < 1024) return String.format("%.1f B", bytes); - int exp = (int) (Math.log(bytes) / Math.log(1024)); - String pre = "KMGTPE".charAt(exp-1) + ""; - return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre); + + public static String humanReadableByteCount(double bytes) { + if (bytes < 1024) { + return String.format("%.1f B", bytes); } + int exp = (int) (Math.log(bytes) / Math.log(1024)); + String pre = "KMGTPE".charAt(exp - 1) + ""; + return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/ParquetUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ParquetUtils.java index 017f3cfbf..a4a683350 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/ParquetUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ParquetUtils.java @@ -16,14 +16,20 @@ package com.uber.hoodie.common.util; +import static com.uber.hoodie.common.util.FSUtils.getFs; + import com.uber.hoodie.avro.HoodieAvroWriteSupport; import com.uber.hoodie.common.BloomFilter; import com.uber.hoodie.common.model.HoodieRecord; - import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.exception.MetadataNotFoundException; - +import 
java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; @@ -36,163 +42,144 @@ import org.apache.parquet.hadoop.ParquetReader; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.schema.MessageType; -import java.io.*; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static com.uber.hoodie.common.util.FSUtils.getFs; - /** * Utility functions involving with parquet. */ public class ParquetUtils { - /** - * Read the rowKey list from the given parquet file. - * - * @param filePath The parquet file path. - */ - public static Set readRowKeysFromParquet(Path filePath) { - Configuration conf = new Configuration(); - conf.addResource(getFs().getConf()); - Schema readSchema = HoodieAvroUtils.getRecordKeySchema(); - AvroReadSupport.setAvroReadSchema(conf, readSchema); - AvroReadSupport.setRequestedProjection(conf, readSchema); - ParquetReader reader = null; - Set rowKeys = new HashSet<>(); + /** + * Read the rowKey list from the given parquet file. + * + * @param filePath The parquet file path. 
+ */ + public static Set readRowKeysFromParquet(Path filePath) { + Configuration conf = new Configuration(); + conf.addResource(getFs().getConf()); + Schema readSchema = HoodieAvroUtils.getRecordKeySchema(); + AvroReadSupport.setAvroReadSchema(conf, readSchema); + AvroReadSupport.setRequestedProjection(conf, readSchema); + ParquetReader reader = null; + Set rowKeys = new HashSet<>(); + try { + reader = AvroParquetReader.builder(filePath).withConf(conf).build(); + Object obj = reader.read(); + while (obj != null) { + if (obj instanceof GenericRecord) { + rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()); + } + obj = reader.read(); + } + } catch (IOException e) { + throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e); + + } finally { + if (reader != null) { try { - reader = AvroParquetReader.builder(filePath).withConf(conf).build(); - Object obj = reader.read(); - while (obj != null) { - if (obj instanceof GenericRecord) { - rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()); - } - obj = reader.read(); - } + reader.close(); } catch (IOException e) { - throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e); - - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // ignore - } - } + // ignore } - return rowKeys; + } } + return rowKeys; + } - /** - * - * Read the metadata from a parquet file - * - * @param parquetFilePath - * @return - */ - public static ParquetMetadata readMetadata(Path parquetFilePath) { - return readMetadata(new Configuration(), parquetFilePath); + /** + * Read the metadata from a parquet file + */ + public static ParquetMetadata readMetadata(Path parquetFilePath) { + return readMetadata(new Configuration(), parquetFilePath); + } + + public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) { + ParquetMetadata footer; + try { + // TODO(vc): Should we 
use the parallel reading version here? + footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath); + } catch (IOException e) { + throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, + e); } + return footer; + } - public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) { - ParquetMetadata footer; + + /** + * Get the schema of the given parquet file. + */ + public static MessageType readSchema(Path parquetFilePath) { + return readMetadata(parquetFilePath).getFileMetaData().getSchema(); + } + + + private static List readParquetFooter(Path parquetFilePath, String... footerNames) { + List footerVals = new ArrayList<>(); + ParquetMetadata footer = readMetadata(parquetFilePath); + Map metadata = footer.getFileMetaData().getKeyValueMetaData(); + for (String footerName : footerNames) { + if (metadata.containsKey(footerName)) { + footerVals.add(metadata.get(footerName)); + } else { + throw new MetadataNotFoundException("Could not find index in Parquet footer. " + + "Looked for key " + footerName + " in " + parquetFilePath); + } + } + return footerVals; + } + + public static Schema readAvroSchema(Path parquetFilePath) { + return new AvroSchemaConverter().convert(readSchema(parquetFilePath)); + } + + /** + * Read out the bloom filter from the parquet file meta data. + */ + public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) { + String footerVal = readParquetFooter(parquetFilePath, + HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0); + return new BloomFilter(footerVal); + } + + public static String[] readMinMaxRecordKeys(Path parquetFilePath) { + List minMaxKeys = readParquetFooter(parquetFilePath, + HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, + HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER); + if (minMaxKeys.size() != 2) { + throw new HoodieException(String.format( + "Could not read min/max record key out of footer correctly from %s. 
read) : %s", + parquetFilePath, minMaxKeys)); + } + return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)}; + } + + /** + * NOTE: This literally reads the entire file contents, thus should be used with caution. + */ + public static List readAvroRecords(Path filePath) { + ParquetReader reader = null; + List records = new ArrayList<>(); + try { + reader = AvroParquetReader.builder(filePath).build(); + Object obj = reader.read(); + while (obj != null) { + if (obj instanceof GenericRecord) { + records.add(((GenericRecord) obj)); + } + obj = reader.read(); + } + } catch (IOException e) { + throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); + + } finally { + if (reader != null) { try { - // TODO(vc): Should we use the parallel reading version here? - footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath); + reader.close(); } catch (IOException e) { - throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, - e); + // ignore } - return footer; - } - - - /** - * Get the schema of the given parquet file. - * - * @param parquetFilePath - * @return - */ - public static MessageType readSchema(Path parquetFilePath) { - return readMetadata(parquetFilePath).getFileMetaData().getSchema(); - } - - - private static List readParquetFooter(Path parquetFilePath, String... footerNames) { - List footerVals = new ArrayList<>(); - ParquetMetadata footer = readMetadata(parquetFilePath); - Map metadata = footer.getFileMetaData().getKeyValueMetaData(); - for (String footerName : footerNames) { - if (metadata.containsKey(footerName)) { - footerVals.add(metadata.get(footerName)); - } else { - throw new MetadataNotFoundException("Could not find index in Parquet footer. 
" + - "Looked for key " + footerName + " in " + parquetFilePath); - } - } - return footerVals; - } - - public static Schema readAvroSchema(Path parquetFilePath) { - return new AvroSchemaConverter().convert(readSchema(parquetFilePath)); - } - - /** - * Read out the bloom filter from the parquet file meta data. - */ - public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) { - String footerVal = readParquetFooter(parquetFilePath, - HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0); - return new BloomFilter(footerVal); - } - - public static String[] readMinMaxRecordKeys(Path parquetFilePath) { - List minMaxKeys = readParquetFooter(parquetFilePath, HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER, - HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER); - if (minMaxKeys.size() != 2) { - throw new HoodieException(String.format( - "Could not read min/max record key out of footer correctly from %s. read) : %s", - parquetFilePath, minMaxKeys)); - } - return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)}; - } - - /** - * - * NOTE: This literally reads the entire file contents, thus should be used with caution. 
- * - * @param filePath - * @return - */ - public static List readAvroRecords(Path filePath) { - ParquetReader reader = null; - List records = new ArrayList<>(); - try { - reader = AvroParquetReader.builder(filePath).build(); - Object obj = reader.read(); - while (obj != null) { - if (obj instanceof GenericRecord) { - records.add(((GenericRecord) obj)); - } - obj = reader.read(); - } - } catch (IOException e) { - throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); - - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // ignore - } - } - } - return records; + } } + return records; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/ReflectionUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ReflectionUtils.java index c1a8e9062..629935ee9 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/ReflectionUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/ReflectionUtils.java @@ -17,22 +17,18 @@ package com.uber.hoodie.common.util; import com.uber.hoodie.common.model.HoodieRecordPayload; - import com.uber.hoodie.exception.HoodieException; -import org.apache.avro.generic.GenericRecord; - -import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.HashMap; import java.util.Map; -import java.util.Optional; public class ReflectionUtils { + private static Map> clazzCache = new HashMap<>(); public static T loadClass(String fqcn) { try { - if(clazzCache.get(fqcn) == null) { + if (clazzCache.get(fqcn) == null) { Class clazz = Class.forName(fqcn); clazzCache.put(fqcn, clazz); } @@ -48,21 +44,17 @@ public class ReflectionUtils { /** * Instantiate a given class with a generic record payload - * - * @param recordPayloadClass - * @param payloadArgs - * @param - * @return */ public static T loadPayload(String recordPayloadClass, - Object [] payloadArgs, - Class ... 
constructorArgTypes) { + Object[] payloadArgs, + Class... constructorArgTypes) { try { - if(clazzCache.get(recordPayloadClass) == null) { + if (clazzCache.get(recordPayloadClass) == null) { Class clazz = Class.forName(recordPayloadClass); clazzCache.put(recordPayloadClass, clazz); } - return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes).newInstance(payloadArgs); + return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes) + .newInstance(payloadArgs); } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { throw new HoodieException("Unable to instantiate payload class ", e); } catch (ClassNotFoundException e) { diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/DatasetNotFoundException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/DatasetNotFoundException.java index 476ebddde..5510dbe00 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/DatasetNotFoundException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/DatasetNotFoundException.java @@ -16,43 +16,40 @@ package com.uber.hoodie.exception; +import java.io.IOException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import java.io.IOException; - /** - *

- * Exception thrown to indicate that a hoodie dataset was not found on the path provided - *

+ *

Exception thrown to indicate that a hoodie dataset was not found on the path provided

*/ public class DatasetNotFoundException extends HoodieException { - public DatasetNotFoundException(String basePath) { - super(getErrorMessage(basePath)); - } - private static String getErrorMessage(String basePath) { - return "Hoodie dataset not found in path " + basePath; - } + public DatasetNotFoundException(String basePath) { + super(getErrorMessage(basePath)); + } - public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir) - throws DatasetNotFoundException { - // Check if the base path is found - try { - if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) { - throw new DatasetNotFoundException(basePathDir.toString()); - } - // Check if the meta path is found - if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) { - throw new DatasetNotFoundException(metaPathDir.toString()); - } - } catch (IllegalArgumentException e) { - // if the base path is file:///, then we have a IllegalArgumentException - throw new DatasetNotFoundException(metaPathDir.toString()); - } - catch (IOException e) { - throw new HoodieIOException( - "Could not check if dataset " + basePathDir + " is valid dataset", e); - } + private static String getErrorMessage(String basePath) { + return "Hoodie dataset not found in path " + basePath; + } + + public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir) + throws DatasetNotFoundException { + // Check if the base path is found + try { + if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) { + throw new DatasetNotFoundException(basePathDir.toString()); + } + // Check if the meta path is found + if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) { + throw new DatasetNotFoundException(metaPathDir.toString()); + } + } catch (IllegalArgumentException e) { + // if the base path is file:///, then we have a IllegalArgumentException + throw new DatasetNotFoundException(metaPathDir.toString()); + } catch (IOException e) { + throw new HoodieIOException( + "Could 
not check if dataset " + basePathDir + " is valid dataset", e); } + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieException.java index 4c933826e..b216b4710 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieException.java @@ -19,39 +19,34 @@ package com.uber.hoodie.exception; import java.io.Serializable; /** - *

- * Exception thrown for Hoodie failures. The root of - * the exception hierarchy. - *

- *

- * Hoodie Write/Read clients will throw this exception if - * any of its operations fail. This is a runtime (unchecked) exception. - *

- * + *

Exception thrown for Hoodie failures. The root of the exception hierarchy.

Hoodie + * Write/Read clients will throw this exception if any of its operations fail. This is a runtime + * (unchecked) exception.

*/ public class HoodieException extends RuntimeException implements Serializable { - public HoodieException() { - super(); - } - public HoodieException(String message) { - super(message); - } + public HoodieException() { + super(); + } - public HoodieException(String message, Throwable t) { - super(message, t); - } + public HoodieException(String message) { + super(message); + } - public HoodieException(Throwable t) { - super(t); - } + public HoodieException(String message, Throwable t) { + super(message, t); + } - protected static String format(String message, Object... args) { - String[] argStrings = new String[args.length]; - for (int i = 0; i < args.length; i += 1) { - argStrings[i] = String.valueOf(args[i]); - } - return String.format(String.valueOf(message), (Object[]) argStrings); + public HoodieException(Throwable t) { + super(t); + } + + protected static String format(String message, Object... args) { + String[] argStrings = new String[args.length]; + for (int i = 0; i < args.length; i += 1) { + argStrings[i] = String.valueOf(args[i]); } + return String.format(String.valueOf(message), (Object[]) argStrings); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIOException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIOException.java index 74f7ed164..7353de940 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIOException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIOException.java @@ -19,23 +19,22 @@ package com.uber.hoodie.exception; import java.io.IOException; /** - *

- * Exception thrown for dataset IO-related failures. - *

+ *

Exception thrown for dataset IO-related failures.

*/ public class HoodieIOException extends HoodieException { - private IOException ioException; - public HoodieIOException(String msg, IOException t) { - super(msg, t); - this.ioException = t; - } + private IOException ioException; - public HoodieIOException(String msg) { - super(msg); - } + public HoodieIOException(String msg, IOException t) { + super(msg, t); + this.ioException = t; + } - public IOException getIOException() { - return ioException; - } + public HoodieIOException(String msg) { + super(msg); + } + + public IOException getIOException() { + return ioException; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIndexException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIndexException.java index 93da5b9d8..ae46893a7 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIndexException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieIndexException.java @@ -17,16 +17,15 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown for HoodieIndex related errors. - *

+ *

Exception thrown for HoodieIndex related errors.

*/ public class HoodieIndexException extends HoodieException { - public HoodieIndexException(String msg) { - super(msg); - } - public HoodieIndexException(String msg, Throwable e) { - super(msg, e); - } + public HoodieIndexException(String msg) { + super(msg); + } + + public HoodieIndexException(String msg, Throwable e) { + super(msg, e); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieNotSupportedException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieNotSupportedException.java index 2305df3ab..65cc63093 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieNotSupportedException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieNotSupportedException.java @@ -17,7 +17,8 @@ package com.uber.hoodie.exception; public class HoodieNotSupportedException extends HoodieException { - public HoodieNotSupportedException(String errorMsg) { - super(errorMsg); - } + + public HoodieNotSupportedException(String errorMsg) { + super(errorMsg); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieRecordMissingException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieRecordMissingException.java index 72b1d29a2..0316e4d29 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieRecordMissingException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/HoodieRecordMissingException.java @@ -19,17 +19,15 @@ package com.uber.hoodie.exception; import com.uber.hoodie.common.model.HoodieRecord; /** - *

- * Exception throws when indexing fails to locate the hoodie record. - * HoodieRecord current location and partition path does not match. - * This is an unrecoverable error - *

+ *

Exception throws when indexing fails to locate the hoodie record. HoodieRecord current + * location and partition path does not match. This is an unrecoverable error

*/ public class HoodieRecordMissingException extends HoodieException { - public HoodieRecordMissingException(HoodieRecord record) { - super( - "Record " + record.getRecordKey() + " with partition path " + record.getPartitionPath() - + " in current location " + record.getCurrentLocation() - + " is not found in the partition"); - } + + public HoodieRecordMissingException(HoodieRecord record) { + super( + "Record " + record.getRecordKey() + " with partition path " + record.getPartitionPath() + + " in current location " + record.getCurrentLocation() + + " is not found in the partition"); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidDatasetException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidDatasetException.java index e80ae306a..60fcbdf0a 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidDatasetException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidDatasetException.java @@ -17,16 +17,15 @@ package com.uber.hoodie.exception; /** - *

- * Exception thrown to indicate that a hoodie dataset is invalid - *

+ *

Exception thrown to indicate that a hoodie dataset is invalid

*/ public class InvalidDatasetException extends HoodieException { - public InvalidDatasetException(String basePath) { - super(getErrorMessage(basePath)); - } - private static String getErrorMessage(String basePath) { - return "Invalid Hoodie Dataset. " + basePath; - } + public InvalidDatasetException(String basePath) { + super(getErrorMessage(basePath)); + } + + private static String getErrorMessage(String basePath) { + return "Invalid Hoodie Dataset. " + basePath; + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidHoodiePathException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidHoodiePathException.java index 2d3f342b5..3ab4e5fdf 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidHoodiePathException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/InvalidHoodiePathException.java @@ -19,7 +19,8 @@ package com.uber.hoodie.exception; import org.apache.hadoop.fs.Path; public class InvalidHoodiePathException extends HoodieException { - public InvalidHoodiePathException(Path path, String type) { - super("Invalid path " + path + " of type " + type); - } + + public InvalidHoodiePathException(Path path, String type) { + super("Invalid path " + path + " of type " + type); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/MetadataNotFoundException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/MetadataNotFoundException.java index 8be9ff401..19a461107 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/MetadataNotFoundException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/MetadataNotFoundException.java @@ -22,11 +22,12 @@ package com.uber.hoodie.exception; * Thrown when expected metadata is not found */ public class MetadataNotFoundException extends HoodieException { - public MetadataNotFoundException(String msg) { - super(msg); - } - public MetadataNotFoundException(String msg, Throwable e) { - super(msg, e); - } + 
public MetadataNotFoundException(String msg) { + super(msg); + } + + public MetadataNotFoundException(String msg, Throwable e) { + super(msg, e); + } } diff --git a/hoodie-common/src/main/java/com/uber/hoodie/exception/SchemaCompatabilityException.java b/hoodie-common/src/main/java/com/uber/hoodie/exception/SchemaCompatabilityException.java index 773f4e53d..420d8c6d4 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/exception/SchemaCompatabilityException.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/exception/SchemaCompatabilityException.java @@ -17,15 +17,16 @@ package com.uber.hoodie.exception; public class SchemaCompatabilityException extends HoodieException { - public SchemaCompatabilityException(String message) { - super(message); - } - public SchemaCompatabilityException(String message, Throwable t) { - super(message, t); - } + public SchemaCompatabilityException(String message) { + super(message); + } - public SchemaCompatabilityException(Throwable t) { - super(t); - } + public SchemaCompatabilityException(String message, Throwable t) { + super(message, t); + } + + public SchemaCompatabilityException(Throwable t) { + super(t); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/TestBloomFilter.java b/hoodie-common/src/test/java/com/uber/hoodie/common/TestBloomFilter.java index 54f93c49d..6c024a897 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/TestBloomFilter.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/TestBloomFilter.java @@ -16,30 +16,30 @@ package com.uber.hoodie.common; +import java.io.IOException; import org.junit.Test; -import java.io.*; - public class TestBloomFilter { - @Test - public void testAddKey() { - BloomFilter filter = new BloomFilter(100, 0.0000001); - filter.add("key1"); - assert (filter.mightContain("key1")); - } - @Test - public void testSerialize() throws IOException, ClassNotFoundException { - BloomFilter filter = new BloomFilter(1000, 0.0000001); - filter.add("key1"); 
- filter.add("key2"); - String filterStr = filter.serializeToString(); + @Test + public void testAddKey() { + BloomFilter filter = new BloomFilter(100, 0.0000001); + filter.add("key1"); + assert (filter.mightContain("key1")); + } - // Rebuild - BloomFilter newFilter = new BloomFilter(filterStr); - assert (newFilter.mightContain("key1")); - assert (newFilter.mightContain("key2")); - } + @Test + public void testSerialize() throws IOException, ClassNotFoundException { + BloomFilter filter = new BloomFilter(1000, 0.0000001); + filter.add("key1"); + filter.add("key2"); + String filterStr = filter.serializeToString(); + + // Rebuild + BloomFilter newFilter = new BloomFilter(filterStr); + assert (newFilter.mightContain("key1")); + assert (newFilter.mightContain("key2")); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/HdfsTestService.java b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/HdfsTestService.java index 8dd9a799e..0b3a6bdeb 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/HdfsTestService.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/HdfsTestService.java @@ -19,6 +19,8 @@ package com.uber.hoodie.common.minicluster; import com.google.common.base.Preconditions; import com.google.common.io.Files; +import java.io.File; +import java.io.IOException; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -27,140 +29,129 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.IOException; - /** * An HDFS minicluster service implementation. 
*/ public class HdfsTestService { - private static final Logger logger = LoggerFactory.getLogger(HdfsTestService.class); + private static final Logger logger = LoggerFactory.getLogger(HdfsTestService.class); - /** - * Configuration settings - */ - private Configuration hadoopConf; - private String workDir; - private String bindIP = "127.0.0.1"; - private int namenodeRpcPort = 8020; - private int namenodeHttpPort = 50070; - private int datanodePort = 50010; - private int datanodeIpcPort = 50020; - private int datanodeHttpPort = 50075; + /** + * Configuration settings + */ + private Configuration hadoopConf; + private String workDir; + private String bindIP = "127.0.0.1"; + private int namenodeRpcPort = 8020; + private int namenodeHttpPort = 50070; + private int datanodePort = 50010; + private int datanodeIpcPort = 50020; + private int datanodeHttpPort = 50075; - /** - * Embedded HDFS cluster - */ - private MiniDFSCluster miniDfsCluster; + /** + * Embedded HDFS cluster + */ + private MiniDFSCluster miniDfsCluster; - public HdfsTestService() { - hadoopConf = new Configuration(); - workDir = Files.createTempDir().getAbsolutePath(); + public HdfsTestService() { + hadoopConf = new Configuration(); + workDir = Files.createTempDir().getAbsolutePath(); + } + + public Configuration getHadoopConf() { + return hadoopConf; + } + + public MiniDFSCluster start(boolean format) throws IOException { + Preconditions + .checkState(workDir != null, "The work dir must be set before starting cluster."); + + if (hadoopConf == null) { + hadoopConf = new Configuration(); } - public Configuration getHadoopConf() { - return hadoopConf; + // If clean, then remove the work dir so we can start fresh. 
+ String localDFSLocation = getDFSLocation(workDir); + if (format) { + logger.info( + "Cleaning HDFS cluster data at: " + localDFSLocation + " and starting fresh."); + File file = new File(localDFSLocation); + FileUtils.deleteDirectory(file); } - public MiniDFSCluster start(boolean format) throws IOException { - Preconditions - .checkState(workDir != null, "The work dir must be set before starting cluster."); + // Configure and start the HDFS cluster + // boolean format = shouldFormatDFSCluster(localDFSLocation, clean); + hadoopConf = configureDFSCluster(hadoopConf, localDFSLocation, bindIP, namenodeRpcPort, + namenodeHttpPort, datanodePort, datanodeIpcPort, datanodeHttpPort); + miniDfsCluster = new MiniDFSCluster.Builder(hadoopConf).numDataNodes(1).format(format) + .checkDataNodeAddrConfig(true).checkDataNodeHostConfig(true).build(); + logger.info("HDFS Minicluster service started."); + return miniDfsCluster; + } - if (hadoopConf == null) { - hadoopConf = new Configuration(); - } + public void stop() throws IOException { + miniDfsCluster.shutdown(); + logger.info("HDFS Minicluster service shut down."); + miniDfsCluster = null; + hadoopConf = null; + } - // If clean, then remove the work dir so we can start fresh. - String localDFSLocation = getDFSLocation(workDir); - if (format) { - logger.info( - "Cleaning HDFS cluster data at: " + localDFSLocation + " and starting fresh."); - File file = new File(localDFSLocation); - FileUtils.deleteDirectory(file); - } + /** + * Get the location on the local FS where we store the HDFS data. + * + * @param baseFsLocation The base location on the local filesystem we have write access to create + * dirs. + * @return The location for HDFS data. 
+ */ + private static String getDFSLocation(String baseFsLocation) { + return baseFsLocation + Path.SEPARATOR + "dfs"; + } - // Configure and start the HDFS cluster - // boolean format = shouldFormatDFSCluster(localDFSLocation, clean); - hadoopConf = configureDFSCluster(hadoopConf, localDFSLocation, bindIP, namenodeRpcPort, - namenodeHttpPort, datanodePort, datanodeIpcPort, datanodeHttpPort); - miniDfsCluster = new MiniDFSCluster.Builder(hadoopConf).numDataNodes(1).format(format) - .checkDataNodeAddrConfig(true).checkDataNodeHostConfig(true).build(); - logger.info("HDFS Minicluster service started."); - return miniDfsCluster; + /** + * Returns true if we should format the DFS Cluster. We'll format if clean is true, or if the + * dfsFsLocation does not exist. + * + * @param localDFSLocation The location on the local FS to hold the HDFS metadata and block data + * @param clean Specifies if we want to start a clean cluster + * @return Returns true if we should format a DFSCluster, otherwise false + */ + private static boolean shouldFormatDFSCluster(String localDFSLocation, boolean clean) { + boolean format = true; + File f = new File(localDFSLocation); + if (f.exists() && f.isDirectory() && !clean) { + format = false; } + return format; + } - public void stop() throws IOException { - miniDfsCluster.shutdown(); - logger.info("HDFS Minicluster service shut down."); - miniDfsCluster = null; - hadoopConf = null; - } + /** + * Configure the DFS Cluster before launching it. + * + * @param config The already created Hadoop configuration we'll further configure for HDFS + * @param localDFSLocation The location on the local filesystem where cluster data is stored + * @param bindIP An IP address we want to force the datanode and namenode to bind to. + * @return The updated Configuration object. 
+ */ + private static Configuration configureDFSCluster(Configuration config, String localDFSLocation, + String bindIP, int namenodeRpcPort, int namenodeHttpPort, int datanodePort, + int datanodeIpcPort, int datanodeHttpPort) { - /** - * Get the location on the local FS where we store the HDFS data. - * - * @param baseFsLocation The base location on the local filesystem we have write access to - * create dirs. - * @return The location for HDFS data. - */ - private static String getDFSLocation(String baseFsLocation) { - return baseFsLocation + Path.SEPARATOR + "dfs"; - } - - /** - * Returns true if we should format the DFS Cluster. We'll format if clean is - * true, or if the dfsFsLocation does not exist. - * - * @param localDFSLocation The location on the local FS to hold the HDFS metadata and block - * data - * @param clean Specifies if we want to start a clean cluster - * @return Returns true if we should format a DFSCluster, otherwise false - */ - private static boolean shouldFormatDFSCluster(String localDFSLocation, boolean clean) { - boolean format = true; - File f = new File(localDFSLocation); - if (f.exists() && f.isDirectory() && !clean) { - format = false; - } - return format; - } - - /** - * Configure the DFS Cluster before launching it. - * - * @param config The already created Hadoop configuration we'll further configure - * for HDFS - * @param localDFSLocation The location on the local filesystem where cluster data is stored - * @param bindIP An IP address we want to force the datanode and namenode to bind - * to. - * @param namenodeRpcPort - * @param namenodeHttpPort - * @param datanodePort - * @param datanodeIpcPort - * @param datanodeHttpPort - * @return The updated Configuration object. 
- */ - private static Configuration configureDFSCluster(Configuration config, String localDFSLocation, - String bindIP, int namenodeRpcPort, int namenodeHttpPort, int datanodePort, - int datanodeIpcPort, int datanodeHttpPort) { - - logger.info("HDFS force binding to ip: " + bindIP); - config.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + bindIP + ":" + namenodeRpcPort); - config.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, bindIP + ":" + datanodePort); - config.set(DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY, bindIP + ":" + datanodeIpcPort); - config.set(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY, bindIP + ":" + datanodeHttpPort); - // When a datanode registers with the namenode, the Namenode do a hostname - // check of the datanode which will fail on OpenShift due to reverse DNS - // issues with the internal IP addresses. This config disables that check, - // and will allow a datanode to connect regardless. - config.setBoolean("dfs.namenode.datanode.registration.ip-hostname-check", false); - config.set("hdfs.minidfs.basedir", localDFSLocation); - // allow current user to impersonate others - String user = System.getProperty("user.name"); - config.set("hadoop.proxyuser." + user + ".groups", "*"); - config.set("hadoop.proxyuser." + user + ".hosts", "*"); - return config; - } + logger.info("HDFS force binding to ip: " + bindIP); + config.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + bindIP + ":" + namenodeRpcPort); + config.set(DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY, bindIP + ":" + datanodePort); + config.set(DFSConfigKeys.DFS_DATANODE_IPC_ADDRESS_KEY, bindIP + ":" + datanodeIpcPort); + config.set(DFSConfigKeys.DFS_DATANODE_HTTP_ADDRESS_KEY, bindIP + ":" + datanodeHttpPort); + // When a datanode registers with the namenode, the Namenode do a hostname + // check of the datanode which will fail on OpenShift due to reverse DNS + // issues with the internal IP addresses. This config disables that check, + // and will allow a datanode to connect regardless. 
+ config.setBoolean("dfs.namenode.datanode.registration.ip-hostname-check", false); + config.set("hdfs.minidfs.basedir", localDFSLocation); + // allow current user to impersonate others + String user = System.getProperty("user.name"); + config.set("hadoop.proxyuser." + user + ".groups", "*"); + config.set("hadoop.proxyuser." + user + ".hosts", "*"); + return config; + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/MiniClusterUtil.java b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/MiniClusterUtil.java index 11f9a45dd..1633e3d48 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/MiniClusterUtil.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/MiniClusterUtil.java @@ -16,38 +16,38 @@ package com.uber.hoodie.common.minicluster; +import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.zookeeper.server.ZooKeeperServer; -import java.io.IOException; - public class MiniClusterUtil { - private static MiniDFSCluster dfsCluster; - private static ZooKeeperServer zkServer; - public static Configuration configuration; - public static FileSystem fileSystem; - public static void setUp() throws IOException, InterruptedException { - if (dfsCluster == null) { - HdfsTestService service = new HdfsTestService(); - dfsCluster = service.start(true); - configuration = service.getHadoopConf(); - } - if (zkServer == null) { - ZookeeperTestService zkService = new ZookeeperTestService(configuration); - zkServer = zkService.start(); - } - fileSystem = FileSystem.get(configuration); - } + private static MiniDFSCluster dfsCluster; + private static ZooKeeperServer zkServer; + public static Configuration configuration; + public static FileSystem fileSystem; - public static void shutdown() { - if (dfsCluster != null) { - dfsCluster.shutdown(); - } - if (zkServer != null) { - 
zkServer.shutdown(); - } + public static void setUp() throws IOException, InterruptedException { + if (dfsCluster == null) { + HdfsTestService service = new HdfsTestService(); + dfsCluster = service.start(true); + configuration = service.getHadoopConf(); } + if (zkServer == null) { + ZookeeperTestService zkService = new ZookeeperTestService(configuration); + zkServer = zkService.start(); + } + fileSystem = FileSystem.get(configuration); + } + + public static void shutdown() { + if (dfsCluster != null) { + dfsCluster.shutdown(); + } + if (zkServer != null) { + zkServer.shutdown(); + } + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/ZookeeperTestService.java b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/ZookeeperTestService.java index 4ed32f114..290064e19 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/ZookeeperTestService.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/minicluster/ZookeeperTestService.java @@ -18,14 +18,6 @@ package com.uber.hoodie.common.minicluster; import com.google.common.base.Preconditions; import com.google.common.io.Files; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.zookeeper.server.NIOServerCnxnFactory; -import org.apache.zookeeper.server.ZooKeeperServer; -import org.apache.zookeeper.server.persistence.FileTxnLog; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.BufferedReader; import java.io.File; import java.io.IOException; @@ -34,208 +26,212 @@ import java.io.OutputStream; import java.io.Reader; import java.net.InetSocketAddress; import java.net.Socket; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileUtil; +import org.apache.zookeeper.server.NIOServerCnxnFactory; +import org.apache.zookeeper.server.ZooKeeperServer; +import org.apache.zookeeper.server.persistence.FileTxnLog; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; /** * A Zookeeper minicluster service implementation. *

- * This class was ripped from MiniZooKeeperCluster from the HBase tests. Changes - * made include: + * This class was ripped from MiniZooKeeperCluster from the HBase tests. Changes made include: *

* 1. It will now only launch 1 zookeeper server. *

- * 2. It will only attempt to bind to the port specified, and will fail if it - * can't. + * 2. It will only attempt to bind to the port specified, and will fail if it can't. *

- * 3. The startup method now takes a bindAddress, which allows us to configure - * which IP the ZK server binds to. This was not configurable in the original - * class. + * 3. The startup method now takes a bindAddress, which allows us to configure which IP the ZK + * server binds to. This was not configurable in the original class. *

- * 4. The ZK cluster will re-use a data dir on the local filesystem if it - * already exists instead of blowing it away. + * 4. The ZK cluster will re-use a data dir on the local filesystem if it already exists instead of + * blowing it away. */ public class ZookeeperTestService { - private static final Logger logger = LoggerFactory.getLogger(ZookeeperTestService.class); + private static final Logger logger = LoggerFactory.getLogger(ZookeeperTestService.class); - private static final int TICK_TIME = 2000; - private static final int CONNECTION_TIMEOUT = 30000; + private static final int TICK_TIME = 2000; + private static final int CONNECTION_TIMEOUT = 30000; - /** - * Configuration settings - */ - private Configuration hadoopConf; - private String workDir; - private Integer clientPort = 2828; - private String bindIP = "127.0.0.1"; - private Boolean clean = false; - private int tickTime = 0; + /** + * Configuration settings + */ + private Configuration hadoopConf; + private String workDir; + private Integer clientPort = 2828; + private String bindIP = "127.0.0.1"; + private Boolean clean = false; + private int tickTime = 0; - /** - * Embedded ZooKeeper cluster - */ - private NIOServerCnxnFactory standaloneServerFactory; - private ZooKeeperServer zooKeeperServer; - private boolean started = false; + /** + * Embedded ZooKeeper cluster + */ + private NIOServerCnxnFactory standaloneServerFactory; + private ZooKeeperServer zooKeeperServer; + private boolean started = false; - public ZookeeperTestService(Configuration config) { - this.workDir = Files.createTempDir().getAbsolutePath(); - this.hadoopConf = config; + public ZookeeperTestService(Configuration config) { + this.workDir = Files.createTempDir().getAbsolutePath(); + this.hadoopConf = config; + } + + public Configuration getHadoopConf() { + return hadoopConf; + } + + public ZooKeeperServer start() throws IOException, InterruptedException { + Preconditions.checkState(workDir != null, + "The localBaseFsLocation must 
be set before starting cluster."); + + setupTestEnv(); + stop(); + + File dir = new File(workDir, "zookeeper").getAbsoluteFile(); + recreateDir(dir, clean); + int tickTimeToUse; + if (this.tickTime > 0) { + tickTimeToUse = this.tickTime; + } else { + tickTimeToUse = TICK_TIME; + } + this.zooKeeperServer = new ZooKeeperServer(dir, dir, tickTimeToUse); + standaloneServerFactory = new NIOServerCnxnFactory(); + + // NOTE: Changed from the original, where InetSocketAddress was + // originally created to bind to the wildcard IP, we now configure it. + logger.info("Zookeeper force binding to: " + this.bindIP); + standaloneServerFactory.configure(new InetSocketAddress(bindIP, clientPort), 1000); + + // Start up this ZK server + standaloneServerFactory.startup(zooKeeperServer); + + String serverHostname; + if (bindIP.equals("0.0.0.0")) { + serverHostname = "localhost"; + } else { + serverHostname = bindIP; + } + if (!waitForServerUp(serverHostname, clientPort, CONNECTION_TIMEOUT)) { + throw new IOException("Waiting for startup of standalone server"); } - public Configuration getHadoopConf() { - return hadoopConf; + started = true; + logger.info("Zookeeper Minicluster service started on client port: " + clientPort); + return zooKeeperServer; + } + + public void stop() throws IOException { + if (!started) { + return; } - public ZooKeeperServer start() throws IOException, InterruptedException { - Preconditions.checkState(workDir != null, - "The localBaseFsLocation must be set before starting cluster."); - - setupTestEnv(); - stop(); - - File dir = new File(workDir, "zookeeper").getAbsoluteFile(); - recreateDir(dir, clean); - int tickTimeToUse; - if (this.tickTime > 0) { - tickTimeToUse = this.tickTime; - } else { - tickTimeToUse = TICK_TIME; - } - this.zooKeeperServer = new ZooKeeperServer(dir, dir, tickTimeToUse); - standaloneServerFactory = new NIOServerCnxnFactory(); - - // NOTE: Changed from the original, where InetSocketAddress was - // originally created to bind to the 
wildcard IP, we now configure it. - logger.info("Zookeeper force binding to: " + this.bindIP); - standaloneServerFactory.configure(new InetSocketAddress(bindIP, clientPort), 1000); - - // Start up this ZK server - standaloneServerFactory.startup(zooKeeperServer); - - String serverHostname; - if (bindIP.equals("0.0.0.0")) { - serverHostname = "localhost"; - } else { - serverHostname = bindIP; - } - if (!waitForServerUp(serverHostname, clientPort, CONNECTION_TIMEOUT)) { - throw new IOException("Waiting for startup of standalone server"); - } - - started = true; - logger.info("Zookeeper Minicluster service started on client port: " + clientPort); - return zooKeeperServer; + standaloneServerFactory.shutdown(); + if (!waitForServerDown(clientPort, CONNECTION_TIMEOUT)) { + throw new IOException("Waiting for shutdown of standalone server"); } - public void stop() throws IOException { - if (!started) { - return; - } + // clear everything + started = false; + standaloneServerFactory = null; + zooKeeperServer = null; - standaloneServerFactory.shutdown(); - if (!waitForServerDown(clientPort, CONNECTION_TIMEOUT)) { - throw new IOException("Waiting for shutdown of standalone server"); - } + logger.info("Zookeeper Minicluster service shut down."); + } - // clear everything - started = false; - standaloneServerFactory = null; - zooKeeperServer = null; - - logger.info("Zookeeper Minicluster service shut down."); + private void recreateDir(File dir, boolean clean) throws IOException { + if (dir.exists() && clean) { + FileUtil.fullyDelete(dir); + } else if (dir.exists() && !clean) { + // the directory's exist, and we don't want to clean, so exit + return; } + try { + dir.mkdirs(); + } catch (SecurityException e) { + throw new IOException("creating dir: " + dir, e); + } + } - private void recreateDir(File dir, boolean clean) throws IOException { - if (dir.exists() && clean) { - FileUtil.fullyDelete(dir); - } else if (dir.exists() && !clean) { - // the directory's exist, and we don't 
want to clean, so exit - return; - } + // / XXX: From o.a.zk.t.ClientBase + private static void setupTestEnv() { + // during the tests we run with 100K prealloc in the logs. + // on windows systems prealloc of 64M was seen to take ~15seconds + // resulting in test failure (client timeout on first session). + // set env and directly in order to handle static init/gc issues + System.setProperty("zookeeper.preAllocSize", "100"); + FileTxnLog.setPreallocSize(100 * 1024); + } + + // XXX: From o.a.zk.t.ClientBase + private static boolean waitForServerDown(int port, long timeout) { + long start = System.currentTimeMillis(); + while (true) { + try { + Socket sock = new Socket("localhost", port); try { - dir.mkdirs(); - } catch (SecurityException e) { - throw new IOException("creating dir: " + dir, e); + OutputStream outstream = sock.getOutputStream(); + outstream.write("stat".getBytes()); + outstream.flush(); + } finally { + sock.close(); } + } catch (IOException e) { + return true; + } + + if (System.currentTimeMillis() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } } + return false; + } - // / XXX: From o.a.zk.t.ClientBase - private static void setupTestEnv() { - // during the tests we run with 100K prealloc in the logs. - // on windows systems prealloc of 64M was seen to take ~15seconds - // resulting in test failure (client timeout on first session). 
- // set env and directly in order to handle static init/gc issues - System.setProperty("zookeeper.preAllocSize", "100"); - FileTxnLog.setPreallocSize(100 * 1024); - } + // XXX: From o.a.zk.t.ClientBase + private static boolean waitForServerUp(String hostname, int port, long timeout) { + long start = System.currentTimeMillis(); + while (true) { + try { + Socket sock = new Socket(hostname, port); + BufferedReader reader = null; + try { + OutputStream outstream = sock.getOutputStream(); + outstream.write("stat".getBytes()); + outstream.flush(); - // XXX: From o.a.zk.t.ClientBase - private static boolean waitForServerDown(int port, long timeout) { - long start = System.currentTimeMillis(); - while (true) { - try { - Socket sock = new Socket("localhost", port); - try { - OutputStream outstream = sock.getOutputStream(); - outstream.write("stat".getBytes()); - outstream.flush(); - } finally { - sock.close(); - } - } catch (IOException e) { - return true; - } - - if (System.currentTimeMillis() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } + Reader isr = new InputStreamReader(sock.getInputStream()); + reader = new BufferedReader(isr); + String line = reader.readLine(); + if (line != null && line.startsWith("Zookeeper version:")) { + return true; + } + } finally { + sock.close(); + if (reader != null) { + reader.close(); + } } - return false; - } - - // XXX: From o.a.zk.t.ClientBase - private static boolean waitForServerUp(String hostname, int port, long timeout) { - long start = System.currentTimeMillis(); - while (true) { - try { - Socket sock = new Socket(hostname, port); - BufferedReader reader = null; - try { - OutputStream outstream = sock.getOutputStream(); - outstream.write("stat".getBytes()); - outstream.flush(); - - Reader isr = new InputStreamReader(sock.getInputStream()); - reader = new BufferedReader(isr); - String line = reader.readLine(); - if (line != null && line.startsWith("Zookeeper 
version:")) { - return true; - } - } finally { - sock.close(); - if (reader != null) { - reader.close(); - } - } - } catch (IOException e) { - // ignore as this is expected - logger.info("server " + hostname + ":" + port + " not up " + e); - } - - if (System.currentTimeMillis() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } - } - return false; + } catch (IOException e) { + // ignore as this is expected + logger.info("server " + hostname + ":" + port + " not up " + e); + } + + if (System.currentTimeMillis() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } } + return false; + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java index b316691ce..1ccca51b2 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/model/HoodieTestUtils.java @@ -16,6 +16,9 @@ package com.uber.hoodie.common.model; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; @@ -34,18 +37,6 @@ import com.uber.hoodie.common.table.log.block.HoodieLogBlock; import com.uber.hoodie.common.util.AvroUtils; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.HoodieAvroUtils; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import 
org.apache.hadoop.util.StringUtils; -import org.junit.rules.TemporaryFolder; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; @@ -64,224 +55,262 @@ import java.util.Random; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.util.StringUtils; +import org.junit.rules.TemporaryFolder; public class HoodieTestUtils { - public static FileSystem fs = FSUtils.getFs(); - public static final String TEST_EXTENSION = ".test"; - public static final String RAW_TRIPS_TEST_NAME = "raw_trips"; - public static final int DEFAULT_TASK_PARTITIONID = 1; - public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"}; - private static Random rand = new Random(46474747); - public static void resetFS() { - HoodieTestUtils.fs = FSUtils.getFs(); + public static FileSystem fs = FSUtils.getFs(); + public static final String TEST_EXTENSION = ".test"; + public static final String RAW_TRIPS_TEST_NAME = "raw_trips"; + public static final int DEFAULT_TASK_PARTITIONID = 1; + public static final String[] DEFAULT_PARTITION_PATHS = {"2016/03/15", "2015/03/16", "2015/03/17"}; + private static Random rand = new Random(46474747); + + public static void resetFS() { + HoodieTestUtils.fs = FSUtils.getFs(); + } + + public static HoodieTableMetaClient init(String basePath) throws IOException { + return initTableType(basePath, HoodieTableType.COPY_ON_WRITE); + } + + public static HoodieTableMetaClient 
initTableType(String basePath, HoodieTableType tableType) + throws IOException { + Properties properties = new Properties(); + properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, RAW_TRIPS_TEST_NAME); + properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name()); + properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, + HoodieAvroPayload.class.getName()); + return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties); + } + + public static HoodieTableMetaClient initOnTemp() throws IOException { + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + String basePath = folder.getRoot().getAbsolutePath(); + return HoodieTestUtils.init(basePath); + } + + public static String makeNewCommitTime() { + return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + } + + public static final void createCommitFiles(String basePath, String... commitTimes) + throws IOException { + for (String commitTime : commitTimes) { + new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline + .makeCommitFileName(commitTime)).createNewFile(); } + } - public static HoodieTableMetaClient init(String basePath) throws IOException { - return initTableType(basePath, HoodieTableType.COPY_ON_WRITE); + public static final void createInflightCommitFiles(String basePath, String... 
commitTimes) + throws IOException { + for (String commitTime : commitTimes) { + new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline + .makeInflightCommitFileName(commitTime)).createNewFile(); } + } - public static HoodieTableMetaClient initTableType(String basePath, HoodieTableType tableType) throws IOException { - Properties properties = new Properties(); - properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, RAW_TRIPS_TEST_NAME); - properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name()); - properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, HoodieAvroPayload.class.getName()); - return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties); + public static final String createNewDataFile(String basePath, String partitionPath, + String commitTime) throws IOException { + String fileID = UUID.randomUUID().toString(); + return createDataFile(basePath, partitionPath, commitTime, fileID); + } + + public static final String createDataFile(String basePath, String partitionPath, + String commitTime, String fileID) throws IOException { + String folderPath = basePath + "/" + partitionPath + "/"; + new File(folderPath).mkdirs(); + new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID)) + .createNewFile(); + return fileID; + } + + public static final String createNewLogFile(String basePath, String partitionPath, + String commitTime, String fileID, Optional version) throws IOException { + String folderPath = basePath + "/" + partitionPath + "/"; + boolean makeDir = fs.mkdirs(new Path(folderPath)); + if (!makeDir) { + throw new IOException("cannot create directory for path " + folderPath); } - - public static HoodieTableMetaClient initOnTemp() throws IOException { - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - String basePath = 
folder.getRoot().getAbsolutePath(); - return HoodieTestUtils.init(basePath); + boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils + .makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_TASK_PARTITIONID)))); + if (!createFile) { + throw new IOException(StringUtils + .format("cannot create data file for commit %s and fileId %s", commitTime, fileID)); } + return fileID; + } - public static String makeNewCommitTime() { - return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + public static final void createCompactionCommitFiles(String basePath, String... commitTimes) + throws IOException { + for (String commitTime : commitTimes) { + boolean createFile = fs.createNewFile(new Path( + basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline + .makeCompactionFileName(commitTime))); + if (!createFile) { + throw new IOException("cannot create commit file for commit " + commitTime); + } } + } - public static final void createCommitFiles(String basePath, String... 
commitTimes) throws IOException { - for (String commitTime: commitTimes) { - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTimeline.makeCommitFileName(commitTime)).createNewFile(); - } + public static final String getDataFilePath(String basePath, String partitionPath, + String commitTime, String fileID) throws IOException { + return basePath + "/" + partitionPath + "/" + FSUtils + .makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID); + } + + public static final String getLogFilePath(String basePath, String partitionPath, + String commitTime, String fileID, Optional version) throws IOException { + return basePath + "/" + partitionPath + "/" + FSUtils + .makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_TASK_PARTITIONID)); + } + + public static final String getCommitFilePath(String basePath, String commitTime) + throws IOException { + return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + + HoodieTimeline.COMMIT_EXTENSION; + } + + public static final boolean doesDataFileExist(String basePath, String partitionPath, + String commitTime, String fileID) throws IOException { + return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists(); + } + + public static final boolean doesLogFileExist(String basePath, String partitionPath, + String commitTime, String fileID, Optional version) throws IOException { + return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists(); + } + + public static final boolean doesCommitExist(String basePath, String commitTime) { + return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + + HoodieTimeline.COMMIT_EXTENSION).exists(); + } + + public static final boolean doesInflightExist(String basePath, String commitTime) { + return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + + HoodieTimeline.INFLIGHT_EXTENSION).exists(); + } + + 
public static String makeInflightTestFileName(String instant) { + return instant + TEST_EXTENSION + HoodieTimeline.INFLIGHT_EXTENSION; + } + + public static void createCleanFiles(String basePath, String commitTime) throws IOException { + Path commitFile = + new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline + .makeCleanerFileName(commitTime)); + FileSystem fs = FSUtils.getFs(); + FSDataOutputStream os = fs.create(commitFile, true); + try { + HoodieCleanStat cleanStats = new HoodieCleanStat( + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, + DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], + new ArrayList<>(), new ArrayList<>(), + new ArrayList<>(), commitTime); + // Create the clean metadata + HoodieCleanMetadata cleanMetadata = + AvroUtils.convertCleanMetadata(commitTime, Optional.of(0L), Arrays.asList(cleanStats)); + // Write empty clean metadata + os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get()); + } finally { + os.close(); } + } - public static final void createInflightCommitFiles(String basePath, String... 
commitTimes) throws IOException { - for (String commitTime: commitTimes) { - new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTimeline.makeInflightCommitFileName(commitTime)).createNewFile(); - } + public static String makeTestFileName(String instant) { + return instant + TEST_EXTENSION; + } + + public static String makeCommitFileName(String instant) { + return instant + ".commit"; + } + + public static void assertStreamEquals(String message, Stream expected, Stream actual) { + Iterator iter1 = expected.iterator(), iter2 = actual.iterator(); + while (iter1.hasNext() && iter2.hasNext()) { + assertEquals(message, iter1.next(), iter2.next()); } + assert !iter1.hasNext() && !iter2.hasNext(); + } - public static final String createNewDataFile(String basePath, String partitionPath, String commitTime) throws IOException { - String fileID = UUID.randomUUID().toString(); - return createDataFile(basePath, partitionPath, commitTime, fileID); - } - - public static final String createDataFile(String basePath, String partitionPath, String commitTime, String fileID) throws IOException { - String folderPath = basePath + "/" + partitionPath + "/"; - new File(folderPath).mkdirs(); - new File(folderPath + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID)).createNewFile(); - return fileID; - } - - public static final String createNewLogFile(String basePath, String partitionPath, String commitTime, String fileID, Optional version) throws IOException { - String folderPath = basePath + "/" + partitionPath + "/"; - boolean makeDir = fs.mkdirs(new Path(folderPath)); - if(!makeDir) { - throw new IOException("cannot create directory for path " + folderPath); - } - boolean createFile = fs.createNewFile(new Path(folderPath + FSUtils.makeLogFileName(fileID, ".log",commitTime, version.orElse(DEFAULT_TASK_PARTITIONID)))); - if(!createFile) { - throw new IOException(StringUtils.format("cannot create data file for commit %s and fileId %s", 
commitTime, fileID)); - } - return fileID; - } - - public static final void createCompactionCommitFiles(String basePath, String... commitTimes) throws IOException { - for (String commitTime: commitTimes) { - boolean createFile = fs.createNewFile(new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + HoodieTimeline.makeCompactionFileName(commitTime))); - if(!createFile) { - throw new IOException("cannot create commit file for commit " + commitTime); - } - } - } - - public static final String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID) throws IOException { - return basePath + "/" + partitionPath + "/" + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID); - } - - public static final String getLogFilePath(String basePath, String partitionPath, String commitTime, String fileID, Optional version) throws IOException { - return basePath + "/" + partitionPath + "/" + FSUtils.makeLogFileName(fileID, ".log", commitTime, version.orElse(DEFAULT_TASK_PARTITIONID)); - } - - public static final String getCommitFilePath(String basePath, String commitTime) throws IOException { - return basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitTime + HoodieTimeline.COMMIT_EXTENSION; - } - - public static final boolean doesDataFileExist(String basePath, String partitionPath, String commitTime, String fileID) throws IOException { - return new File(getDataFilePath(basePath, partitionPath, commitTime, fileID)).exists(); - } - - public static final boolean doesLogFileExist(String basePath, String partitionPath, String commitTime, String fileID, Optional version) throws IOException { - return new File(getLogFilePath(basePath, partitionPath, commitTime, fileID, version)).exists(); - } - - public static final boolean doesCommitExist(String basePath, String commitTime) { - return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + 
HoodieTimeline.COMMIT_EXTENSION).exists(); - } - - public static final boolean doesInflightExist(String basePath, String commitTime) { - return new File(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME+ "/" + commitTime + HoodieTimeline.INFLIGHT_EXTENSION).exists(); - } - - public static String makeInflightTestFileName(String instant) { - return instant + TEST_EXTENSION + HoodieTimeline.INFLIGHT_EXTENSION; - } - - public static void createCleanFiles(String basePath, String commitTime) throws IOException { - Path commitFile = - new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + HoodieTimeline.makeCleanerFileName(commitTime)); - FileSystem fs = FSUtils.getFs(); - FSDataOutputStream os = fs.create(commitFile, true); - try { - HoodieCleanStat cleanStats = new HoodieCleanStat(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS, - DEFAULT_PARTITION_PATHS[rand.nextInt(DEFAULT_PARTITION_PATHS.length)], - new ArrayList<>(), new ArrayList<>(), - new ArrayList<>(), commitTime); - // Create the clean metadata - HoodieCleanMetadata cleanMetadata = - AvroUtils.convertCleanMetadata(commitTime, Optional.of(0L), Arrays.asList(cleanStats)); - // Write empty clean metadata - os.write(AvroUtils.serializeCleanMetadata(cleanMetadata).get()); - } finally { - os.close(); - } - } - - public static String makeTestFileName(String instant) { - return instant + TEST_EXTENSION; - } - - public static String makeCommitFileName(String instant) { - return instant + ".commit"; - } - - public static void assertStreamEquals(String message, Stream expected, Stream actual) { - Iterator iter1 = expected.iterator(), iter2 = actual.iterator(); - while(iter1.hasNext() && iter2.hasNext()) - assertEquals(message, iter1.next(), iter2.next()); - assert !iter1.hasNext() && !iter2.hasNext(); - } - - public static T serializeDeserialize(T object, Class clazz) - throws IOException, ClassNotFoundException { - // Using Kyro as the default serializer in Spark Jobs - Kryo kryo = new Kryo(); - 
kryo.register(HoodieTableMetaClient.class, new JavaSerializer()); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - Output output = new Output(baos); - kryo.writeObject(output, object); - output.close(); - - Input input = new Input(new ByteArrayInputStream(baos.toByteArray())); - T deseralizedObject = kryo.readObject(input, clazz); - input.close(); - return deseralizedObject; - } - - public static void writeRecordsToLogFiles(String basePath, Schema schema, List updatedRecords) { - Map> groupedUpdated = updatedRecords.stream() - .collect(Collectors.groupingBy(HoodieRecord::getCurrentLocation)); - - groupedUpdated.entrySet().forEach(s -> { - HoodieRecordLocation location = s.getKey(); - String partitionPath = s.getValue().get(0).getPartitionPath(); - - Writer logWriter; - try { - logWriter = HoodieLogFormat.newWriterBuilder() - .onParentPath(new Path(basePath, partitionPath)) - .withFileExtension(HoodieLogFile.DELTA_EXTENSION) - .withFileId(location.getFileId()) - .overBaseCommit(location.getCommitTime()) - .withFs(fs).build(); - - Map metadata = Maps.newHashMap(); - metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, location.getCommitTime()); - logWriter.appendBlock(new HoodieAvroDataBlock(s.getValue().stream().map(r -> { - try { - GenericRecord val = (GenericRecord) r.getData().getInsertValue(schema).get(); - HoodieAvroUtils.addHoodieKeyToRecord(val, - r.getRecordKey(), - r.getPartitionPath(), - ""); - return (IndexedRecord) val; - } catch (IOException e) { - return null; - } - }).collect(Collectors.toList()), schema, metadata)); - logWriter.close(); - } catch (Exception e) { - fail(e.toString()); - } - }); - } - - public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) - throws IOException { - RemoteIterator itr = fs.listFiles(new Path(basePath), true); - List returns = Lists.newArrayList(); - while(itr.hasNext()) { - LocatedFileStatus status = itr.next(); - if(status.getPath().getName().contains(".parquet")) { - 
returns.add(status); - } - } - return returns.toArray(new FileStatus[returns.size()]); + public static T serializeDeserialize(T object, Class clazz) + throws IOException, ClassNotFoundException { + // Using Kyro as the default serializer in Spark Jobs + Kryo kryo = new Kryo(); + kryo.register(HoodieTableMetaClient.class, new JavaSerializer()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + Output output = new Output(baos); + kryo.writeObject(output, object); + output.close(); + + Input input = new Input(new ByteArrayInputStream(baos.toByteArray())); + T deseralizedObject = kryo.readObject(input, clazz); + input.close(); + return deseralizedObject; + } + + public static void writeRecordsToLogFiles(String basePath, Schema schema, + List updatedRecords) { + Map> groupedUpdated = updatedRecords.stream() + .collect(Collectors.groupingBy(HoodieRecord::getCurrentLocation)); + + groupedUpdated.entrySet().forEach(s -> { + HoodieRecordLocation location = s.getKey(); + String partitionPath = s.getValue().get(0).getPartitionPath(); + + Writer logWriter; + try { + logWriter = HoodieLogFormat.newWriterBuilder() + .onParentPath(new Path(basePath, partitionPath)) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId(location.getFileId()) + .overBaseCommit(location.getCommitTime()) + .withFs(fs).build(); + + Map metadata = Maps.newHashMap(); + metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, location.getCommitTime()); + logWriter.appendBlock(new HoodieAvroDataBlock(s.getValue().stream().map(r -> { + try { + GenericRecord val = (GenericRecord) r.getData().getInsertValue(schema).get(); + HoodieAvroUtils.addHoodieKeyToRecord(val, + r.getRecordKey(), + r.getPartitionPath(), + ""); + return (IndexedRecord) val; + } catch (IOException e) { + return null; + } + }).collect(Collectors.toList()), schema, metadata)); + logWriter.close(); + } catch (Exception e) { + fail(e.toString()); + } + }); + } + + public static FileStatus[] 
listAllDataFilesInPath(FileSystem fs, String basePath) + throws IOException { + RemoteIterator itr = fs.listFiles(new Path(basePath), true); + List returns = Lists.newArrayList(); + while (itr.hasNext()) { + LocatedFileStatus status = itr.next(); + if (status.getPath().getName().contains(".parquet")) { + returns.add(status); + } } + return returns.toArray(new FileStatus[returns.size()]); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/HoodieTableMetaClientTest.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/HoodieTableMetaClientTest.java index b70e03c48..d771b8236 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/table/HoodieTableMetaClientTest.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/HoodieTableMetaClientTest.java @@ -16,11 +16,20 @@ package com.uber.hoodie.common.table; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + import com.google.common.collect.Lists; import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; +import java.io.IOException; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; @@ -29,116 +38,106 @@ import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.IOException; -import java.util.Optional; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; 
-import static org.junit.Assert.assertTrue; - public class HoodieTableMetaClientTest { - private HoodieTableMetaClient metaClient; - private String basePath; - @Before - public void init() throws IOException { - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - this.basePath = folder.getRoot().getAbsolutePath(); - metaClient = HoodieTestUtils.init(basePath); - } + private HoodieTableMetaClient metaClient; + private String basePath; - @Test - public void checkMetadata() { - assertEquals("Table name should be raw_trips", HoodieTestUtils.RAW_TRIPS_TEST_NAME, - metaClient.getTableConfig().getTableName()); - assertEquals("Basepath should be the one assigned", basePath, metaClient.getBasePath()); - assertEquals("Metapath should be ${basepath}/.hoodie", basePath + "/.hoodie", - metaClient.getMetaPath()); - } + @Before + public void init() throws IOException { + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + this.basePath = folder.getRoot().getAbsolutePath(); + metaClient = HoodieTestUtils.init(basePath); + } - @Test - public void checkSerDe() throws IOException, ClassNotFoundException { - // check if this object is serialized and de-serialized, we are able to read from the file system - HoodieTableMetaClient deseralizedMetaClient = - HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class); - assertNotNull(deseralizedMetaClient); - HoodieActiveTimeline commitTimeline = deseralizedMetaClient.getActiveTimeline(); - HoodieInstant instant = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); - commitTimeline.createInflight(instant); - commitTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); - commitTimeline = commitTimeline.reload(); - HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); - assertEquals("Commit should be 1 and completed", completedInstant, - commitTimeline.getInstants().findFirst().get()); - assertArrayEquals("Commit value should be 
\"test-detail\"", "test-detail".getBytes(), - commitTimeline.getInstantDetails(completedInstant).get()); - } + @Test + public void checkMetadata() { + assertEquals("Table name should be raw_trips", HoodieTestUtils.RAW_TRIPS_TEST_NAME, + metaClient.getTableConfig().getTableName()); + assertEquals("Basepath should be the one assigned", basePath, metaClient.getBasePath()); + assertEquals("Metapath should be ${basepath}/.hoodie", basePath + "/.hoodie", + metaClient.getMetaPath()); + } - @Test - public void checkCommitTimeline() throws IOException { - HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); - HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); - assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); + @Test + public void checkSerDe() throws IOException, ClassNotFoundException { + // check if this object is serialized and de-serialized, we are able to read from the file system + HoodieTableMetaClient deseralizedMetaClient = + HoodieTestUtils.serializeDeserialize(metaClient, HoodieTableMetaClient.class); + assertNotNull(deseralizedMetaClient); + HoodieActiveTimeline commitTimeline = deseralizedMetaClient.getActiveTimeline(); + HoodieInstant instant = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); + commitTimeline.createInflight(instant); + commitTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); + commitTimeline = commitTimeline.reload(); + HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); + assertEquals("Commit should be 1 and completed", completedInstant, + commitTimeline.getInstants().findFirst().get()); + assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), + commitTimeline.getInstantDetails(completedInstant).get()); + } - HoodieInstant instant = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); - activeTimeline.createInflight(instant); - activeTimeline.saveAsComplete(instant, 
Optional.of("test-detail".getBytes())); + @Test + public void checkCommitTimeline() throws IOException { + HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); + HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); + assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); - // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached - activeTimeline = metaClient.getActiveTimeline(); - activeCommitTimeline = activeTimeline.getCommitTimeline(); - assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); + HoodieInstant instant = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); + activeTimeline.createInflight(instant); + activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); - HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); - activeTimeline = activeTimeline.reload(); - activeCommitTimeline = activeTimeline.getCommitTimeline(); - assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty()); - assertEquals("Commit should be 1", completedInstant, - activeCommitTimeline.getInstants().findFirst().get()); - assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), - activeCommitTimeline.getInstantDetails(completedInstant).get()); - } + // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached + activeTimeline = metaClient.getActiveTimeline(); + activeCommitTimeline = activeTimeline.getCommitTimeline(); + assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); - @Test - public void checkArchiveCommitTimeline() throws IOException { - Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getMetaPath()); - SequenceFile.Writer writer = SequenceFile - .createWriter(HoodieTestUtils.fs.getConf(), SequenceFile.Writer.file(archiveLogPath), - 
SequenceFile.Writer.keyClass(Text.class), - SequenceFile.Writer.valueClass(Text.class)); + HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); + activeTimeline = activeTimeline.reload(); + activeCommitTimeline = activeTimeline.getCommitTimeline(); + assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty()); + assertEquals("Commit should be 1", completedInstant, + activeCommitTimeline.getInstants().findFirst().get()); + assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), + activeCommitTimeline.getInstantDetails(completedInstant).get()); + } - writer.append(new Text("1"), new Text("data1")); - writer.append(new Text("2"), new Text("data2")); - writer.append(new Text("3"), new Text("data3")); + @Test + public void checkArchiveCommitTimeline() throws IOException { + Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getMetaPath()); + SequenceFile.Writer writer = SequenceFile + .createWriter(HoodieTestUtils.fs.getConf(), SequenceFile.Writer.file(archiveLogPath), + SequenceFile.Writer.keyClass(Text.class), + SequenceFile.Writer.valueClass(Text.class)); - IOUtils.closeStream(writer); + writer.append(new Text("1"), new Text("data1")); + writer.append(new Text("2"), new Text("data2")); + writer.append(new Text("3"), new Text("data3")); - HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline(); + IOUtils.closeStream(writer); - HoodieInstant instant1 = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1"); - HoodieInstant instant2 = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2"); - HoodieInstant instant3 = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3"); + HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline(); - assertEquals(Lists.newArrayList(instant1, instant2, instant3), - archivedTimeline.getInstants().collect(Collectors.toList())); + HoodieInstant instant1 = + new HoodieInstant(false, 
HoodieTimeline.COMMIT_ACTION, "1"); + HoodieInstant instant2 = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "2"); + HoodieInstant instant3 = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3"); - assertArrayEquals(new Text("data1").getBytes(), - archivedTimeline.getInstantDetails(instant1).get()); - assertArrayEquals(new Text("data2").getBytes(), - archivedTimeline.getInstantDetails(instant2).get()); - assertArrayEquals(new Text("data3").getBytes(), - archivedTimeline.getInstantDetails(instant3).get()); - } + assertEquals(Lists.newArrayList(instant1, instant2, instant3), + archivedTimeline.getInstants().collect(Collectors.toList())); + assertArrayEquals(new Text("data1").getBytes(), + archivedTimeline.getInstantDetails(instant1).get()); + assertArrayEquals(new Text("data2").getBytes(), + archivedTimeline.getInstantDetails(instant2).get()); + assertArrayEquals(new Text("data3").getBytes(), + archivedTimeline.getInstantDetails(instant3).get()); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java index 607cdaea5..8fc7fb46f 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java @@ -16,13 +16,18 @@ package com.uber.hoodie.common.table.log; +import static com.uber.hoodie.common.util.SchemaTestUtil.getSimpleSchema; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import com.google.common.collect.Maps; import com.uber.hoodie.common.minicluster.MiniClusterUtil; import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieTableType; import 
com.uber.hoodie.common.model.HoodieTestUtils; -import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.log.HoodieLogFormat.Reader; import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer; import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock; @@ -35,6 +40,15 @@ import com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.HoodieAvroUtils; import com.uber.hoodie.common.util.SchemaTestUtil; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; @@ -48,22 +62,6 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import static com.uber.hoodie.common.util.SchemaTestUtil.getSimpleSchema; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - @SuppressWarnings("Duplicates") public class HoodieLogFormatTest { @@ -140,7 +138,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); // Write out a block writer = writer.appendBlock(dataBlock); // Get the size of the block @@ 
-170,7 +168,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); writer = writer.appendBlock(dataBlock); long size1 = writer.getCurrentSize(); writer.close(); @@ -222,7 +220,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); writer = writer.appendBlock(dataBlock); long size1 = writer.getCurrentSize(); // do not close this writer - this simulates a data note appending to a log dying without closing the file @@ -254,11 +252,12 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); writer = writer.appendBlock(dataBlock); writer.close(); - Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); + Reader reader = HoodieLogFormat + .newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); assertTrue("We wrote a block, we should be able to read it", reader.hasNext()); HoodieLogBlock nextBlock = reader.next(); assertEquals("The next block should be a data block", HoodieLogBlockType.AVRO_DATA_BLOCK, @@ -281,7 +280,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); writer = writer.appendBlock(dataBlock); writer.close(); @@ -304,7 +303,8 @@ public class 
HoodieLogFormatTest { writer = writer.appendBlock(dataBlock); writer.close(); - Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); + Reader reader = HoodieLogFormat + .newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); assertTrue("First block should be available", reader.hasNext()); HoodieLogBlock nextBlock = reader.next(); HoodieAvroDataBlock dataBlockRead = (HoodieAvroDataBlock) nextBlock; @@ -338,7 +338,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, - getSimpleSchema(), metadata); + getSimpleSchema(), metadata); writer = writer.appendBlock(dataBlock); writer.close(); @@ -358,7 +358,8 @@ public class HoodieLogFormatTest { outputStream.close(); // First round of reads - we should be able to read the first block and then EOF - Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); + Reader reader = HoodieLogFormat + .newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); assertTrue("First block should be available", reader.hasNext()); reader.next(); assertTrue("We should have corrupted block next", reader.hasNext()); @@ -393,7 +394,8 @@ public class HoodieLogFormatTest { writer.close(); // Second round of reads - we should be able to read the first and last block - reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); + reader = HoodieLogFormat + .newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema(), true); assertTrue("First block should be available", reader.hasNext()); reader.next(); assertTrue("We should get the 1st corrupted block next", reader.hasNext()); @@ -424,7 +426,7 @@ public class HoodieLogFormatTest { Map metadata = Maps.newHashMap(); 
metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); // Write 2 @@ -438,7 +440,8 @@ public class HoodieLogFormatTest { .map(s -> s.getPath().toString()) .collect(Collectors.toList()); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, + allLogFiles, schema, "100"); assertEquals("", 200, scanner.getTotalLogRecords()); Set readKeys = new HashSet<>(200); @@ -469,7 +472,7 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); // Write 2 @@ -493,9 +496,11 @@ public class HoodieLogFormatTest { .map(s -> s.getPath().toString()) .collect(Collectors.toList()); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, + allLogFiles, schema, "100"); - assertEquals("We only read 200 records, but only 200 of them are valid", 200, scanner.getTotalLogRecords()); + assertEquals("We only read 200 records, but only 200 of them are valid", 200, + scanner.getTotalLogRecords()); Set readKeys = new HashSet<>(200); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); assertEquals("Stream collect should return all 200 records", 200, readKeys.size()); @@ -523,7 +528,7 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + 
schema, metadata); writer = writer.appendBlock(dataBlock); writer.close(); @@ -561,7 +566,8 @@ public class HoodieLogFormatTest { .map(s -> s.getPath().toString()) .collect(Collectors.toList()); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, + allLogFiles, schema, "100"); assertEquals("We would read 200 records", 200, scanner.getTotalLogRecords()); @@ -592,7 +598,7 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); // Write 2 @@ -609,7 +615,8 @@ public class HoodieLogFormatTest { // Delete 50 keys List deletedKeys = originalKeys.subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), metadata); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), + metadata); writer = writer.appendBlock(deleteBlock); List allLogFiles = FSUtils @@ -617,7 +624,8 @@ public class HoodieLogFormatTest { .map(s -> s.getPath().toString()) .collect(Collectors.toList()); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, + allLogFiles, schema, "100"); assertEquals("We still would read 200 records", 200, scanner.getTotalLogRecords()); @@ -632,25 +640,26 @@ public class HoodieLogFormatTest { // Rollback the last block HoodieCommandBlock commandBlock = new HoodieCommandBlock( - HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); + HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); writer = 
writer.appendBlock(commandBlock); readKeys.clear(); scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema, "100"); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); - assertEquals("Stream collect should return all 200 records after rollback of delete", 200, readKeys.size()); + assertEquals("Stream collect should return all 200 records after rollback of delete", 200, + readKeys.size()); } @Test public void testAvroLogRecordReaderWithFailedRollbacks() - throws IOException, URISyntaxException, InterruptedException { + throws IOException, URISyntaxException, InterruptedException { // Write a Data block and Delete block with same InstantTime (written in same batch) Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); // Set a small threshold so that every block is a new version Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) - .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") - .overBaseCommit("100").withFs(fs).build(); + .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") + .overBaseCommit("100").withFs(fs).build(); // Write 1 List records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100); @@ -658,7 +667,7 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); // Write 2 @@ -667,23 +676,24 @@ public class HoodieLogFormatTest { writer = writer.appendBlock(dataBlock); List originalKeys = records1.stream() - .map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()) - .collect( - Collectors.toList()); + .map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()) + .collect( + Collectors.toList()); // Delete 50 keys 
List deletedKeys = originalKeys.subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), metadata); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), + metadata); writer = writer.appendBlock(deleteBlock); // Attemp 1 : Write 2 rollback blocks (1 data block + 1 delete bloc) for a failed write HoodieCommandBlock commandBlock = new HoodieCommandBlock( - HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); + HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); try { writer = writer.appendBlock(commandBlock); // Say job failed, retry writing 2 rollback in the next rollback(..) attempt throw new Exception("simulating failure"); - } catch(Exception e) { + } catch (Exception e) { // it's okay } // Attempt 2 : Write 2 rollback blocks (1 data block + 1 delete bloc) for a failed write @@ -691,14 +701,15 @@ public class HoodieLogFormatTest { writer = writer.appendBlock(commandBlock); List allLogFiles = FSUtils - .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") - .map(s -> s.getPath().toString()) - .collect(Collectors.toList()); + .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") + .map(s -> s.getPath().toString()) + .collect(Collectors.toList()); - HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, - schema, "100"); + HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, + allLogFiles, + schema, "100"); assertEquals("We would read 100 records", 100, - scanner.getTotalLogRecords()); + scanner.getTotalLogRecords()); final List readKeys = new ArrayList<>(100); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); @@ -707,14 +718,14 @@ public class HoodieLogFormatTest { @Test public void testAvroLogRecordReaderWithInsertDeleteAndRollback() - throws IOException, URISyntaxException, 
InterruptedException { + throws IOException, URISyntaxException, InterruptedException { // Write a Data block and Delete block with same InstantTime (written in same batch) Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); // Set a small threshold so that every block is a new version Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) - .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") - .overBaseCommit("100").withFs(fs).build(); + .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") + .overBaseCommit("100").withFs(fs).build(); // Write 1 List records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100); @@ -722,43 +733,45 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "100"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); List originalKeys = records1.stream() - .map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()) - .collect( - Collectors.toList()); + .map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString()) + .collect( + Collectors.toList()); // Delete 50 keys List deletedKeys = originalKeys.subList(0, 50); - HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), metadata); + HoodieDeleteBlock deleteBlock = new HoodieDeleteBlock(deletedKeys.toArray(new String[50]), + metadata); writer = writer.appendBlock(deleteBlock); // Write 2 rollback blocks (1 data block + 1 delete bloc) for a failed write HoodieCommandBlock commandBlock = new HoodieCommandBlock( - HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); + HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); writer = writer.appendBlock(commandBlock); writer = 
writer.appendBlock(commandBlock); List allLogFiles = FSUtils - .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") - .map(s -> s.getPath().toString()) - .collect(Collectors.toList()); + .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") + .map(s -> s.getPath().toString()) + .collect(Collectors.toList()); HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, - allLogFiles, schema, "100"); + allLogFiles, schema, "100"); assertEquals("We would read 0 records", 0, - scanner.getTotalLogRecords()); + scanner.getTotalLogRecords()); } @Test - public void testAvroLogRecordReaderWithInvalidRollback() throws IOException, URISyntaxException, InterruptedException { + public void testAvroLogRecordReaderWithInvalidRollback() + throws IOException, URISyntaxException, InterruptedException { Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); // Set a small threshold so that every block is a new version Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) - .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") - .overBaseCommit("100").withFs(fs).build(); + .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") + .overBaseCommit("100").withFs(fs).build(); // Write 1 List records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100); @@ -766,23 +779,23 @@ public class HoodieLogFormatTest { metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, "100"); metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, "101"); HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, - schema, metadata); + schema, metadata); writer = writer.appendBlock(dataBlock); // Write invalid rollback for a failed write (possible for in-flight commits) HoodieCommandBlock commandBlock = new HoodieCommandBlock( - HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); + 
HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata); writer = writer.appendBlock(commandBlock); List allLogFiles = FSUtils - .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") - .map(s -> s.getPath().toString()) - .collect(Collectors.toList()); + .getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100") + .map(s -> s.getPath().toString()) + .collect(Collectors.toList()); HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, - allLogFiles, schema, "100"); + allLogFiles, schema, "100"); assertEquals("We still would read 100 records", 100, - scanner.getTotalLogRecords()); + scanner.getTotalLogRecords()); final List readKeys = new ArrayList<>(100); scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey())); assertEquals("Stream collect should return all 150 records", 100, readKeys.size()); diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/HoodieActiveTimelineTest.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/HoodieActiveTimelineTest.java index cc520acf2..22285a6c5 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/HoodieActiveTimelineTest.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/HoodieActiveTimelineTest.java @@ -16,11 +16,18 @@ package com.uber.hoodie.common.table.string; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; +import java.io.IOException; +import java.util.Optional; +import java.util.stream.Stream; import org.apache.hadoop.fs.Path; import org.junit.After; 
import org.junit.Before; @@ -28,109 +35,104 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; -import java.io.IOException; -import java.util.Optional; -import java.util.stream.Stream; - -import static org.junit.Assert.*; - public class HoodieActiveTimelineTest { - private HoodieActiveTimeline timeline; - private HoodieTableMetaClient metaClient; - @Rule - public final ExpectedException exception = ExpectedException.none(); - @Before - public void setUp() throws Exception { - this.metaClient = HoodieTestUtils.initOnTemp(); - } + private HoodieActiveTimeline timeline; + private HoodieTableMetaClient metaClient; + @Rule + public final ExpectedException exception = ExpectedException.none(); - @After - public void tearDown() throws Exception { - HoodieTestUtils.fs.delete(new Path(this.metaClient.getBasePath()), true); - } + @Before + public void setUp() throws Exception { + this.metaClient = HoodieTestUtils.initOnTemp(); + } - @Test - public void testLoadingInstantsFromFiles() throws IOException { - HoodieInstant instant1 = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); - HoodieInstant instant2 = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "3"); - HoodieInstant instant3 = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "5"); - HoodieInstant instant4 = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "8"); - HoodieInstant instant1_complete = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1"); - HoodieInstant instant2_complete = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3"); - HoodieInstant instant3_complete = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "5"); - HoodieInstant instant4_complete = - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "8"); + @After + public void tearDown() throws Exception { + HoodieTestUtils.fs.delete(new Path(this.metaClient.getBasePath()), true); + } - HoodieInstant instant5 = - new HoodieInstant(true, 
HoodieTimeline.COMMIT_ACTION, "9"); + @Test + public void testLoadingInstantsFromFiles() throws IOException { + HoodieInstant instant1 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); + HoodieInstant instant2 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "3"); + HoodieInstant instant3 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "5"); + HoodieInstant instant4 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "8"); + HoodieInstant instant1_complete = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "1"); + HoodieInstant instant2_complete = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "3"); + HoodieInstant instant3_complete = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "5"); + HoodieInstant instant4_complete = + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "8"); - timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath()); - timeline.saveAsComplete(instant1, Optional.empty()); - timeline.saveAsComplete(instant2, Optional.empty()); - timeline.saveAsComplete(instant3, Optional.empty()); - timeline.saveAsComplete(instant4, Optional.empty()); - timeline.createInflight(instant5); - timeline = timeline.reload(); + HoodieInstant instant5 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "9"); - assertEquals("Total instants should be 5", 5, timeline.countInstants()); - HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream - .of(instant1_complete, instant2_complete, instant3_complete, instant4_complete, - instant5), timeline.getInstants()); - HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream - .of(instant1_complete, instant2_complete, instant3_complete, instant4_complete, - instant5), timeline.getCommitTimeline().getInstants()); - HoodieTestUtils.assertStreamEquals("Check the instants stream", - Stream.of(instant1_complete, instant2_complete, instant3_complete, instant4_complete), - 
timeline.getCommitTimeline().filterCompletedInstants().getInstants()); - HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5), - timeline.getCommitTimeline().filterInflights().getInstants()); - } + timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath()); + timeline.saveAsComplete(instant1, Optional.empty()); + timeline.saveAsComplete(instant2, Optional.empty()); + timeline.saveAsComplete(instant3, Optional.empty()); + timeline.saveAsComplete(instant4, Optional.empty()); + timeline.createInflight(instant5); + timeline = timeline.reload(); - @Test - public void testTimelineOperationsBasic() throws Exception { - timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath()); - assertTrue(timeline.empty()); - assertEquals("", 0, timeline.countInstants()); - assertEquals("", Optional.empty(), timeline.firstInstant()); - assertEquals("", Optional.empty(), timeline.nthInstant(5)); - assertEquals("", Optional.empty(), timeline.nthInstant(-1)); - assertEquals("", Optional.empty(), timeline.lastInstant()); - assertFalse("", timeline.containsInstant( - new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "01"))); - } + assertEquals("Total instants should be 5", 5, timeline.countInstants()); + HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream + .of(instant1_complete, instant2_complete, instant3_complete, instant4_complete, + instant5), timeline.getInstants()); + HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream + .of(instant1_complete, instant2_complete, instant3_complete, instant4_complete, + instant5), timeline.getCommitTimeline().getInstants()); + HoodieTestUtils.assertStreamEquals("Check the instants stream", + Stream.of(instant1_complete, instant2_complete, instant3_complete, instant4_complete), + timeline.getCommitTimeline().filterCompletedInstants().getInstants()); + HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5), + 
timeline.getCommitTimeline().filterInflights().getInstants()); + } - @Test - public void testTimelineOperations() throws Exception { - timeline = new MockHoodieTimeline( - Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"), - Stream.of("21", "23")); - HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"), - timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11") - .getInstants().map(HoodieInstant::getTimestamp)); - HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"), - timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2) - .getInstants().map(HoodieInstant::getTimestamp)); - assertFalse(timeline.empty()); - assertFalse(timeline.getCommitTimeline().filterInflights().empty()); - assertEquals("", 12, timeline.countInstants()); - HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants(); - assertEquals("", 10, activeCommitTimeline.countInstants()); + @Test + public void testTimelineOperationsBasic() throws Exception { + timeline = new HoodieActiveTimeline(HoodieTestUtils.fs, metaClient.getMetaPath()); + assertTrue(timeline.empty()); + assertEquals("", 0, timeline.countInstants()); + assertEquals("", Optional.empty(), timeline.firstInstant()); + assertEquals("", Optional.empty(), timeline.nthInstant(5)); + assertEquals("", Optional.empty(), timeline.nthInstant(-1)); + assertEquals("", Optional.empty(), timeline.lastInstant()); + assertFalse("", timeline.containsInstant( + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "01"))); + } - assertEquals("", "01", activeCommitTimeline.firstInstant().get().getTimestamp()); - assertEquals("", "11", activeCommitTimeline.nthInstant(5).get().getTimestamp()); - assertEquals("", "19", activeCommitTimeline.lastInstant().get().getTimestamp()); - assertEquals("", "09", activeCommitTimeline.nthFromLastInstant(5).get().getTimestamp()); - assertTrue("", activeCommitTimeline.containsInstant( - 
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "09"))); - assertFalse("", activeCommitTimeline.isBeforeTimelineStarts("02")); - assertTrue("", activeCommitTimeline.isBeforeTimelineStarts("00")); - } + @Test + public void testTimelineOperations() throws Exception { + timeline = new MockHoodieTimeline( + Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"), + Stream.of("21", "23")); + HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"), + timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11") + .getInstants().map(HoodieInstant::getTimestamp)); + HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"), + timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2) + .getInstants().map(HoodieInstant::getTimestamp)); + assertFalse(timeline.empty()); + assertFalse(timeline.getCommitTimeline().filterInflights().empty()); + assertEquals("", 12, timeline.countInstants()); + HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants(); + assertEquals("", 10, activeCommitTimeline.countInstants()); + + assertEquals("", "01", activeCommitTimeline.firstInstant().get().getTimestamp()); + assertEquals("", "11", activeCommitTimeline.nthInstant(5).get().getTimestamp()); + assertEquals("", "19", activeCommitTimeline.lastInstant().get().getTimestamp()); + assertEquals("", "09", activeCommitTimeline.nthFromLastInstant(5).get().getTimestamp()); + assertTrue("", activeCommitTimeline.containsInstant( + new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "09"))); + assertFalse("", activeCommitTimeline.isBeforeTimelineStarts("02")); + assertTrue("", activeCommitTimeline.isBeforeTimelineStarts("00")); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/MockHoodieTimeline.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/MockHoodieTimeline.java index 050bbe145..5e3b48844 100644 --- 
a/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/MockHoodieTimeline.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/string/MockHoodieTimeline.java @@ -19,7 +19,6 @@ package com.uber.hoodie.common.table.string; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; - import java.io.IOException; import java.util.Comparator; import java.util.function.Function; @@ -27,18 +26,19 @@ import java.util.stream.Collectors; import java.util.stream.Stream; public class MockHoodieTimeline extends HoodieActiveTimeline { - public MockHoodieTimeline(Stream completed, Stream inflights) - throws IOException { - super(); - this.instants = Stream.concat(completed - .map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)), - inflights.map( - s -> new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, s))) - .sorted(Comparator.comparing(new Function() { - @Override - public String apply(HoodieInstant hoodieInstant) { - return hoodieInstant.getFileName(); - } - })).collect(Collectors.toList()); - } + + public MockHoodieTimeline(Stream completed, Stream inflights) + throws IOException { + super(); + this.instants = Stream.concat(completed + .map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)), + inflights.map( + s -> new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, s))) + .sorted(Comparator.comparing(new Function() { + @Override + public String apply(HoodieInstant hoodieInstant) { + return hoodieInstant.getFileName(); + } + })).collect(Collectors.toList()); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemViewTest.java b/hoodie-common/src/test/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemViewTest.java index 1e273fa1e..992b86416 100644 --- 
a/hoodie-common/src/test/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemViewTest.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/view/HoodieTableFileSystemViewTest.java @@ -16,9 +16,12 @@ package com.uber.hoodie.common.table.view; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import com.google.common.collect.Lists; import com.google.common.collect.Sets; - import com.uber.hoodie.common.model.FileSlice; import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodieFileGroup; @@ -30,499 +33,501 @@ import com.uber.hoodie.common.table.TableFileSystemView; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.util.FSUtils; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.UUID; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import static org.junit.Assert.*; - @SuppressWarnings("ResultOfMethodCallIgnored") public class HoodieTableFileSystemViewTest { - private HoodieTableMetaClient metaClient; - private String basePath; - private TableFileSystemView fsView; - private TableFileSystemView.ReadOptimizedView roView; - private TableFileSystemView.RealtimeView rtView; - @Before - public void init() throws IOException { - TemporaryFolder folder = new 
TemporaryFolder(); - folder.create(); - this.basePath = folder.getRoot().getAbsolutePath(); - metaClient = HoodieTestUtils.init(basePath); - fsView = new HoodieTableFileSystemView(metaClient, - metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants()); - roView = (TableFileSystemView.ReadOptimizedView) fsView; - rtView = (TableFileSystemView.RealtimeView) fsView; + private HoodieTableMetaClient metaClient; + private String basePath; + private TableFileSystemView fsView; + private TableFileSystemView.ReadOptimizedView roView; + private TableFileSystemView.RealtimeView rtView; + + @Before + public void init() throws IOException { + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + this.basePath = folder.getRoot().getAbsolutePath(); + metaClient = HoodieTestUtils.init(basePath); + fsView = new HoodieTableFileSystemView(metaClient, + metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants()); + roView = (TableFileSystemView.ReadOptimizedView) fsView; + rtView = (TableFileSystemView.RealtimeView) fsView; + } + + private void refreshFsView(FileStatus[] statuses) { + metaClient = new HoodieTableMetaClient(HoodieTestUtils.fs, basePath, true); + if (statuses != null) { + fsView = new HoodieTableFileSystemView(metaClient, + metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(), + statuses); + } else { + fsView = new HoodieTableFileSystemView(metaClient, + metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants()); } + roView = (TableFileSystemView.ReadOptimizedView) fsView; + rtView = (TableFileSystemView.RealtimeView) fsView; + } - private void refreshFsView(FileStatus[] statuses) { - metaClient = new HoodieTableMetaClient(HoodieTestUtils.fs, basePath, true); - if (statuses != null) { - fsView = new HoodieTableFileSystemView(metaClient, - metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants(), - statuses); - } else { - fsView = new 
HoodieTableFileSystemView(metaClient, - metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants()); - } - roView = (TableFileSystemView.ReadOptimizedView) fsView; - rtView = (TableFileSystemView.RealtimeView) fsView; - } + @Test + public void testGetLatestDataFilesForFileId() throws IOException { + String partitionPath = "2016/05/01"; + new File(basePath + "/" + partitionPath).mkdirs(); + String fileId = UUID.randomUUID().toString(); - @Test - public void testGetLatestDataFilesForFileId() throws IOException { - String partitionPath = "2016/05/01"; - new File(basePath + "/" + partitionPath).mkdirs(); - String fileId = UUID.randomUUID().toString(); + assertFalse("No commit, should not find any data file", + roView.getLatestDataFiles(partitionPath) + .filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().isPresent()); - assertFalse("No commit, should not find any data file", - roView.getLatestDataFiles(partitionPath) - .filter(dfile -> dfile.getFileId().equals(fileId)).findFirst().isPresent()); + // Only one commit, but is not safe + String commitTime1 = "1"; + String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId); + new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); + refreshFsView(null); + assertFalse("No commit, should not find any data file", + roView.getLatestDataFiles(partitionPath) + .filter(dfile -> dfile.getFileId().equals(fileId)) + .findFirst().isPresent()); - // Only one commit, but is not safe - String commitTime1 = "1"; - String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId); - new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); - refreshFsView(null); - assertFalse("No commit, should not find any data file", - roView.getLatestDataFiles(partitionPath) - .filter(dfile -> dfile.getFileId().equals(fileId)) - .findFirst().isPresent()); + // Make this commit safe + HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); + HoodieInstant instant1 
= + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1); + commitTimeline.saveAsComplete(instant1, Optional.empty()); + refreshFsView(null); + assertEquals("", fileName1, roView + .getLatestDataFiles(partitionPath) + .filter(dfile -> dfile.getFileId().equals(fileId)) + .findFirst().get() + .getFileName()); - // Make this commit safe - HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); - HoodieInstant instant1 = - new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1); - commitTimeline.saveAsComplete(instant1, Optional.empty()); - refreshFsView(null); - assertEquals("", fileName1, roView - .getLatestDataFiles(partitionPath) - .filter(dfile -> dfile.getFileId().equals(fileId)) - .findFirst().get() - .getFileName()); + // Do another commit, but not safe + String commitTime2 = "2"; + String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId); + new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); + refreshFsView(null); + assertEquals("", fileName1, roView + .getLatestDataFiles(partitionPath) + .filter(dfile -> dfile.getFileId().equals(fileId)) + .findFirst().get() + .getFileName()); - // Do another commit, but not safe - String commitTime2 = "2"; - String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId); - new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); - refreshFsView(null); - assertEquals("", fileName1, roView - .getLatestDataFiles(partitionPath) - .filter(dfile -> dfile.getFileId().equals(fileId)) - .findFirst().get() - .getFileName()); + // Make it safe + HoodieInstant instant2 = + new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2); + commitTimeline.saveAsComplete(instant2, Optional.empty()); + refreshFsView(null); + assertEquals("", fileName2, roView + .getLatestDataFiles(partitionPath) + .filter(dfile -> dfile.getFileId().equals(fileId)) + .findFirst().get() + .getFileName()); + } - // Make it safe - HoodieInstant instant2 = - new 
HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime2); - commitTimeline.saveAsComplete(instant2, Optional.empty()); - refreshFsView(null); - assertEquals("", fileName2, roView - .getLatestDataFiles(partitionPath) - .filter(dfile -> dfile.getFileId().equals(fileId)) - .findFirst().get() - .getFileName()); - } + @Test + public void testStreamLatestVersionInPartition() throws IOException { + // Put some files in the partition + String fullPartitionPath = basePath + "/2016/05/01/"; + new File(fullPartitionPath).mkdirs(); + String commitTime1 = "1"; + String commitTime2 = "2"; + String commitTime3 = "3"; + String commitTime4 = "4"; + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + String fileId4 = UUID.randomUUID().toString(); - @Test - public void testStreamLatestVersionInPartition() throws IOException { - // Put some files in the partition - String fullPartitionPath = basePath + "/2016/05/01/"; - new File(fullPartitionPath).mkdirs(); - String commitTime1 = "1"; - String commitTime2 = "2"; - String commitTime3 = "3"; - String commitTime4 = "4"; - String fileId1 = UUID.randomUUID().toString(); - String fileId2 = UUID.randomUUID().toString(); - String fileId3 = UUID.randomUUID().toString(); - String fileId4 = UUID.randomUUID().toString(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + 
FSUtils.makeDataFileName(commitTime2, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) + .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) - .createNewFile(); + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + 
"/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); + // Now we list the entire partition + FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); + assertEquals(11, statuses.length); + refreshFsView(null); - new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); + // Check files as of lastest commit. + List allSlices = rtView.getAllFileSlices("2016/05/01").collect(Collectors.toList()); + assertEquals(8, allSlices.size()); + Map fileSliceMap = allSlices.stream().collect(Collectors.groupingBy( + slice -> slice.getFileId(), Collectors.counting())); + assertEquals(2, fileSliceMap.get(fileId1).longValue()); + assertEquals(3, fileSliceMap.get(fileId2).longValue()); + assertEquals(2, fileSliceMap.get(fileId3).longValue()); + assertEquals(1, fileSliceMap.get(fileId4).longValue()); - // Now we list the entire partition - FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); - assertEquals(11, statuses.length); - refreshFsView(null); - - // Check files as of lastest commit. 
- List allSlices = rtView.getAllFileSlices("2016/05/01").collect(Collectors.toList()); - assertEquals(8, allSlices.size()); - Map fileSliceMap = allSlices.stream().collect(Collectors.groupingBy( - slice -> slice.getFileId(), Collectors.counting())); - assertEquals(2, fileSliceMap.get(fileId1).longValue()); - assertEquals(3, fileSliceMap.get(fileId2).longValue()); - assertEquals(2, fileSliceMap.get(fileId3).longValue()); - assertEquals(1, fileSliceMap.get(fileId4).longValue()); - - - List dataFileList = - roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4) - .collect(Collectors.toList()); - assertEquals(3, dataFileList.size()); - Set filenames = Sets.newHashSet(); - for (HoodieDataFile status : dataFileList) { - filenames.add(status.getFileName()); - } - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3))); - - filenames = Sets.newHashSet(); - List logFilesList = - rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4) - .map(slice -> slice.getLogFiles()) - .flatMap(logFileList -> logFileList) - .collect(Collectors.toList()); - assertEquals(logFilesList.size(), 4); - for (HoodieLogFile logFile: logFilesList) { - filenames.add(logFile.getFileName()); - } - assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))); - assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1))); - assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))); - assertTrue(filenames.contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))); - - // Reset the max commit time - List dataFiles = - roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3) - .collect(Collectors.toList()); 
- assertEquals(dataFiles.size(), 3); - filenames = Sets.newHashSet(); - for (HoodieDataFile status : dataFiles) { - filenames.add(status.getFileName()); - } - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3))); - - logFilesList = - rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3) - .map(slice -> slice.getLogFiles()) - .flatMap(logFileList -> logFileList).collect(Collectors.toList()); - assertEquals(logFilesList.size(), 1); - assertTrue(logFilesList.get(0).getFileName().equals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))); - } - - @Test - public void testStreamEveryVersionInPartition() throws IOException { - // Put some files in the partition - String fullPartitionPath = basePath + "/2016/05/01/"; - new File(fullPartitionPath).mkdirs(); - String commitTime1 = "1"; - String commitTime2 = "2"; - String commitTime3 = "3"; - String commitTime4 = "4"; - String fileId1 = UUID.randomUUID().toString(); - String fileId2 = UUID.randomUUID().toString(); - String fileId3 = UUID.randomUUID().toString(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) - .createNewFile(); - - new File(basePath + 
"/.hoodie/" + commitTime1 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); - - // Now we list the entire partition - FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); - assertEquals(7, statuses.length); - - refreshFsView(null); - List fileGroups = - fsView.getAllFileGroups("2016/05/01").collect(Collectors.toList()); - assertEquals(3, fileGroups.size()); - - for (HoodieFileGroup fileGroup : fileGroups) { - String fileId = fileGroup.getId(); - Set filenames = Sets.newHashSet(); - fileGroup.getAllDataFiles().forEach(dataFile -> { - assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId()); - filenames.add(dataFile.getFileName()); - }); - if (fileId.equals(fileId1)) { - assertEquals(filenames, - Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId1), - FSUtils.makeDataFileName(commitTime4, 1, fileId1))); - } else if (fileId.equals(fileId2)) { - assertEquals(filenames, - Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId2), - FSUtils.makeDataFileName(commitTime2, 1, fileId2), - FSUtils.makeDataFileName(commitTime3, 1, fileId2))); - } else { - assertEquals(filenames, - Sets.newHashSet(FSUtils.makeDataFileName(commitTime3, 1, fileId3), - FSUtils.makeDataFileName(commitTime4, 1, fileId3))); - } - } - } - - @Test - public void streamLatestVersionInRange() throws IOException { - // Put some files in the partition - String fullPartitionPath = basePath + "/2016/05/01/"; - new File(fullPartitionPath).mkdirs(); - String commitTime1 = "1"; - String commitTime2 = "2"; - String commitTime3 = "3"; - String commitTime4 = "4"; - String fileId1 = UUID.randomUUID().toString(); - String fileId2 = UUID.randomUUID().toString(); - String fileId3 = UUID.randomUUID().toString(); - - new 
File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId1)) - .createNewFile(); - - - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) - .createNewFile(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) - .createNewFile(); - - - new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); - - // Now we list the entire partition - FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); - assertEquals(9, statuses.length); - - refreshFsView(statuses); - List dataFiles = roView - .getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3)) + List dataFileList = + roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4) .collect(Collectors.toList()); - assertEquals(3, dataFiles.size()); - Set filenames = Sets.newHashSet(); - for (HoodieDataFile status : dataFiles) { - filenames.add(status.getFileName()); - } - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); - 
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3))); + assertEquals(3, dataFileList.size()); + Set filenames = Sets.newHashSet(); + for (HoodieDataFile status : dataFileList) { + filenames.add(status.getFileName()); + } + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3))); + filenames = Sets.newHashSet(); + List logFilesList = + rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime4) + .map(slice -> slice.getLogFiles()) + .flatMap(logFileList -> logFileList) + .collect(Collectors.toList()); + assertEquals(logFilesList.size(), 4); + for (HoodieLogFile logFile : logFilesList) { + filenames.add(logFile.getFileName()); + } + assertTrue(filenames + .contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))); + assertTrue(filenames + .contains(FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 1))); + assertTrue(filenames + .contains(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))); + assertTrue(filenames + .contains(FSUtils.makeLogFileName(fileId4, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0))); - List slices = rtView - .getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4)) - .collect(Collectors.toList()); + // Reset the max commit time + List dataFiles = + roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3) + .collect(Collectors.toList()); + assertEquals(dataFiles.size(), 3); + filenames = Sets.newHashSet(); + for (HoodieDataFile status : dataFiles) { + filenames.add(status.getFileName()); + } + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); + 
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3))); + + logFilesList = + rtView.getLatestFileSlicesBeforeOrOn("2016/05/01", commitTime3) + .map(slice -> slice.getLogFiles()) + .flatMap(logFileList -> logFileList).collect(Collectors.toList()); + assertEquals(logFilesList.size(), 1); + assertTrue(logFilesList.get(0).getFileName() + .equals(FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime3, 0))); + } + + @Test + public void testStreamEveryVersionInPartition() throws IOException { + // Put some files in the partition + String fullPartitionPath = basePath + "/2016/05/01/"; + new File(fullPartitionPath).mkdirs(); + String commitTime1 = "1"; + String commitTime2 = "2"; + String commitTime3 = "3"; + String commitTime4 = "4"; + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) + .createNewFile(); + + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); + + // Now we list the entire 
partition + FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); + assertEquals(7, statuses.length); + + refreshFsView(null); + List fileGroups = + fsView.getAllFileGroups("2016/05/01").collect(Collectors.toList()); + assertEquals(3, fileGroups.size()); + + for (HoodieFileGroup fileGroup : fileGroups) { + String fileId = fileGroup.getId(); + Set filenames = Sets.newHashSet(); + fileGroup.getAllDataFiles().forEach(dataFile -> { + assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId()); + filenames.add(dataFile.getFileName()); + }); + if (fileId.equals(fileId1)) { + assertEquals(filenames, + Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId1), + FSUtils.makeDataFileName(commitTime4, 1, fileId1))); + } else if (fileId.equals(fileId2)) { + assertEquals(filenames, + Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId2), + FSUtils.makeDataFileName(commitTime2, 1, fileId2), + FSUtils.makeDataFileName(commitTime3, 1, fileId2))); + } else { + assertEquals(filenames, + Sets.newHashSet(FSUtils.makeDataFileName(commitTime3, 1, fileId3), + FSUtils.makeDataFileName(commitTime4, 1, fileId3))); + } + } + } + + @Test + public void streamLatestVersionInRange() throws IOException { + // Put some files in the partition + String fullPartitionPath = basePath + "/2016/05/01/"; + new File(fullPartitionPath).mkdirs(); + String commitTime1 = "1"; + String commitTime2 = "2"; + String commitTime3 = "3"; + String commitTime4 = "4"; + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId1)) + .createNewFile(); + + 
new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) + .createNewFile(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) + .createNewFile(); + + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); + + // Now we list the entire partition + FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); + assertEquals(9, statuses.length); + + refreshFsView(statuses); + List dataFiles = roView + .getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3)) + .collect(Collectors.toList()); + assertEquals(3, dataFiles.size()); + Set filenames = Sets.newHashSet(); + for (HoodieDataFile status : dataFiles) { + filenames.add(status.getFileName()); + } + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3))); + + List slices = rtView + .getLatestFileSliceInRange(Lists.newArrayList(commitTime3, commitTime4)) + .collect(Collectors.toList()); + assertEquals(3, slices.size()); + for (FileSlice slice : slices) { + if (slice.getFileId().equals(fileId1)) { + assertEquals(slice.getBaseCommitTime(), commitTime3); + assertTrue(slice.getDataFile().isPresent()); + 
assertEquals(slice.getLogFiles().count(), 0); + } else if (slice.getFileId().equals(fileId2)) { + assertEquals(slice.getBaseCommitTime(), commitTime4); + assertFalse(slice.getDataFile().isPresent()); + assertEquals(slice.getLogFiles().count(), 1); + } else if (slice.getFileId().equals(fileId3)) { + assertEquals(slice.getBaseCommitTime(), commitTime4); + assertTrue(slice.getDataFile().isPresent()); + assertEquals(slice.getLogFiles().count(), 0); + } + } + } + + @Test + public void streamLatestVersionsBefore() throws IOException { + // Put some files in the partition + String partitionPath = "2016/05/01/"; + String fullPartitionPath = basePath + "/" + partitionPath; + new File(fullPartitionPath).mkdirs(); + String commitTime1 = "1"; + String commitTime2 = "2"; + String commitTime3 = "3"; + String commitTime4 = "4"; + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) + .createNewFile(); + + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime4 + 
".commit").createNewFile(); + + // Now we list the entire partition + FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); + assertEquals(7, statuses.length); + + refreshFsView(null); + List dataFiles = + roView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2) + .collect(Collectors.toList()); + assertEquals(2, dataFiles.size()); + Set filenames = Sets.newHashSet(); + for (HoodieDataFile status : dataFiles) { + filenames.add(status.getFileName()); + } + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime2, 1, fileId2))); + } + + @Test + public void streamLatestVersions() throws IOException { + // Put some files in the partition + String partitionPath = "2016/05/01/"; + String fullPartitionPath = basePath + "/" + partitionPath; + new File(fullPartitionPath).mkdirs(); + String commitTime1 = "1"; + String commitTime2 = "2"; + String commitTime3 = "3"; + String commitTime4 = "4"; + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) + .createNewFile(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) + .createNewFile(); + new File(fullPartitionPath + FSUtils + .makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0)) + .createNewFile(); + new 
File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) + .createNewFile(); + + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) + .createNewFile(); + new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) + .createNewFile(); + + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); + + // Now we list the entire partition + FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); + assertEquals(10, statuses.length); + + refreshFsView(statuses); + + List fileGroups = fsView + .getAllFileGroups(partitionPath) + .collect(Collectors.toList()); + assertEquals(3, fileGroups.size()); + for (HoodieFileGroup fileGroup : fileGroups) { + List slices = fileGroup.getAllFileSlices().collect(Collectors.toList()); + if (fileGroup.getId().equals(fileId1)) { + assertEquals(2, slices.size()); + assertEquals(commitTime4, slices.get(0).getBaseCommitTime()); + assertEquals(commitTime1, slices.get(1).getBaseCommitTime()); + } else if (fileGroup.getId().equals(fileId2)) { assertEquals(3, slices.size()); - for (FileSlice slice: slices) { - if (slice.getFileId().equals(fileId1)) { - assertEquals(slice.getBaseCommitTime(), commitTime3); - assertTrue(slice.getDataFile().isPresent()); - assertEquals(slice.getLogFiles().count(), 0); - } else if (slice.getFileId().equals(fileId2)) { - assertEquals(slice.getBaseCommitTime(), commitTime4); - assertFalse(slice.getDataFile().isPresent()); - assertEquals(slice.getLogFiles().count(), 1); - } else if (slice.getFileId().equals(fileId3)) { - assertEquals(slice.getBaseCommitTime(), commitTime4); - assertTrue(slice.getDataFile().isPresent()); - assertEquals(slice.getLogFiles().count(), 0); - } - } + 
assertEquals(commitTime3, slices.get(0).getBaseCommitTime()); + assertEquals(commitTime2, slices.get(1).getBaseCommitTime()); + assertEquals(commitTime1, slices.get(2).getBaseCommitTime()); + } else if (fileGroup.getId().equals(fileId3)) { + assertEquals(2, slices.size()); + assertEquals(commitTime4, slices.get(0).getBaseCommitTime()); + assertEquals(commitTime3, slices.get(1).getBaseCommitTime()); + } } - @Test - public void streamLatestVersionsBefore() throws IOException { - // Put some files in the partition - String partitionPath = "2016/05/01/"; - String fullPartitionPath = basePath + "/" + partitionPath; - new File(fullPartitionPath).mkdirs(); - String commitTime1 = "1"; - String commitTime2 = "2"; - String commitTime3 = "3"; - String commitTime4 = "4"; - String fileId1 = UUID.randomUUID().toString(); - String fileId2 = UUID.randomUUID().toString(); - String fileId3 = UUID.randomUUID().toString(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) - .createNewFile(); - - new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); - - // Now we list the entire partition - 
FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); - assertEquals(7, statuses.length); - - refreshFsView(null); - List dataFiles = - roView.getLatestDataFilesBeforeOrOn(partitionPath, commitTime2) - .collect(Collectors.toList()); - assertEquals(2, dataFiles.size()); - Set filenames = Sets.newHashSet(); - for (HoodieDataFile status : dataFiles) { - filenames.add(status.getFileName()); - } - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime2, 1, fileId2))); - } - - @Test - public void streamLatestVersions() throws IOException { - // Put some files in the partition - String partitionPath = "2016/05/01/"; - String fullPartitionPath = basePath + "/" + partitionPath; - new File(fullPartitionPath).mkdirs(); - String commitTime1 = "1"; - String commitTime2 = "2"; - String commitTime3 = "3"; - String commitTime4 = "4"; - String fileId1 = UUID.randomUUID().toString(); - String fileId2 = UUID.randomUUID().toString(); - String fileId3 = UUID.randomUUID().toString(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) - .createNewFile(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime2, 0)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)) - 
.createNewFile(); - - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)) - .createNewFile(); - new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)) - .createNewFile(); - - - new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime4 + ".commit").createNewFile(); - - // Now we list the entire partition - FileStatus[] statuses = HoodieTestUtils.fs.listStatus(new Path(fullPartitionPath)); - assertEquals(10, statuses.length); - - refreshFsView(statuses); - - List fileGroups = fsView - .getAllFileGroups(partitionPath) - .collect(Collectors.toList()); - assertEquals(3, fileGroups.size()); - for (HoodieFileGroup fileGroup: fileGroups) { - List slices = fileGroup.getAllFileSlices().collect(Collectors.toList()); - if (fileGroup.getId().equals(fileId1)) { - assertEquals(2, slices.size()); - assertEquals(commitTime4, slices.get(0).getBaseCommitTime()); - assertEquals(commitTime1, slices.get(1).getBaseCommitTime()); - } else if (fileGroup.getId().equals(fileId2)) { - assertEquals(3, slices.size()); - assertEquals(commitTime3, slices.get(0).getBaseCommitTime()); - assertEquals(commitTime2, slices.get(1).getBaseCommitTime()); - assertEquals(commitTime1, slices.get(2).getBaseCommitTime()); - } else if (fileGroup.getId().equals(fileId3)) { - assertEquals(2, slices.size()); - assertEquals(commitTime4, slices.get(0).getBaseCommitTime()); - assertEquals(commitTime3, slices.get(1).getBaseCommitTime()); - } - } - - List statuses1 = - roView.getLatestDataFiles().collect(Collectors.toList()); - assertEquals(3, statuses1.size()); - Set filenames = Sets.newHashSet(); - for (HoodieDataFile status : statuses1) { - filenames.add(status.getFileName()); - } - 
assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); - assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3))); + List statuses1 = + roView.getLatestDataFiles().collect(Collectors.toList()); + assertEquals(3, statuses1.size()); + Set filenames = Sets.newHashSet(); + for (HoodieDataFile status : statuses1) { + filenames.add(status.getFileName()); } + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId1))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); + assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime4, 1, fileId3))); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/SchemaTestUtil.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/SchemaTestUtil.java index e42137391..9af169294 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/SchemaTestUtil.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/SchemaTestUtil.java @@ -19,111 +19,111 @@ package com.uber.hoodie.common.util; import com.uber.hoodie.avro.MercifulJsonConverter; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.exception.HoodieIOException; +import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import java.nio.file.FileSystem; import java.nio.file.FileSystemNotFoundException; import java.nio.file.FileSystems; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.io.DecoderFactory; 
-import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - public class SchemaTestUtil { - public static Schema getSimpleSchema() throws IOException { - return new Schema.Parser() - .parse(SchemaTestUtil.class.getResourceAsStream("/simple-test.avro")); + + public static Schema getSimpleSchema() throws IOException { + return new Schema.Parser() + .parse(SchemaTestUtil.class.getResourceAsStream("/simple-test.avro")); + } + + public static List generateTestRecords(int from, int limit) + throws IOException, URISyntaxException { + return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit); + } + + private static List toRecords(Schema writerSchema, Schema readerSchema, int from, + int limit) throws IOException, URISyntaxException { + GenericDatumReader reader = + new GenericDatumReader<>(writerSchema, readerSchema); + // Required to register the necessary JAR:// file system + URI resource = SchemaTestUtil.class.getClass().getResource("/sample.data").toURI(); + Path dataPath; + if (resource.toString().contains("!")) { + dataPath = uriToPath(resource); + } else { + dataPath = Paths.get(SchemaTestUtil.class.getClass().getResource("/sample.data").toURI()); } - public static List generateTestRecords(int from, int limit) - throws IOException, URISyntaxException { - return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit); - } - - private static List toRecords(Schema writerSchema, Schema readerSchema, int from, - int limit) throws IOException, URISyntaxException { - GenericDatumReader reader = - new GenericDatumReader<>(writerSchema, readerSchema); - // Required to register the necessary JAR:// file system - URI resource = SchemaTestUtil.class.getClass().getResource("/sample.data").toURI(); - Path dataPath; - if(resource.toString().contains("!")) { - dataPath = uriToPath(resource); - } else { - 
dataPath = Paths.get(SchemaTestUtil.class.getClass().getResource("/sample.data").toURI()); - } - - try (Stream stream = Files.lines(dataPath)) { - return stream.skip(from).limit(limit).map(s -> { - try { - return reader.read(null, DecoderFactory.get().jsonDecoder(writerSchema, s)); - } catch (IOException e) { - throw new HoodieIOException("Could not read data from simple_data.json", e); - } - }).collect(Collectors.toList()); - } catch (IOException e) { - throw new HoodieIOException("Could not read data from simple_data.json", e); - } - } - - static Path uriToPath(URI uri) throws IOException { - final Map env = new HashMap<>(); - final String[] array = uri.toString().split("!"); - FileSystem fs; + try (Stream stream = Files.lines(dataPath)) { + return stream.skip(from).limit(limit).map(s -> { try { - fs = FileSystems.getFileSystem(URI.create(array[0])); - } catch (FileSystemNotFoundException e) { - fs = FileSystems.newFileSystem(URI.create(array[0]), env); + return reader.read(null, DecoderFactory.get().jsonDecoder(writerSchema, s)); + } catch (IOException e) { + throw new HoodieIOException("Could not read data from simple_data.json", e); } - return fs.getPath(array[1]); + }).collect(Collectors.toList()); + } catch (IOException e) { + throw new HoodieIOException("Could not read data from simple_data.json", e); } + } - public static List generateHoodieTestRecords(int from, int limit) - throws IOException, URISyntaxException { - List records = generateTestRecords(from, limit); - Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); - return records.stream() - .map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema)) - .map(p -> { - p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString()); - p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00"); - return p; - }).collect( - Collectors.toList()); - + static Path uriToPath(URI uri) throws IOException { + final Map env = new HashMap<>(); + final 
String[] array = uri.toString().split("!"); + FileSystem fs; + try { + fs = FileSystems.getFileSystem(URI.create(array[0])); + } catch (FileSystemNotFoundException e) { + fs = FileSystems.newFileSystem(URI.create(array[0]), env); } + return fs.getPath(array[1]); + } - public static Schema getEvolvedSchema() throws IOException { - return new Schema.Parser() - .parse(SchemaTestUtil.class.getResourceAsStream("/simple-test-evolved.avro")); - } + public static List generateHoodieTestRecords(int from, int limit) + throws IOException, URISyntaxException { + List records = generateTestRecords(from, limit); + Schema hoodieFieldsSchema = HoodieAvroUtils.addMetadataFields(getSimpleSchema()); + return records.stream() + .map(s -> HoodieAvroUtils.rewriteRecord((GenericRecord) s, hoodieFieldsSchema)) + .map(p -> { + p.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, UUID.randomUUID().toString()); + p.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, "0000/00/00"); + return p; + }).collect( + Collectors.toList()); - public static List generateEvolvedTestRecords(int from, int limit) - throws IOException, URISyntaxException { - return toRecords(getSimpleSchema(), getEvolvedSchema(), from, limit); - } + } - public static Schema getComplexEvolvedSchema() throws IOException { - return new Schema.Parser() - .parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avro")); - } + public static Schema getEvolvedSchema() throws IOException { + return new Schema.Parser() + .parse(SchemaTestUtil.class.getResourceAsStream("/simple-test-evolved.avro")); + } - public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, - String commitTime, String fileId) throws IOException { - TestRecord record = new TestRecord(commitTime, recordNumber, fileId); - MercifulJsonConverter converter = new MercifulJsonConverter(schema); - return converter.convert(record.toJsonString()); - } + public static List generateEvolvedTestRecords(int from, int limit) + throws IOException, 
URISyntaxException { + return toRecords(getSimpleSchema(), getEvolvedSchema(), from, limit); + } + + public static Schema getComplexEvolvedSchema() throws IOException { + return new Schema.Parser() + .parse(SchemaTestUtil.class.getResourceAsStream("/complex-test-evolved.avro")); + } + + public static GenericRecord generateAvroRecordFromJson(Schema schema, int recordNumber, + String commitTime, String fileId) throws IOException { + TestRecord record = new TestRecord(commitTime, recordNumber, fileId); + MercifulJsonConverter converter = new MercifulJsonConverter(schema); + return converter.convert(record.toJsonString()); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestFSUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestFSUtils.java index c5d19b50e..edcc1509b 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestFSUtils.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestFSUtils.java @@ -16,48 +16,47 @@ package com.uber.hoodie.common.util; -import org.junit.Test; +import static org.junit.Assert.assertTrue; import java.text.SimpleDateFormat; import java.util.Date; import java.util.UUID; - -import static org.junit.Assert.assertTrue; +import org.junit.Test; public class TestFSUtils { - @Test - public void testMakeDataFileName() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - int taskPartitionId = 2; - String fileName = UUID.randomUUID().toString(); - assertTrue(FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName) - .equals(fileName + "_" + taskPartitionId + "_" + commitTime + ".parquet")); - } + @Test + public void testMakeDataFileName() { + String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + int taskPartitionId = 2; + String fileName = UUID.randomUUID().toString(); + assertTrue(FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName) + .equals(fileName + "_" + taskPartitionId + "_" + commitTime + 
".parquet")); + } - @Test - public void testMaskFileName() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - int taskPartitionId = 2; - assertTrue(FSUtils.maskWithoutFileId(commitTime, taskPartitionId) - .equals("*_" + taskPartitionId + "_" + commitTime + ".parquet")); - } + @Test + public void testMaskFileName() { + String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + int taskPartitionId = 2; + assertTrue(FSUtils.maskWithoutFileId(commitTime, taskPartitionId) + .equals("*_" + taskPartitionId + "_" + commitTime + ".parquet")); + } - @Test - public void testGetCommitTime() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - int taskPartitionId = 2; - String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName); - assertTrue(FSUtils.getCommitTime(fullFileName).equals(commitTime)); - } + @Test + public void testGetCommitTime() { + String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + int taskPartitionId = 2; + String fileName = UUID.randomUUID().toString(); + String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName); + assertTrue(FSUtils.getCommitTime(fullFileName).equals(commitTime)); + } - @Test - public void testGetFileNameWithoutMeta() { - String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); - int taskPartitionId = 2; - String fileName = UUID.randomUUID().toString(); - String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName); - assertTrue(FSUtils.getFileId(fullFileName).equals(fileName)); - } + @Test + public void testGetFileNameWithoutMeta() { + String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()); + int taskPartitionId = 2; + String fileName = UUID.randomUUID().toString(); + String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName); + 
assertTrue(FSUtils.getFileId(fullFileName).equals(fileName)); + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestNumericUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestNumericUtils.java index 99f8a67f2..8a7141869 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestNumericUtils.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestNumericUtils.java @@ -16,20 +16,22 @@ package com.uber.hoodie.common.util; +import static org.junit.Assert.assertTrue; + import org.junit.Test; -import static org.junit.Assert.*; public class TestNumericUtils { - @Test - public void testHumanReadableByteCount() { - assertTrue(NumericUtils.humanReadableByteCount(0).equals("0.0 B")); - assertTrue(NumericUtils.humanReadableByteCount(27).equals("27.0 B")); - assertTrue(NumericUtils.humanReadableByteCount(1023).equals("1023.0 B")); - assertTrue(NumericUtils.humanReadableByteCount(1024).equals("1.0 KB")); - assertTrue(NumericUtils.humanReadableByteCount(110592).equals("108.0 KB")); - assertTrue(NumericUtils.humanReadableByteCount(28991029248L).equals("27.0 GB")); - assertTrue(NumericUtils.humanReadableByteCount(1855425871872L).equals("1.7 TB")); - assertTrue(NumericUtils.humanReadableByteCount(9223372036854775807L).equals("8.0 EB")); - } + @Test + public void testHumanReadableByteCount() { + assertTrue(NumericUtils.humanReadableByteCount(0).equals("0.0 B")); + assertTrue(NumericUtils.humanReadableByteCount(27).equals("27.0 B")); + assertTrue(NumericUtils.humanReadableByteCount(1023).equals("1023.0 B")); + assertTrue(NumericUtils.humanReadableByteCount(1024).equals("1.0 KB")); + assertTrue(NumericUtils.humanReadableByteCount(110592).equals("108.0 KB")); + assertTrue(NumericUtils.humanReadableByteCount(28991029248L).equals("27.0 GB")); + assertTrue(NumericUtils.humanReadableByteCount(1855425871872L).equals("1.7 TB")); + assertTrue(NumericUtils.humanReadableByteCount(9223372036854775807L).equals("8.0 EB")); + 
+ } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestParquetUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestParquetUtils.java index 4145ed6ed..266cb1158 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestParquetUtils.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestParquetUtils.java @@ -16,10 +16,17 @@ package com.uber.hoodie.common.util; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.avro.HoodieAvroWriteSupport; import com.uber.hoodie.common.BloomFilter; import com.uber.hoodie.common.model.HoodieRecord; - +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.UUID; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; @@ -31,62 +38,55 @@ import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.UUID; - -import static org.junit.Assert.*; - public class TestParquetUtils { - private String basePath; + private String basePath; - @Before - public void setup() throws IOException { - // Create a temp folder as the base path - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - basePath = folder.getRoot().getAbsolutePath(); + @Before + public void setup() throws IOException { + // Create a temp folder as the base path + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + basePath = folder.getRoot().getAbsolutePath(); + } + + @Test + public void testHoodieWriteSupport() throws Exception { + + List rowKeys = new ArrayList<>(); + for (int i = 0; i < 1000; i++) { + rowKeys.add(UUID.randomUUID().toString()); } - @Test - public void testHoodieWriteSupport() throws Exception { + // Write 
out a parquet file + Schema schema = HoodieAvroUtils.getRecordKeySchema(); + BloomFilter filter = new BloomFilter(1000, 0.0001); + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport( + new AvroSchemaConverter().convert(schema), schema, filter); - List rowKeys = new ArrayList<>(); - for (int i = 0; i < 1000; i++) { - rowKeys.add(UUID.randomUUID().toString()); - } - - // Write out a parquet file - Schema schema = HoodieAvroUtils.getRecordKeySchema(); - BloomFilter filter = new BloomFilter(1000, 0.0001); - HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter); - - - String filePath = basePath + "/test.parquet"; - ParquetWriter writer = new ParquetWriter(new Path(filePath), - writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, ParquetWriter.DEFAULT_PAGE_SIZE); - for (String rowKey : rowKeys) { - GenericRecord rec = new GenericData.Record(schema); - rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey); - writer.write(rec); - filter.add(rowKey); - } - writer.close(); - - - // Read and verify - List rowKeysInFile = new ArrayList<>(ParquetUtils.readRowKeysFromParquet(new Path(filePath))); - Collections.sort(rowKeysInFile); - Collections.sort(rowKeys); - - assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile); - BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(new Path(filePath)); - for (String rowKey : rowKeys) { - assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey)); - } + String filePath = basePath + "/test.parquet"; + ParquetWriter writer = new ParquetWriter(new Path(filePath), + writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, + ParquetWriter.DEFAULT_PAGE_SIZE); + for (String rowKey : rowKeys) { + GenericRecord rec = new GenericData.Record(schema); + rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey); + writer.write(rec); + filter.add(rowKey); } + writer.close(); + + // 
Read and verify + List rowKeysInFile = new ArrayList<>( + ParquetUtils.readRowKeysFromParquet(new Path(filePath))); + Collections.sort(rowKeysInFile); + Collections.sort(rowKeys); + + assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile); + BloomFilter filterInFile = ParquetUtils.readBloomFilterFromParquetMetadata(new Path(filePath)); + for (String rowKey : rowKeys) { + assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey)); + } + } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestRecord.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestRecord.java index 7852749a0..5df7f3252 100644 --- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestRecord.java +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestRecord.java @@ -17,82 +17,87 @@ package com.uber.hoodie.common.util; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import org.codehaus.jackson.annotate.JsonAutoDetect; -import org.codehaus.jackson.annotate.JsonMethod; -import org.codehaus.jackson.map.ObjectMapper; - import java.io.IOException; import java.io.Serializable; import java.util.HashMap; import java.util.Map; +import org.codehaus.jackson.annotate.JsonAutoDetect; +import org.codehaus.jackson.annotate.JsonMethod; +import org.codehaus.jackson.map.ObjectMapper; @JsonIgnoreProperties(ignoreUnknown = true) @SuppressWarnings({"unused", "FieldCanBeLocal", "MismatchedQueryAndUpdateOfCollection"}) public class TestRecord implements Serializable { - class TestMapItemRecord implements Serializable { - private String item1; - private String item2; - TestMapItemRecord(String item1, String item2) { - this.item1 = item1; - this.item2 = item2; - } + class TestMapItemRecord implements Serializable { + + private String item1; + private String item2; + + TestMapItemRecord(String item1, String item2) { + this.item1 = item1; + this.item2 = item2; } + } - class TestNestedRecord implements 
Serializable { - private boolean isAdmin; - private String userId; + class TestNestedRecord implements Serializable { - TestNestedRecord(boolean isAdmin, String userId) { - this.isAdmin = isAdmin; - this.userId = userId; - } + private boolean isAdmin; + private String userId; + + TestNestedRecord(boolean isAdmin, String userId) { + this.isAdmin = isAdmin; + this.userId = userId; } + } - private String _hoodie_commit_time; - private String _hoodie_record_key; - private String _hoodie_partition_path; - private String _hoodie_file_name; - private String _hoodie_commit_seqno; + private String _hoodie_commit_time; + private String _hoodie_record_key; + private String _hoodie_partition_path; + private String _hoodie_file_name; + private String _hoodie_commit_seqno; - private String field1; - private String field2; - private String name; - private Integer favoriteIntNumber; - private Long favoriteNumber; - private Float favoriteFloatNumber; - private Double favoriteDoubleNumber; - private Map tags; - private TestNestedRecord testNestedRecord; - private String[] stringArray; + private String field1; + private String field2; + private String name; + private Integer favoriteIntNumber; + private Long favoriteNumber; + private Float favoriteFloatNumber; + private Double favoriteDoubleNumber; + private Map tags; + private TestNestedRecord testNestedRecord; + private String[] stringArray; - public TestRecord(String commitTime, int recordNumber, String fileId) { - this._hoodie_commit_time = commitTime; - this._hoodie_record_key = "key" + recordNumber; - this._hoodie_partition_path = commitTime; - this._hoodie_file_name = fileId; - this._hoodie_commit_seqno = commitTime + recordNumber; + public TestRecord(String commitTime, int recordNumber, String fileId) { + this._hoodie_commit_time = commitTime; + this._hoodie_record_key = "key" + recordNumber; + this._hoodie_partition_path = commitTime; + this._hoodie_file_name = fileId; + this._hoodie_commit_seqno = commitTime + recordNumber; 
- String commitTimeSuffix = "@" + commitTime; - int commitHashCode = commitTime.hashCode(); + String commitTimeSuffix = "@" + commitTime; + int commitHashCode = commitTime.hashCode(); - this.field1 = "field" + recordNumber; - this.field2 = "field" + recordNumber + commitTimeSuffix; - this.name = "name" + recordNumber; - this.favoriteIntNumber = recordNumber + commitHashCode; - this.favoriteNumber = (long)(recordNumber + commitHashCode); - this.favoriteFloatNumber = (float)((recordNumber + commitHashCode) / 1024.0); - this.favoriteDoubleNumber = (recordNumber + commitHashCode) / 1024.0; - this.tags = new HashMap<>(); - this.tags.put("mapItem1", new TestMapItemRecord("item" + recordNumber, "item" + recordNumber + commitTimeSuffix)); - this.tags.put("mapItem2", new TestMapItemRecord("item2" + recordNumber, "item2" + recordNumber + commitTimeSuffix)); - this.testNestedRecord = new TestNestedRecord(false, "UserId" + recordNumber + commitTimeSuffix); - this.stringArray = new String[]{"stringArray0" + commitTimeSuffix, "stringArray1" + commitTimeSuffix}; - } + this.field1 = "field" + recordNumber; + this.field2 = "field" + recordNumber + commitTimeSuffix; + this.name = "name" + recordNumber; + this.favoriteIntNumber = recordNumber + commitHashCode; + this.favoriteNumber = (long) (recordNumber + commitHashCode); + this.favoriteFloatNumber = (float) ((recordNumber + commitHashCode) / 1024.0); + this.favoriteDoubleNumber = (recordNumber + commitHashCode) / 1024.0; + this.tags = new HashMap<>(); + this.tags.put("mapItem1", + new TestMapItemRecord("item" + recordNumber, "item" + recordNumber + commitTimeSuffix)); + this.tags.put("mapItem2", + new TestMapItemRecord("item2" + recordNumber, "item2" + recordNumber + commitTimeSuffix)); + this.testNestedRecord = new TestNestedRecord(false, "UserId" + recordNumber + commitTimeSuffix); + this.stringArray = new String[]{"stringArray0" + commitTimeSuffix, + "stringArray1" + commitTimeSuffix}; + } - public String toJsonString() throws 
IOException { - ObjectMapper mapper = new ObjectMapper(); - mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); - return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(this); - } + public String toJsonString() throws IOException { + ObjectMapper mapper = new ObjectMapper(); + mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY); + return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(this); + } } diff --git a/hoodie-common/src/test/resources/log4j-surefire.properties b/hoodie-common/src/test/resources/log4j-surefire.properties index 017045b23..ea3e93545 100644 --- a/hoodie-common/src/test/resources/log4j-surefire.properties +++ b/hoodie-common/src/test/resources/log4j-surefire.properties @@ -13,13 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # - log4j.rootLogger=WARN, A1 log4j.category.com.uber=INFO log4j.category.com.uber.hoodie.table.log=WARN log4j.category.com.uber.hoodie.common.util=WARN log4j.category.org.apache.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. diff --git a/hoodie-hadoop-mr/pom.xml b/hoodie-hadoop-mr/pom.xml index fe91cc91b..73ee95e51 100644 --- a/hoodie-hadoop-mr/pom.xml +++ b/hoodie-hadoop-mr/pom.xml @@ -15,7 +15,9 @@ ~ limitations under the License. 
--> - + hoodie com.uber.hoodie @@ -107,7 +109,8 @@ shade - ${project.build.directory}/dependency-reduced-pom.xml + ${project.build.directory}/dependency-reduced-pom.xml + true diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieHiveUtil.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieHiveUtil.java index 3eed58d67..12b4abf40 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieHiveUtil.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieHiveUtil.java @@ -22,47 +22,48 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; public class HoodieHiveUtil { - public static final Logger LOG = - LogManager.getLogger(HoodieHiveUtil.class); - public static final String HOODIE_CONSUME_MODE_PATTERN = "hoodie.%s.consume.mode"; - public static final String HOODIE_START_COMMIT_PATTERN = "hoodie.%s.consume.start.timestamp"; - public static final String HOODIE_MAX_COMMIT_PATTERN = "hoodie.%s.consume.max.commits"; - public static final String INCREMENTAL_SCAN_MODE = "INCREMENTAL"; - public static final String LATEST_SCAN_MODE = "LATEST"; - public static final String DEFAULT_SCAN_MODE = LATEST_SCAN_MODE; - public static final int DEFAULT_MAX_COMMITS = 1; - public static final int MAX_COMMIT_ALL = -1; - public static final int DEFAULT_LEVELS_TO_BASEPATH = 3; + public static final Logger LOG = + LogManager.getLogger(HoodieHiveUtil.class); - public static Integer readMaxCommits(JobContext job, String tableName) { - String maxCommitName = String.format(HOODIE_MAX_COMMIT_PATTERN, tableName); - int maxCommits = job.getConfiguration().getInt(maxCommitName, DEFAULT_MAX_COMMITS); - if (maxCommits == MAX_COMMIT_ALL) { - maxCommits = Integer.MAX_VALUE; - } - LOG.info("Read max commits - " + maxCommits); - return maxCommits; + public static final String HOODIE_CONSUME_MODE_PATTERN = "hoodie.%s.consume.mode"; + public static final String HOODIE_START_COMMIT_PATTERN = "hoodie.%s.consume.start.timestamp"; + 
public static final String HOODIE_MAX_COMMIT_PATTERN = "hoodie.%s.consume.max.commits"; + public static final String INCREMENTAL_SCAN_MODE = "INCREMENTAL"; + public static final String LATEST_SCAN_MODE = "LATEST"; + public static final String DEFAULT_SCAN_MODE = LATEST_SCAN_MODE; + public static final int DEFAULT_MAX_COMMITS = 1; + public static final int MAX_COMMIT_ALL = -1; + public static final int DEFAULT_LEVELS_TO_BASEPATH = 3; + + public static Integer readMaxCommits(JobContext job, String tableName) { + String maxCommitName = String.format(HOODIE_MAX_COMMIT_PATTERN, tableName); + int maxCommits = job.getConfiguration().getInt(maxCommitName, DEFAULT_MAX_COMMITS); + if (maxCommits == MAX_COMMIT_ALL) { + maxCommits = Integer.MAX_VALUE; } + LOG.info("Read max commits - " + maxCommits); + return maxCommits; + } - public static String readStartCommitTime(JobContext job, String tableName) { - String startCommitTimestampName = String.format(HOODIE_START_COMMIT_PATTERN, tableName); - LOG.info("Read start commit time - " + job.getConfiguration().get(startCommitTimestampName)); - return job.getConfiguration().get(startCommitTimestampName); - } + public static String readStartCommitTime(JobContext job, String tableName) { + String startCommitTimestampName = String.format(HOODIE_START_COMMIT_PATTERN, tableName); + LOG.info("Read start commit time - " + job.getConfiguration().get(startCommitTimestampName)); + return job.getConfiguration().get(startCommitTimestampName); + } - public static String readMode(JobContext job, String tableName) { - String modePropertyName = String.format(HOODIE_CONSUME_MODE_PATTERN, tableName); - String mode =job.getConfiguration().get(modePropertyName, DEFAULT_SCAN_MODE); - LOG.info(modePropertyName + ": " + mode); - return mode; - } + public static String readMode(JobContext job, String tableName) { + String modePropertyName = String.format(HOODIE_CONSUME_MODE_PATTERN, tableName); + String mode = job.getConfiguration().get(modePropertyName, 
DEFAULT_SCAN_MODE); + LOG.info(modePropertyName + ": " + mode); + return mode; + } - public static Path getNthParent(Path path, int n) { - Path parent = path; - for (int i = 0; i < n; i++) { - parent = parent.getParent(); - } - return parent; + public static Path getNthParent(Path path, int n) { + Path parent = path; + for (int i = 0; i < n; i++) { + parent = parent.getParent(); } + return parent; + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java index 2b30a33b3..03e86d3bd 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieInputFormat.java @@ -16,6 +16,10 @@ package com.uber.hoodie.hadoop; +import static parquet.filter2.predicate.FilterApi.and; +import static parquet.filter2.predicate.FilterApi.binaryColumn; +import static parquet.filter2.predicate.FilterApi.gt; + import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.model.HoodieRecord; @@ -26,6 +30,12 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.table.view.HoodieTableFileSystemView; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.exception.InvalidDatasetException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; @@ -54,156 +64,151 @@ import parquet.hadoop.metadata.FileMetaData; import parquet.hadoop.metadata.ParquetMetadata; import parquet.io.api.Binary; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; 
-import java.util.stream.Collectors; - -import static parquet.filter2.predicate.FilterApi.and; -import static parquet.filter2.predicate.FilterApi.binaryColumn; -import static parquet.filter2.predicate.FilterApi.gt; - /** - * HoodieInputFormat which understands the Hoodie File Structure and filters - * files based on the Hoodie Mode. If paths that does not correspond to a hoodie dataset - * then they are passed in as is (as what FileInputFormat.listStatus() would do). - * The JobConf could have paths from multipe Hoodie/Non-Hoodie datasets + * HoodieInputFormat which understands the Hoodie File Structure and filters files based on the + * Hoodie Mode. If paths that does not correspond to a hoodie dataset then they are passed in as is + * (as what FileInputFormat.listStatus() would do). The JobConf could have paths from multipe + * Hoodie/Non-Hoodie datasets */ @UseFileSplitsFromInputFormat public class HoodieInputFormat extends MapredParquetInputFormat implements Configurable { - public static final Log LOG = LogFactory.getLog(HoodieInputFormat.class); + public static final Log LOG = LogFactory.getLog(HoodieInputFormat.class); - protected Configuration conf; + protected Configuration conf; - @Override - public FileStatus[] listStatus(JobConf job) throws IOException { - // Get all the file status from FileInputFormat and then do the filter - FileStatus[] fileStatuses = super.listStatus(job); - Map> groupedFileStatus = groupFileStatus(fileStatuses); - LOG.info("Found a total of " + groupedFileStatus.size() + " groups"); - List returns = new ArrayList<>(); - for(Map.Entry> entry: groupedFileStatus.entrySet()) { - HoodieTableMetaClient metadata = entry.getKey(); - if (metadata == null) { - // Add all the paths which are not hoodie specific - returns.addAll(entry.getValue()); - continue; - } + @Override + public FileStatus[] listStatus(JobConf job) throws IOException { + // Get all the file status from FileInputFormat and then do the filter + FileStatus[] fileStatuses = 
super.listStatus(job); + Map> groupedFileStatus = groupFileStatus(fileStatuses); + LOG.info("Found a total of " + groupedFileStatus.size() + " groups"); + List returns = new ArrayList<>(); + for (Map.Entry> entry : groupedFileStatus.entrySet()) { + HoodieTableMetaClient metadata = entry.getKey(); + if (metadata == null) { + // Add all the paths which are not hoodie specific + returns.addAll(entry.getValue()); + continue; + } - FileStatus[] statuses = entry.getValue().toArray(new FileStatus[entry.getValue().size()]); - if (LOG.isDebugEnabled()) { - LOG.debug("Hoodie Metadata initialized with completed commit Ts as :" + metadata); - } - String tableName = metadata.getTableConfig().getTableName(); - String mode = HoodieHiveUtil.readMode(Job.getInstance(job), tableName); - // Get all commits, delta commits, compactions, as all of them produce a base parquet file today - HoodieTimeline timeline = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants(); - TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metadata, timeline, statuses); + FileStatus[] statuses = entry.getValue().toArray(new FileStatus[entry.getValue().size()]); + if (LOG.isDebugEnabled()) { + LOG.debug("Hoodie Metadata initialized with completed commit Ts as :" + metadata); + } + String tableName = metadata.getTableConfig().getTableName(); + String mode = HoodieHiveUtil.readMode(Job.getInstance(job), tableName); + // Get all commits, delta commits, compactions, as all of them produce a base parquet file today + HoodieTimeline timeline = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants(); + TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metadata, + timeline, statuses); - if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) { - // this is of the form commitTs_partition_sequenceNumber - String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), 
tableName); - // Total number of commits to return in this batch. Set this to -1 to get all the commits. - Integer maxCommits = HoodieHiveUtil.readMaxCommits(Job.getInstance(job), tableName); - LOG.info("Last Incremental timestamp was set as " + lastIncrementalTs); - List commitsToReturn = - timeline.findInstantsAfter(lastIncrementalTs, maxCommits).getInstants() - .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); - List filteredFiles = roView - .getLatestDataFilesInRange(commitsToReturn) - .collect(Collectors.toList()); - for (HoodieDataFile filteredFile : filteredFiles) { - LOG.info("Processing incremental hoodie file - " + filteredFile.getPath()); - filteredFile = checkFileStatus(filteredFile); - returns.add(filteredFile.getFileStatus()); - } - LOG.info( - "Total paths to process after hoodie incremental filter " + filteredFiles.size()); - } else { - // filter files on the latest commit found - List filteredFiles = roView.getLatestDataFiles().collect(Collectors.toList()); - LOG.info("Total paths to process after hoodie filter " + filteredFiles.size()); - for (HoodieDataFile filteredFile : filteredFiles) { - if (LOG.isDebugEnabled()) { - LOG.debug("Processing latest hoodie file - " + filteredFile.getPath()); - } - filteredFile = checkFileStatus(filteredFile); - returns.add(filteredFile.getFileStatus()); - } - } + if (HoodieHiveUtil.INCREMENTAL_SCAN_MODE.equals(mode)) { + // this is of the form commitTs_partition_sequenceNumber + String lastIncrementalTs = HoodieHiveUtil + .readStartCommitTime(Job.getInstance(job), tableName); + // Total number of commits to return in this batch. Set this to -1 to get all the commits. 
+ Integer maxCommits = HoodieHiveUtil.readMaxCommits(Job.getInstance(job), tableName); + LOG.info("Last Incremental timestamp was set as " + lastIncrementalTs); + List commitsToReturn = + timeline.findInstantsAfter(lastIncrementalTs, maxCommits).getInstants() + .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); + List filteredFiles = roView + .getLatestDataFilesInRange(commitsToReturn) + .collect(Collectors.toList()); + for (HoodieDataFile filteredFile : filteredFiles) { + LOG.info("Processing incremental hoodie file - " + filteredFile.getPath()); + filteredFile = checkFileStatus(filteredFile); + returns.add(filteredFile.getFileStatus()); } - return returns.toArray(new FileStatus[returns.size()]); - + LOG.info( + "Total paths to process after hoodie incremental filter " + filteredFiles.size()); + } else { + // filter files on the latest commit found + List filteredFiles = roView.getLatestDataFiles() + .collect(Collectors.toList()); + LOG.info("Total paths to process after hoodie filter " + filteredFiles.size()); + for (HoodieDataFile filteredFile : filteredFiles) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing latest hoodie file - " + filteredFile.getPath()); + } + filteredFile = checkFileStatus(filteredFile); + returns.add(filteredFile.getFileStatus()); + } + } } + return returns.toArray(new FileStatus[returns.size()]); - /** - * Checks the file status for a race condition which can set the file size to 0. 1. - * HiveInputFormat does super.listStatus() and gets back a FileStatus[] 2. Then it creates the - * HoodieTableMetaClient for the paths listed. 3. Generation of splits looks at FileStatus size - * to create splits, which skips this file - */ - private HoodieDataFile checkFileStatus(HoodieDataFile dataFile) throws IOException { - Path dataPath = dataFile.getFileStatus().getPath(); + } + + /** + * Checks the file status for a race condition which can set the file size to 0. 1. 
+ * HiveInputFormat does super.listStatus() and gets back a FileStatus[] 2. Then it creates the + * HoodieTableMetaClient for the paths listed. 3. Generation of splits looks at FileStatus size to + * create splits, which skips this file + */ + private HoodieDataFile checkFileStatus(HoodieDataFile dataFile) throws IOException { + Path dataPath = dataFile.getFileStatus().getPath(); + try { + if (dataFile.getFileSize() == 0) { + FileSystem fs = dataPath.getFileSystem(conf); + LOG.info("Refreshing file status " + dataFile.getPath()); + return new HoodieDataFile(fs.getFileStatus(dataPath)); + } + return dataFile; + } catch (IOException e) { + throw new HoodieIOException("Could not get FileStatus on path " + dataPath); + } + } + + private Map> groupFileStatus(FileStatus[] fileStatuses) + throws IOException { + // This assumes the paths for different tables are grouped together + Map> grouped = new HashMap<>(); + HoodieTableMetaClient metadata = null; + String nonHoodieBasePath = null; + for (FileStatus status : fileStatuses) { + if (!status.getPath().getName().endsWith(".parquet")) { + //FIXME(vc): skip non parquet files for now. This wont be needed once log file name start with "." 
+ continue; + } + if ((metadata == null && nonHoodieBasePath == null) || (metadata == null && !status.getPath() + .toString() + .contains(nonHoodieBasePath)) || (metadata != null && !status.getPath().toString() + .contains(metadata.getBasePath()))) { try { - if (dataFile.getFileSize() == 0) { - FileSystem fs = dataPath.getFileSystem(conf); - LOG.info("Refreshing file status " + dataFile.getPath()); - return new HoodieDataFile(fs.getFileStatus(dataPath)); - } - return dataFile; - } catch (IOException e) { - throw new HoodieIOException("Could not get FileStatus on path " + dataPath); + metadata = getTableMetaClient(status.getPath().getFileSystem(conf), + status.getPath().getParent()); + nonHoodieBasePath = null; + } catch (InvalidDatasetException e) { + LOG.info("Handling a non-hoodie path " + status.getPath()); + metadata = null; + nonHoodieBasePath = + status.getPath().getParent().toString(); } - } - - private Map> groupFileStatus(FileStatus[] fileStatuses) - throws IOException { - // This assumes the paths for different tables are grouped together - Map> grouped = new HashMap<>(); - HoodieTableMetaClient metadata = null; - String nonHoodieBasePath = null; - for(FileStatus status: fileStatuses) { - if (!status.getPath().getName().endsWith(".parquet")) { - //FIXME(vc): skip non parquet files for now. This wont be needed once log file name start with "." 
- continue; - } - if ((metadata == null && nonHoodieBasePath == null) || (metadata == null && !status.getPath().toString() - .contains(nonHoodieBasePath)) || (metadata != null && !status.getPath().toString() - .contains(metadata.getBasePath()))) { - try { - metadata = getTableMetaClient(status.getPath().getFileSystem(conf), status.getPath().getParent()); - nonHoodieBasePath = null; - } catch (InvalidDatasetException e) { - LOG.info("Handling a non-hoodie path " + status.getPath()); - metadata = null; - nonHoodieBasePath = - status.getPath().getParent().toString(); - } - if(!grouped.containsKey(metadata)) { - grouped.put(metadata, new ArrayList<>()); - } - } - grouped.get(metadata).add(status); + if (!grouped.containsKey(metadata)) { + grouped.put(metadata, new ArrayList<>()); } - return grouped; + } + grouped.get(metadata).add(status); } + return grouped; + } - public void setConf(Configuration conf) { - this.conf = conf; - } + public void setConf(Configuration conf) { + this.conf = conf; + } - public Configuration getConf() { - return conf; - } + public Configuration getConf() { + return conf; + } - @Override - public RecordReader getRecordReader(final InputSplit split, - final JobConf job, final Reporter reporter) throws IOException { - // TODO enable automatic predicate pushdown after fixing issues + @Override + public RecordReader getRecordReader(final InputSplit split, + final JobConf job, final Reporter reporter) throws IOException { + // TODO enable automatic predicate pushdown after fixing issues // FileSplit fileSplit = (FileSplit) split; // HoodieTableMetadata metadata = getTableMetadata(fileSplit.getPath().getParent()); // String tableName = metadata.getTableName(); @@ -213,91 +218,83 @@ public class HoodieInputFormat extends MapredParquetInputFormat // FilterPredicate predicate = constructHoodiePredicate(job, tableName, split); // LOG.info("Setting parquet predicate push down as " + predicate); // ParquetInputFormat.setFilterPredicate(job, predicate); - 
//clearOutExistingPredicate(job); + //clearOutExistingPredicate(job); // } - return super.getRecordReader(split, job, reporter); - } + return super.getRecordReader(split, job, reporter); + } - /** - * Clears out the filter expression (if this is not done, then ParquetReader will override the FilterPredicate set) - * - * @param job - */ - private void clearOutExistingPredicate(JobConf job) { - job.unset(TableScanDesc.FILTER_EXPR_CONF_STR); - } + /** + * Clears out the filter expression (if this is not done, then ParquetReader will override the + * FilterPredicate set) + */ + private void clearOutExistingPredicate(JobConf job) { + job.unset(TableScanDesc.FILTER_EXPR_CONF_STR); + } - /** - * Constructs the predicate to push down to parquet storage. - * This creates the predicate for `hoodie_commit_time` > 'start_commit_time' and ANDs with the existing predicate if one is present already. - * - * @param job - * @param tableName - * @return - */ - private FilterPredicate constructHoodiePredicate(JobConf job, - String tableName, - InputSplit split) throws IOException { - FilterPredicate commitTimePushdown = constructCommitTimePushdownPredicate(job, tableName); - LOG.info("Commit time predicate - " + commitTimePushdown.toString()); - FilterPredicate existingPushdown = constructHQLPushdownPredicate(job, split); - LOG.info("Existing predicate - " + existingPushdown); + /** + * Constructs the predicate to push down to parquet storage. This creates the predicate for + * `hoodie_commit_time` > 'start_commit_time' and ANDs with the existing predicate if one is + * present already. 
+ */ + private FilterPredicate constructHoodiePredicate(JobConf job, + String tableName, + InputSplit split) throws IOException { + FilterPredicate commitTimePushdown = constructCommitTimePushdownPredicate(job, tableName); + LOG.info("Commit time predicate - " + commitTimePushdown.toString()); + FilterPredicate existingPushdown = constructHQLPushdownPredicate(job, split); + LOG.info("Existing predicate - " + existingPushdown); - FilterPredicate hoodiePredicate; - if (existingPushdown != null) { - hoodiePredicate = and(existingPushdown, commitTimePushdown); - } else { - hoodiePredicate = commitTimePushdown; - } - LOG.info("Hoodie Predicate - " + hoodiePredicate); - return hoodiePredicate; + FilterPredicate hoodiePredicate; + if (existingPushdown != null) { + hoodiePredicate = and(existingPushdown, commitTimePushdown); + } else { + hoodiePredicate = commitTimePushdown; } + LOG.info("Hoodie Predicate - " + hoodiePredicate); + return hoodiePredicate; + } - private FilterPredicate constructHQLPushdownPredicate(JobConf job, InputSplit split) - throws IOException { - String serializedPushdown = job.get(TableScanDesc.FILTER_EXPR_CONF_STR); - String columnNamesString = job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); - if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() - || columnNamesString.isEmpty()) { - return null; - } else { - SearchArgument sarg = - SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)); - final Path finalPath = ((FileSplit) split).getPath(); - final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(job, finalPath); - final FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); - return ParquetFilterPredicateConverter - .toFilterPredicate(sarg, fileMetaData.getSchema()); - } + private FilterPredicate constructHQLPushdownPredicate(JobConf job, InputSplit split) + throws IOException { + String serializedPushdown = 
job.get(TableScanDesc.FILTER_EXPR_CONF_STR); + String columnNamesString = job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR); + if (serializedPushdown == null || columnNamesString == null || serializedPushdown.isEmpty() + || columnNamesString.isEmpty()) { + return null; + } else { + SearchArgument sarg = + SearchArgumentFactory.create(Utilities.deserializeExpression(serializedPushdown)); + final Path finalPath = ((FileSplit) split).getPath(); + final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(job, finalPath); + final FileMetaData fileMetaData = parquetMetadata.getFileMetaData(); + return ParquetFilterPredicateConverter + .toFilterPredicate(sarg, fileMetaData.getSchema()); } + } - private FilterPredicate constructCommitTimePushdownPredicate(JobConf job, String tableName) - throws IOException { - String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), tableName); - Operators.BinaryColumn sequenceColumn = - binaryColumn(HoodieRecord.COMMIT_TIME_METADATA_FIELD); - FilterPredicate p = gt(sequenceColumn, Binary.fromString(lastIncrementalTs)); - LOG.info("Setting predicate in InputFormat " + p.toString()); - return p; - } + private FilterPredicate constructCommitTimePushdownPredicate(JobConf job, String tableName) + throws IOException { + String lastIncrementalTs = HoodieHiveUtil.readStartCommitTime(Job.getInstance(job), tableName); + Operators.BinaryColumn sequenceColumn = + binaryColumn(HoodieRecord.COMMIT_TIME_METADATA_FIELD); + FilterPredicate p = gt(sequenceColumn, Binary.fromString(lastIncrementalTs)); + LOG.info("Setting predicate in InputFormat " + p.toString()); + return p; + } - /** - * Read the table metadata from a data path. 
This assumes certain hierarchy of files which - * should be changed once a better way is figured out to pass in the hoodie meta directory - * - * @param dataPath - * @return - * @throws IOException - */ - protected static HoodieTableMetaClient getTableMetaClient(FileSystem fs, Path dataPath) { - int levels = HoodieHiveUtil.DEFAULT_LEVELS_TO_BASEPATH; - if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) { - HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath); - metadata.readFromFS(); - levels = metadata.getPartitionDepth(); - } - Path baseDir = HoodieHiveUtil.getNthParent(dataPath, levels); - LOG.info("Reading hoodie metadata from path " + baseDir.toString()); - return new HoodieTableMetaClient(fs, baseDir.toString()); + /** + * Read the table metadata from a data path. This assumes certain hierarchy of files which should + * be changed once a better way is figured out to pass in the hoodie meta directory + */ + protected static HoodieTableMetaClient getTableMetaClient(FileSystem fs, Path dataPath) { + int levels = HoodieHiveUtil.DEFAULT_LEVELS_TO_BASEPATH; + if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) { + HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath); + metadata.readFromFS(); + levels = metadata.getPartitionDepth(); } + Path baseDir = HoodieHiveUtil.getNthParent(dataPath, levels); + LOG.info("Reading hoodie metadata from path " + baseDir.toString()); + return new HoodieTableMetaClient(fs, baseDir.toString()); + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java index 769bc4d52..c8ffbcc9a 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/HoodieROTablePathFilter.java @@ -21,7 +21,11 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient; 
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView; import com.uber.hoodie.exception.DatasetNotFoundException; import com.uber.hoodie.exception.HoodieException; - +import java.io.Serializable; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -29,150 +33,142 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import java.io.Serializable; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.stream.Collectors; - /** - * Given a path is a part of - * - Hoodie dataset = accepts ONLY the latest version of each path - * - Non-Hoodie dataset = then always accept + * Given a path is a part of - Hoodie dataset = accepts ONLY the latest version of each path - + * Non-Hoodie dataset = then always accept * * We can set this filter, on a query engine's Hadoop Config and if it respects path filters, then * you should be able to query both hoodie and non-hoodie datasets as you would normally do. 
* - * hadoopConf.setClass("mapreduce.input.pathFilter.class", - * com.uber.hoodie.hadoop.HoodieROTablePathFilter.class, - * org.apache.hadoop.fs.PathFilter.class) - * + * hadoopConf.setClass("mapreduce.input.pathFilter.class", com.uber.hoodie.hadoop.HoodieROTablePathFilter.class, + * org.apache.hadoop.fs.PathFilter.class) */ public class HoodieROTablePathFilter implements PathFilter, Serializable { - public static final Log LOG = LogFactory.getLog(HoodieROTablePathFilter.class); + public static final Log LOG = LogFactory.getLog(HoodieROTablePathFilter.class); - /** - * Its quite common, to have all files from a given partition path be passed into accept(), - * cache the check for hoodie metadata for known partition paths and the latest versions of files - */ - private HashMap> hoodiePathCache; + /** + * Its quite common, to have all files from a given partition path be passed into accept(), cache + * the check for hoodie metadata for known partition paths and the latest versions of files + */ + private HashMap> hoodiePathCache; - /** - * Paths that are known to be non-hoodie datasets. - */ - private HashSet nonHoodiePathCache; + /** + * Paths that are known to be non-hoodie datasets. 
+ */ + private HashSet nonHoodiePathCache; - public HoodieROTablePathFilter() { - hoodiePathCache = new HashMap<>(); - nonHoodiePathCache = new HashSet<>(); + public HoodieROTablePathFilter() { + hoodiePathCache = new HashMap<>(); + nonHoodiePathCache = new HashSet<>(); + } + + /** + * Obtain the path, two levels from provided path + * + * @return said path if available, null otherwise + */ + private Path safeGetParentsParent(Path path) { + if (path.getParent() != null && path.getParent().getParent() != null + && path.getParent().getParent().getParent() != null) { + return path.getParent().getParent().getParent(); } + return null; + } - /** - * Obtain the path, two levels from provided path - * - * @return said path if available, null otherwise - */ - private Path safeGetParentsParent(Path path) { - if (path.getParent() != null && path.getParent().getParent() != null && path.getParent().getParent().getParent() != null) { - return path.getParent().getParent().getParent(); - } - return null; + + @Override + public boolean accept(Path path) { + + if (LOG.isDebugEnabled()) { + LOG.debug("Checking acceptance for path " + path); } + Path folder = null; + try { + FileSystem fs = path.getFileSystem(new Configuration()); + if (fs.isDirectory(path)) { + return true; + } - - @Override - public boolean accept(Path path) { - + // Assumes path is a file + folder = path.getParent(); // get the immediate parent. + // Try to use the caches. 
+ if (nonHoodiePathCache.contains(folder.toString())) { if (LOG.isDebugEnabled()) { - LOG.debug("Checking acceptance for path " + path); + LOG.debug("Accepting non-hoodie path from cache: " + path); } - Path folder = null; + return true; + } + + if (hoodiePathCache.containsKey(folder.toString())) { + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("%s Hoodie path checked against cache, accept => %s \n", + path, + hoodiePathCache.get(folder.toString()).contains(path))); + } + return hoodiePathCache.get(folder.toString()).contains(path); + } + + // Perform actual checking. + Path baseDir; + if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) { + HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, folder); + metadata.readFromFS(); + baseDir = HoodieHiveUtil.getNthParent(folder, metadata.getPartitionDepth()); + } else { + baseDir = safeGetParentsParent(folder); + } + + if (baseDir != null) { try { - FileSystem fs = path.getFileSystem(new Configuration()); - if (fs.isDirectory(path)) { - return true; - } + HoodieTableMetaClient metaClient = + new HoodieTableMetaClient(fs, baseDir.toString()); + HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, + metaClient.getActiveTimeline().getCommitTimeline() + .filterCompletedInstants(), + fs.listStatus(folder)); + List latestFiles = fsView + .getLatestDataFiles() + .collect(Collectors.toList()); + // populate the cache + if (!hoodiePathCache.containsKey(folder.toString())) { + hoodiePathCache.put(folder.toString(), new HashSet<>()); + } + LOG.info("Based on hoodie metadata from base path: " + baseDir.toString() + + ", caching " + latestFiles.size() + " files under " + folder); + for (HoodieDataFile lfile : latestFiles) { + hoodiePathCache.get(folder.toString()).add(new Path(lfile.getPath())); + } - // Assumes path is a file - folder = path.getParent(); // get the immediate parent. - // Try to use the caches. 
- if (nonHoodiePathCache.contains(folder.toString())) { - if (LOG.isDebugEnabled()) { - LOG.debug("Accepting non-hoodie path from cache: " + path); - } - return true; - } - - if (hoodiePathCache.containsKey(folder.toString())) { - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("%s Hoodie path checked against cache, accept => %s \n", - path, - hoodiePathCache.get(folder.toString()).contains(path))); - } - return hoodiePathCache.get(folder.toString()).contains(path); - } - - // Perform actual checking. - Path baseDir; - if (HoodiePartitionMetadata.hasPartitionMetadata(fs, folder)) { - HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, folder); - metadata.readFromFS(); - baseDir = HoodieHiveUtil.getNthParent(folder, metadata.getPartitionDepth()); - } else { - baseDir = safeGetParentsParent(folder); - } - - if (baseDir != null) { - try { - HoodieTableMetaClient metaClient = - new HoodieTableMetaClient(fs, baseDir.toString()); - HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, - metaClient.getActiveTimeline().getCommitTimeline() - .filterCompletedInstants(), - fs.listStatus(folder)); - List latestFiles = fsView - .getLatestDataFiles() - .collect(Collectors.toList()); - // populate the cache - if (!hoodiePathCache.containsKey(folder.toString())) { - hoodiePathCache.put(folder.toString(), new HashSet<>()); - } - LOG.info("Based on hoodie metadata from base path: " + baseDir.toString() + - ", caching " + latestFiles.size() + " files under "+ folder); - for (HoodieDataFile lfile: latestFiles) { - hoodiePathCache.get(folder.toString()).add(new Path(lfile.getPath())); - } - - // accept the path, if its among the latest files. 
- if (LOG.isDebugEnabled()) { - LOG.debug(String.format("%s checked after cache population, accept => %s \n", - path, - hoodiePathCache.get(folder.toString()).contains(path))); - } - return hoodiePathCache.get(folder.toString()).contains(path); - } catch (DatasetNotFoundException e) { - // Non-hoodie path, accept it. - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("(1) Caching non-hoodie path under %s \n", - folder.toString())); - } - nonHoodiePathCache.add(folder.toString()); - return true; - } - } else { - // files is at < 3 level depth in FS tree, can't be hoodie dataset - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("(2) Caching non-hoodie path under %s \n", folder.toString())); - } - nonHoodiePathCache.add(folder.toString()); - return true; - } - } catch (Exception e) { - String msg = "Error checking path :" + path +", under folder: "+ folder; - LOG.error(msg, e); - throw new HoodieException(msg, e); + // accept the path, if its among the latest files. + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("%s checked after cache population, accept => %s \n", + path, + hoodiePathCache.get(folder.toString()).contains(path))); + } + return hoodiePathCache.get(folder.toString()).contains(path); + } catch (DatasetNotFoundException e) { + // Non-hoodie path, accept it. 
+ if (LOG.isDebugEnabled()) { + LOG.debug(String.format("(1) Caching non-hoodie path under %s \n", + folder.toString())); + } + nonHoodiePathCache.add(folder.toString()); + return true; } + } else { + // files is at < 3 level depth in FS tree, can't be hoodie dataset + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("(2) Caching non-hoodie path under %s \n", folder.toString())); + } + nonHoodiePathCache.add(folder.toString()); + return true; + } + } catch (Exception e) { + String msg = "Error checking path :" + path + ", under folder: " + folder; + LOG.error(msg, e); + throw new HoodieException(msg, e); } + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/UseFileSplitsFromInputFormat.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/UseFileSplitsFromInputFormat.java index f69e93cff..01059d67b 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/UseFileSplitsFromInputFormat.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/UseFileSplitsFromInputFormat.java @@ -24,13 +24,13 @@ import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; /** - * When annotated on a InputFormat, informs the query engines, - * that they should use the FileSplits provided by the input format - * to execute the queries + * When annotated on a InputFormat, informs the query engines, that they should use the FileSplits + * provided by the input format to execute the queries */ @Inherited @Documented @Target(ElementType.TYPE) @Retention(RetentionPolicy.RUNTIME) public @interface UseFileSplitsFromInputFormat { + } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieParquetSerde.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieParquetSerde.java index 701ab90a9..53c7c8cf7 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieParquetSerde.java +++ 
b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieParquetSerde.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; */ public class HoodieParquetSerde extends ParquetHiveSerDe { - public HoodieParquetSerde() { - super(); - } + public HoodieParquetSerde() { + super(); + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeFileSplit.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeFileSplit.java index 0ce79fc60..5ba7545b7 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeFileSplit.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeFileSplit.java @@ -18,81 +18,83 @@ package com.uber.hoodie.hadoop.realtime; -import org.apache.hadoop.mapred.FileSplit; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.mapred.FileSplit; /** * Filesplit that wraps the base split and a list of log files to merge deltas from. 
*/ public class HoodieRealtimeFileSplit extends FileSplit { - private List deltaFilePaths; + private List deltaFilePaths; - private String maxCommitTime; + private String maxCommitTime; - private String basePath; + private String basePath; - public HoodieRealtimeFileSplit() { - super(); + public HoodieRealtimeFileSplit() { + super(); + } + + public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List deltaLogFiles, + String maxCommitTime) throws IOException { + super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), + baseSplit.getLocations()); + this.deltaFilePaths = deltaLogFiles; + this.maxCommitTime = maxCommitTime; + this.basePath = basePath; + } + + public List getDeltaFilePaths() { + return deltaFilePaths; + } + + public String getMaxCommitTime() { + return maxCommitTime; + } + + public String getBasePath() { + return basePath; + } + + private static void writeString(String str, DataOutput out) throws IOException { + byte[] pathBytes = str.getBytes(StandardCharsets.UTF_8); + out.writeInt(pathBytes.length); + out.write(pathBytes); + } + + private static String readString(DataInput in) throws IOException { + byte[] pathBytes = new byte[in.readInt()]; + in.readFully(pathBytes); + return new String(pathBytes, StandardCharsets.UTF_8); + } + + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + + writeString(maxCommitTime, out); + out.writeInt(deltaFilePaths.size()); + for (String logFilePath : deltaFilePaths) { + writeString(logFilePath, out); } + } - public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List deltaLogFiles, String maxCommitTime) throws IOException { - super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), baseSplit.getLocations()); - this.deltaFilePaths = deltaLogFiles; - this.maxCommitTime = maxCommitTime; - this.basePath = basePath; - } - - public List getDeltaFilePaths() { - return deltaFilePaths; - } - - public String getMaxCommitTime() { - 
return maxCommitTime; - } - - public String getBasePath() { - return basePath; - } - - private static void writeString(String str, DataOutput out) throws IOException { - byte[] pathBytes = str.getBytes(StandardCharsets.UTF_8); - out.writeInt(pathBytes.length); - out.write(pathBytes); - } - - private static String readString(DataInput in) throws IOException { - byte[] pathBytes = new byte[in.readInt()]; - in.readFully(pathBytes); - return new String(pathBytes, StandardCharsets.UTF_8); - } - - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - - writeString(maxCommitTime, out); - out.writeInt(deltaFilePaths.size()); - for (String logFilePath: deltaFilePaths) { - writeString(logFilePath, out); - } - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - - maxCommitTime = readString(in); - int totalLogFiles = in.readInt(); - deltaFilePaths = new ArrayList<>(totalLogFiles); - for (int i=0; i < totalLogFiles; i++) { - deltaFilePaths.add(readString(in)); - } + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + + maxCommitTime = readString(in); + int totalLogFiles = in.readInt(); + deltaFilePaths = new ArrayList<>(totalLogFiles); + for (int i = 0; i < totalLogFiles; i++) { + deltaFilePaths.add(readString(in)); } + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java index f4849109e..c92f0e593 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeInputFormat.java @@ -19,9 +19,7 @@ package com.uber.hoodie.hadoop.realtime; import com.google.common.base.Preconditions; - import com.google.common.collect.Sets; - import com.uber.hoodie.common.model.FileSlice; import 
com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.table.HoodieTableMetaClient; @@ -32,7 +30,16 @@ import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.hadoop.HoodieInputFormat; import com.uber.hoodie.hadoop.UseFileSplitsFromInputFormat; - +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; @@ -47,168 +54,168 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - /** * Input Format, that provides a real-time view of data in a Hoodie dataset */ @UseFileSplitsFromInputFormat public class HoodieRealtimeInputFormat extends HoodieInputFormat implements Configurable { - public static final Log LOG = LogFactory.getLog(HoodieRealtimeInputFormat.class); + public static final Log LOG = LogFactory.getLog(HoodieRealtimeInputFormat.class); - // These positions have to be deterministic across all tables - public static final int HOODIE_COMMIT_TIME_COL_POS = 0; - public static final int HOODIE_RECORD_KEY_COL_POS = 2; - public static final int HOODIE_PARTITION_PATH_COL_POS = 3; + // These positions have to be deterministic across all tables + public static final int HOODIE_COMMIT_TIME_COL_POS = 0; + public static final int HOODIE_RECORD_KEY_COL_POS = 2; + 
public static final int HOODIE_PARTITION_PATH_COL_POS = 3; - @Override - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + @Override + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - Stream fileSplits = Arrays.stream(super.getSplits(job, numSplits)).map(is -> (FileSplit) is); + Stream fileSplits = Arrays.stream(super.getSplits(job, numSplits)) + .map(is -> (FileSplit) is); - // obtain all unique parent folders for splits - Map> partitionsToParquetSplits = fileSplits.collect(Collectors.groupingBy(split -> split.getPath().getParent())); - // TODO(vc): Should we handle also non-hoodie splits here? - Map metaClientMap = new HashMap<>(); - Map partitionsToMetaClient = partitionsToParquetSplits.keySet().stream() - .collect(Collectors.toMap(Function.identity(), p -> { - // find if we have a metaclient already for this partition. - Optional matchingBasePath = metaClientMap.keySet().stream() - .filter(basePath -> p.toString().startsWith(basePath)).findFirst(); - if (matchingBasePath.isPresent()) { - return metaClientMap.get(matchingBasePath.get()); - } + // obtain all unique parent folders for splits + Map> partitionsToParquetSplits = fileSplits + .collect(Collectors.groupingBy(split -> split.getPath().getParent())); + // TODO(vc): Should we handle also non-hoodie splits here? + Map metaClientMap = new HashMap<>(); + Map partitionsToMetaClient = partitionsToParquetSplits.keySet() + .stream() + .collect(Collectors.toMap(Function.identity(), p -> { + // find if we have a metaclient already for this partition. 
+ Optional matchingBasePath = metaClientMap.keySet().stream() + .filter(basePath -> p.toString().startsWith(basePath)).findFirst(); + if (matchingBasePath.isPresent()) { + return metaClientMap.get(matchingBasePath.get()); + } - try { - HoodieTableMetaClient metaClient = getTableMetaClient(p.getFileSystem(conf), p); - metaClientMap.put(metaClient.getBasePath(), metaClient); - return metaClient; - } catch (IOException e) { - throw new HoodieIOException("Error creating hoodie meta client against : " + p, e); - } - })); + try { + HoodieTableMetaClient metaClient = getTableMetaClient(p.getFileSystem(conf), p); + metaClientMap.put(metaClient.getBasePath(), metaClient); + return metaClient; + } catch (IOException e) { + throw new HoodieIOException("Error creating hoodie meta client against : " + p, e); + } + })); - // for all unique split parents, obtain all delta files based on delta commit timeline, grouped on file id - List rtSplits = new ArrayList<>(); - partitionsToParquetSplits.keySet().stream().forEach(partitionPath -> { - // for each partition path obtain the data & log file groupings, then map back to inputsplits - HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath); - HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline()); - String relPartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), partitionPath); + // for all unique split parents, obtain all delta files based on delta commit timeline, grouped on file id + List rtSplits = new ArrayList<>(); + partitionsToParquetSplits.keySet().stream().forEach(partitionPath -> { + // for each partition path obtain the data & log file groupings, then map back to inputsplits + HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath); + HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, + metaClient.getActiveTimeline()); + String relPartitionPath = FSUtils + 
.getRelativePartitionPath(new Path(metaClient.getBasePath()), partitionPath); + try { + Stream latestFileSlices = fsView.getLatestFileSlices(relPartitionPath); + + // subgroup splits again by file id & match with log files. + Map> groupedInputSplits = partitionsToParquetSplits + .get(partitionPath).stream() + .collect(Collectors.groupingBy(split -> FSUtils.getFileId(split.getPath().getName()))); + latestFileSlices.forEach(fileSlice -> { + List dataFileSplits = groupedInputSplits.get(fileSlice.getFileId()); + dataFileSplits.forEach(split -> { try { - Stream latestFileSlices = fsView.getLatestFileSlices(relPartitionPath); - - // subgroup splits again by file id & match with log files. - Map> groupedInputSplits = partitionsToParquetSplits.get(partitionPath).stream() - .collect(Collectors.groupingBy(split -> FSUtils.getFileId(split.getPath().getName()))); - latestFileSlices.forEach(fileSlice -> { - List dataFileSplits = groupedInputSplits.get(fileSlice.getFileId()); - dataFileSplits.forEach(split -> { - try { - List logFilePaths = fileSlice.getLogFiles() - .map(logFile -> logFile.getPath().toString()) - .collect(Collectors.toList()); - // Get the maxCommit from the last delta or compaction or commit - when bootstrapped from COW table - String maxCommitTime = metaClient.getActiveTimeline() - .getTimelineOfActions( - Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, - HoodieTimeline.COMPACTION_ACTION, - HoodieTimeline.DELTA_COMMIT_ACTION)) - .filterCompletedInstants().lastInstant().get().getTimestamp(); - rtSplits.add( - new HoodieRealtimeFileSplit(split, metaClient.getBasePath(), logFilePaths, maxCommitTime)); - } catch (IOException e) { - throw new HoodieIOException("Error creating hoodie real time split ", e); - } - }); - }); - } catch (Exception e) { - throw new HoodieException("Error obtaining data file/log file grouping: " + partitionPath, e); + List logFilePaths = fileSlice.getLogFiles() + .map(logFile -> logFile.getPath().toString()) + .collect(Collectors.toList()); 
+ // Get the maxCommit from the last delta or compaction or commit - when bootstrapped from COW table + String maxCommitTime = metaClient.getActiveTimeline() + .getTimelineOfActions( + Sets.newHashSet(HoodieTimeline.COMMIT_ACTION, + HoodieTimeline.COMPACTION_ACTION, + HoodieTimeline.DELTA_COMMIT_ACTION)) + .filterCompletedInstants().lastInstant().get().getTimestamp(); + rtSplits.add( + new HoodieRealtimeFileSplit(split, metaClient.getBasePath(), logFilePaths, + maxCommitTime)); + } catch (IOException e) { + throw new HoodieIOException("Error creating hoodie real time split ", e); } + }); }); - LOG.info("Returning a total splits of " + rtSplits.size()); - return rtSplits.toArray(new InputSplit[rtSplits.size()]); + } catch (Exception e) { + throw new HoodieException("Error obtaining data file/log file grouping: " + partitionPath, + e); + } + }); + LOG.info("Returning a total splits of " + rtSplits.size()); + return rtSplits.toArray(new InputSplit[rtSplits.size()]); + } + + + @Override + public FileStatus[] listStatus(JobConf job) throws IOException { + // Call the HoodieInputFormat::listStatus to obtain all latest parquet files, based on commit timeline. + return super.listStatus(job); + } + + /** + * Add a field to the existing fields projected + */ + private static Configuration addProjectionField(Configuration conf, String fieldName, + int fieldIndex) { + String readColNames = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""); + String readColIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, ""); + + String readColNamesPrefix = readColNames + ","; + if (readColNames == null || readColNames.isEmpty()) { + readColNamesPrefix = ""; + } + String readColIdsPrefix = readColIds + ","; + if (readColIds == null || readColIds.isEmpty()) { + readColIdsPrefix = ""; } - - @Override - public FileStatus[] listStatus(JobConf job) throws IOException { - // Call the HoodieInputFormat::listStatus to obtain all latest parquet files, based on commit timeline. 
- return super.listStatus(job); + if (!readColNames.contains(fieldName)) { + // If not already in the list - then add it + conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, + readColNamesPrefix + fieldName); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIdsPrefix + fieldIndex); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Adding extra column " + fieldName + + ", to enable log merging cols (%s) ids (%s) ", + conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), + conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR))); + } } + return conf; + } - /** - * Add a field to the existing fields projected - */ - private static Configuration addProjectionField(Configuration conf, String fieldName, - int fieldIndex) { - String readColNames = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, ""); - String readColIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, ""); + private static Configuration addRequiredProjectionFields(Configuration configuration) { + // Need this to do merge records in HoodieRealtimeRecordReader + configuration = addProjectionField(configuration, HoodieRecord.RECORD_KEY_METADATA_FIELD, + HOODIE_RECORD_KEY_COL_POS); + configuration = addProjectionField(configuration, HoodieRecord.COMMIT_TIME_METADATA_FIELD, + HOODIE_COMMIT_TIME_COL_POS); + configuration = addProjectionField(configuration, + HoodieRecord.PARTITION_PATH_METADATA_FIELD, HOODIE_PARTITION_PATH_COL_POS); + return configuration; + } - String readColNamesPrefix = readColNames + ","; - if (readColNames == null || readColNames.isEmpty()) { - readColNamesPrefix = ""; - } - String readColIdsPrefix = readColIds + ","; - if (readColIds == null || readColIds.isEmpty()) { - readColIdsPrefix = ""; - } + @Override + public RecordReader getRecordReader(final InputSplit split, + final JobConf job, + final Reporter reporter) throws IOException { + LOG.info("Creating record reader with readCols :" + job + 
.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)); + // sanity check + Preconditions.checkArgument(split instanceof HoodieRealtimeFileSplit, + "HoodieRealtimeRecordReader can only work on HoodieRealtimeFileSplit and not with " + + split); + return new HoodieRealtimeRecordReader((HoodieRealtimeFileSplit) split, job, + super.getRecordReader(split, job, reporter)); + } - if (!readColNames.contains(fieldName)) { - // If not already in the list - then add it - conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, - readColNamesPrefix + fieldName); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, readColIdsPrefix + fieldIndex); - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("Adding extra column " + fieldName - + ", to enable log merging cols (%s) ids (%s) ", - conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), - conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR))); - } - } - return conf; - } + @Override + public void setConf(Configuration conf) { + this.conf = addRequiredProjectionFields(conf); + } - private static Configuration addRequiredProjectionFields(Configuration configuration) { - // Need this to do merge records in HoodieRealtimeRecordReader - configuration = addProjectionField(configuration, HoodieRecord.RECORD_KEY_METADATA_FIELD, - HOODIE_RECORD_KEY_COL_POS); - configuration = addProjectionField(configuration, HoodieRecord.COMMIT_TIME_METADATA_FIELD, - HOODIE_COMMIT_TIME_COL_POS); - configuration = addProjectionField(configuration, - HoodieRecord.PARTITION_PATH_METADATA_FIELD, HOODIE_PARTITION_PATH_COL_POS); - return configuration; - } - - @Override - public RecordReader getRecordReader(final InputSplit split, - final JobConf job, - final Reporter reporter) throws IOException { - LOG.info("Creating record reader with readCols :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)); - // sanity check - Preconditions.checkArgument(split instanceof HoodieRealtimeFileSplit, - "HoodieRealtimeRecordReader can only 
work on HoodieRealtimeFileSplit and not with " + split ); - return new HoodieRealtimeRecordReader((HoodieRealtimeFileSplit) split, job, super.getRecordReader(split, job, reporter)); - } - - @Override - public void setConf(Configuration conf) { - this.conf = addRequiredProjectionFields(conf); - } - - @Override - public Configuration getConf() { - return conf; - } + @Override + public Configuration getConf() { + return conf; + } } diff --git a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java index 08a23d7ba..00ef57e42 100644 --- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java +++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java @@ -18,14 +18,21 @@ package com.uber.hoodie.hadoop.realtime; -import com.uber.hoodie.common.model.HoodieAvroPayload; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner; import com.uber.hoodie.common.util.FSUtils; -import com.uber.hoodie.common.util.ReflectionUtils; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieIOException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.GenericArray; import org.apache.avro.generic.GenericFixed; @@ -51,291 +58,274 @@ import parquet.avro.AvroSchemaConverter; import parquet.hadoop.ParquetFileReader; import parquet.schema.MessageType; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import 
java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.stream.Collectors; - /** - * Record Reader implementation to merge fresh avro data with base parquet data, to support real time - * queries. + * Record Reader implementation to merge fresh avro data with base parquet data, to support real + * time queries. */ public class HoodieRealtimeRecordReader implements RecordReader { - private final RecordReader parquetReader; - private final HoodieRealtimeFileSplit split; - private final JobConf jobConf; + private final RecordReader parquetReader; + private final HoodieRealtimeFileSplit split; + private final JobConf jobConf; - public static final Log LOG = LogFactory.getLog(HoodieRealtimeRecordReader.class); + public static final Log LOG = LogFactory.getLog(HoodieRealtimeRecordReader.class); - private final HashMap deltaRecordMap; - private final MessageType baseFileSchema; + private final HashMap deltaRecordMap; + private final MessageType baseFileSchema; - public HoodieRealtimeRecordReader(HoodieRealtimeFileSplit split, - JobConf job, - RecordReader realReader) { - this.split = split; - this.jobConf = job; - this.parquetReader = realReader; - this.deltaRecordMap = new HashMap<>(); + public HoodieRealtimeRecordReader(HoodieRealtimeFileSplit split, + JobConf job, + RecordReader realReader) { + this.split = split; + this.jobConf = job; + this.parquetReader = realReader; + this.deltaRecordMap = new HashMap<>(); - LOG.info("cfg ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)); - try { - baseFileSchema = readSchema(jobConf, split.getPath()); - readAndCompactLog(); - } catch (IOException e) { - throw new HoodieIOException( - "Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e); - } + LOG.info("cfg ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)); + try { + baseFileSchema = readSchema(jobConf, split.getPath()); + readAndCompactLog(); + } catch (IOException e) 
{ + throw new HoodieIOException( + "Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e); + } + } + + /** + * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the + * twitter parquet to support hive 1.1.0 + */ + private static MessageType readSchema(Configuration conf, Path parquetFilePath) { + try { + return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData() + .getSchema(); + } catch (IOException e) { + throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, + e); + } + } + + + /** + * Goes through the log files and populates a map with latest version of each key logged, since + * the base split was written. + */ + private void readAndCompactLog() throws IOException { + Schema writerSchema = new AvroSchemaConverter().convert(baseFileSchema); + List projectionFields = orderFields( + jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), + jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), + jobConf.get("partition_columns", "")); + // TODO(vc): In the future, the reader schema should be updated based on log files & be able to null out fields not present before + Schema readerSchema = generateProjectionSchema(writerSchema, projectionFields); + + LOG.info( + String.format("About to read compacted logs %s for base split %s, projecting cols %s", + split.getDeltaFilePaths(), split.getPath(), projectionFields)); + HoodieCompactedLogRecordScanner compactedLogRecordScanner = + new HoodieCompactedLogRecordScanner(FSUtils.getFs(), split.getBasePath(), + split.getDeltaFilePaths(), + readerSchema, split.getMaxCommitTime()); + // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit + // but can return records for completed commits > the commit we are trying to read (if using readCommit() API) + for (HoodieRecord hoodieRecord : compactedLogRecordScanner) { + GenericRecord rec = (GenericRecord) 
hoodieRecord.getData().getInsertValue(readerSchema) + .get(); + String key = hoodieRecord.getRecordKey(); + // we assume, a later safe record in the log, is newer than what we have in the map & replace it. + ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, writerSchema); + deltaRecordMap.put(key, aWritable); + if (LOG.isDebugEnabled()) { + LOG.debug("Log record : " + arrayWritableToString(aWritable)); + } + } + } + + private static String arrayWritableToString(ArrayWritable writable) { + if (writable == null) { + return "null"; } - /** - * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the - * twitter parquet to support hive 1.1.0 - */ - private static MessageType readSchema(Configuration conf, Path parquetFilePath) { - try { - return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData() - .getSchema(); - } catch (IOException e) { - throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, - e); - } + StringBuilder builder = new StringBuilder(); + Writable[] values = writable.get(); + builder.append(String.format("Size: %s,", values.length)); + for (Writable w : values) { + builder.append(w + " "); + } + return builder.toString(); + } + + /** + * Given a comma separated list of field names and positions at which they appear on Hive, return + * a ordered list of field names, that can be passed onto storage. + */ + public static List orderFields(String fieldNameCsv, String fieldOrderCsv, + String partitioningFieldsCsv) { + + String[] fieldOrders = fieldOrderCsv.split(","); + Set partitioningFields = Arrays.stream(partitioningFieldsCsv.split(",")) + .collect(Collectors.toSet()); + List fieldNames = Arrays.stream(fieldNameCsv.split(",")) + .filter(fn -> !partitioningFields.contains(fn)).collect( + Collectors.toList()); + + // Hive does not provide ids for partitioning fields, so check for lengths excluding that. 
+ if (fieldNames.size() != fieldOrders.length) { + throw new HoodieException(String.format( + "Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d", + fieldNames.size(), fieldOrders.length)); + } + TreeMap orderedFieldMap = new TreeMap<>(); + for (int ox = 0; ox < fieldOrders.length; ox++) { + orderedFieldMap.put(Integer.parseInt(fieldOrders[ox]), fieldNames.get(ox)); + } + return new ArrayList<>(orderedFieldMap.values()); + } + + /** + * Generate a reader schema off the provided writeSchema, to just project out the provided + * columns + */ + public static Schema generateProjectionSchema(Schema writeSchema, List fieldNames) { + List projectedFields = new ArrayList<>(); + for (String fn : fieldNames) { + Schema.Field field = writeSchema.getField(fn); + if (field == null) { + throw new HoodieException("Field " + fn + " not found log schema. Query cannot proceed!"); + } + projectedFields + .add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue())); } + return Schema.createRecord(projectedFields); + } - /** - * Goes through the log files and populates a map with latest version of each key logged, since the base split was written. 
- */ - private void readAndCompactLog() throws IOException { - Schema writerSchema = new AvroSchemaConverter().convert(baseFileSchema); - List projectionFields = orderFields( - jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR), - jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR), - jobConf.get("partition_columns", "")); - // TODO(vc): In the future, the reader schema should be updated based on log files & be able to null out fields not present before - Schema readerSchema = generateProjectionSchema(writerSchema, projectionFields); + /** + * Convert the projected read from delta record into an array writable + */ + public static Writable avroToArrayWritable(Object value, Schema schema) { - LOG.info( - String.format("About to read compacted logs %s for base split %s, projecting cols %s", - split.getDeltaFilePaths(), split.getPath(), projectionFields)); - HoodieCompactedLogRecordScanner compactedLogRecordScanner = - new HoodieCompactedLogRecordScanner(FSUtils.getFs(), split.getBasePath(), split.getDeltaFilePaths(), - readerSchema, split.getMaxCommitTime()); - // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit - // but can return records for completed commits > the commit we are trying to read (if using readCommit() API) - for (HoodieRecord hoodieRecord : compactedLogRecordScanner) { - GenericRecord rec = (GenericRecord) hoodieRecord.getData().getInsertValue(readerSchema) - .get(); - String key = hoodieRecord.getRecordKey(); - // we assume, a later safe record in the log, is newer than what we have in the map & replace it. 
- ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, writerSchema); - deltaRecordMap.put(key, aWritable); - if (LOG.isDebugEnabled()) { - LOG.debug("Log record : " + arrayWritableToString(aWritable)); - } - } + // if value is null, make a NullWritable + if (value == null) { + return NullWritable.get(); } - private static String arrayWritableToString(ArrayWritable writable) { - if (writable == null) { - return "null"; + switch (schema.getType()) { + case STRING: + return new Text(value.toString()); + case BYTES: + return new BytesWritable((byte[]) value); + case INT: + return new IntWritable((Integer) value); + case LONG: + return new LongWritable((Long) value); + case FLOAT: + return new FloatWritable((Float) value); + case DOUBLE: + return new DoubleWritable((Double) value); + case BOOLEAN: + return new BooleanWritable((Boolean) value); + case NULL: + return NullWritable.get(); + case RECORD: + GenericRecord record = (GenericRecord) value; + Writable[] values1 = new Writable[schema.getFields().size()]; + int index1 = 0; + for (Schema.Field field : schema.getFields()) { + values1[index1++] = avroToArrayWritable(record.get(field.name()), field.schema()); } - - StringBuilder builder = new StringBuilder(); - Writable[] values = writable.get(); - builder.append(String.format("Size: %s,", values.length)); - for (Writable w: values) { - builder.append(w + " "); + return new ArrayWritable(Writable.class, values1); + case ENUM: + return new Text(value.toString()); + case ARRAY: + GenericArray arrayValue = (GenericArray) value; + Writable[] values2 = new Writable[arrayValue.size()]; + int index2 = 0; + for (Object obj : arrayValue) { + values2[index2++] = avroToArrayWritable(obj, schema.getElementType()); } - return builder.toString(); - } - - /** - * Given a comma separated list of field names and positions at which they appear on Hive, - * return a ordered list of field names, that can be passed onto storage. 
- * - * @param fieldNameCsv - * @param fieldOrderCsv - * @return - */ - public static List orderFields(String fieldNameCsv, String fieldOrderCsv, - String partitioningFieldsCsv) { - - String[] fieldOrders = fieldOrderCsv.split(","); - Set partitioningFields = Arrays.stream(partitioningFieldsCsv.split(",")) - .collect(Collectors.toSet()); - List fieldNames = Arrays.stream(fieldNameCsv.split(",")) - .filter(fn -> !partitioningFields.contains(fn)).collect( - Collectors.toList()); - - // Hive does not provide ids for partitioning fields, so check for lengths excluding that. - if (fieldNames.size() != fieldOrders.length) { - throw new HoodieException(String.format( - "Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d", - fieldNames.size(), fieldOrders.length)); + return new ArrayWritable(Writable.class, values2); + case MAP: + Map mapValue = (Map) value; + Writable[] values3 = new Writable[mapValue.size()]; + int index3 = 0; + for (Object entry : mapValue.entrySet()) { + Map.Entry mapEntry = (Map.Entry) entry; + Writable[] mapValues = new Writable[2]; + mapValues[0] = new Text(mapEntry.getKey().toString()); + mapValues[1] = avroToArrayWritable(mapEntry.getValue(), schema.getValueType()); + values3[index3++] = new ArrayWritable(Writable.class, mapValues); } - TreeMap orderedFieldMap = new TreeMap<>(); - for (int ox = 0; ox < fieldOrders.length; ox++) { - orderedFieldMap.put(Integer.parseInt(fieldOrders[ox]), fieldNames.get(ox)); + return new ArrayWritable(Writable.class, values3); + case UNION: + List types = schema.getTypes(); + if (types.size() != 2) { + throw new IllegalArgumentException("Only support union with 2 fields"); } - return new ArrayList<>(orderedFieldMap.values()); - } - - /** - * Generate a reader schema off the provided writeSchema, to just project out - * the provided columns - * - * @param writeSchema - * @param fieldNames - * @return - */ - public static Schema generateProjectionSchema(Schema writeSchema, List fieldNames) { 
- List projectedFields = new ArrayList<>(); - for (String fn: fieldNames) { - Schema.Field field = writeSchema.getField(fn); - if (field == null) { - throw new HoodieException("Field "+ fn + " not found log schema. Query cannot proceed!"); - } - projectedFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue())); - } - - return Schema.createRecord(projectedFields); - } - - /** - * Convert the projected read from delta record into an array writable - * - * @param value - * @param schema - * @return - */ - public static Writable avroToArrayWritable(Object value, Schema schema) { - - // if value is null, make a NullWritable - if (value == null) { - return NullWritable.get(); - } - - switch (schema.getType()) { - case STRING: - return new Text(value.toString()); - case BYTES: - return new BytesWritable((byte[]) value); - case INT: - return new IntWritable((Integer) value); - case LONG: - return new LongWritable((Long) value); - case FLOAT: - return new FloatWritable((Float) value); - case DOUBLE: - return new DoubleWritable((Double) value); - case BOOLEAN: - return new BooleanWritable((Boolean) value); - case NULL: - return NullWritable.get(); - case RECORD: - GenericRecord record = (GenericRecord) value; - Writable[] values1 = new Writable[schema.getFields().size()]; - int index1 = 0; - for (Schema.Field field : schema.getFields()) { - values1[index1++] = avroToArrayWritable(record.get(field.name()), field.schema()); - } - return new ArrayWritable(Writable.class, values1); - case ENUM: - return new Text(value.toString()); - case ARRAY: - GenericArray arrayValue = (GenericArray) value; - Writable[] values2 = new Writable[arrayValue.size()]; - int index2 = 0; - for (Object obj : arrayValue) { - values2[index2++] = avroToArrayWritable(obj, schema.getElementType()); - } - return new ArrayWritable(Writable.class, values2); - case MAP: - Map mapValue = (Map) value; - Writable[] values3 = new Writable[mapValue.size()]; - int index3 = 0; - 
for (Object entry : mapValue.entrySet()) { - Map.Entry mapEntry = (Map.Entry) entry; - Writable[] mapValues = new Writable[2]; - mapValues[0] = new Text(mapEntry.getKey().toString()); - mapValues[1] = avroToArrayWritable(mapEntry.getValue(), schema.getValueType()); - values3[index3++] = new ArrayWritable(Writable.class, mapValues); - } - return new ArrayWritable(Writable.class, values3); - case UNION: - List types = schema.getTypes(); - if (types.size() != 2) { - throw new IllegalArgumentException("Only support union with 2 fields"); - } - Schema s1 = types.get(0); - Schema s2 = types.get(1); - if (s1.getType() == Schema.Type.NULL) { - return avroToArrayWritable(value, s2); - } else if (s2.getType() == Schema.Type.NULL) { - return avroToArrayWritable(value, s1); - } else { - throw new IllegalArgumentException("Only support union with null"); - } - case FIXED: - return new BytesWritable(((GenericFixed) value).bytes()); - } - return null; - } - - @Override - public boolean next(Void aVoid, ArrayWritable arrayWritable) throws IOException { - // Call the underlying parquetReader.next - which may replace the passed in ArrayWritable with a new block of values - boolean result = this.parquetReader.next(aVoid, arrayWritable); - if(!result) { - // if the result is false, then there are no more records - return false; + Schema s1 = types.get(0); + Schema s2 = types.get(1); + if (s1.getType() == Schema.Type.NULL) { + return avroToArrayWritable(value, s2); + } else if (s2.getType() == Schema.Type.NULL) { + return avroToArrayWritable(value, s1); } else { - // TODO(VC): Right now, we assume all records in log, have a matching base record. (which would be true until we have a way to index logs too) - // return from delta records map if we have some match. 
- String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS].toString(); - if (LOG.isDebugEnabled()) { - LOG.debug(String.format("key %s, base values: %s, log values: %s", - key, arrayWritableToString(arrayWritable), arrayWritableToString(deltaRecordMap.get(key)))); - } - if (deltaRecordMap.containsKey(key)) { - Writable[] replaceValue = deltaRecordMap.get(key).get(); - Writable[] originalValue = arrayWritable.get(); - System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length); - arrayWritable.set(originalValue); - } - return true; + throw new IllegalArgumentException("Only support union with null"); } + case FIXED: + return new BytesWritable(((GenericFixed) value).bytes()); } + return null; + } - @Override - public Void createKey() { - return parquetReader.createKey(); + @Override + public boolean next(Void aVoid, ArrayWritable arrayWritable) throws IOException { + // Call the underlying parquetReader.next - which may replace the passed in ArrayWritable with a new block of values + boolean result = this.parquetReader.next(aVoid, arrayWritable); + if (!result) { + // if the result is false, then there are no more records + return false; + } else { + // TODO(VC): Right now, we assume all records in log, have a matching base record. (which would be true until we have a way to index logs too) + // return from delta records map if we have some match. 
+ String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS] + .toString(); + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("key %s, base values: %s, log values: %s", + key, arrayWritableToString(arrayWritable), + arrayWritableToString(deltaRecordMap.get(key)))); + } + if (deltaRecordMap.containsKey(key)) { + Writable[] replaceValue = deltaRecordMap.get(key).get(); + Writable[] originalValue = arrayWritable.get(); + System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length); + arrayWritable.set(originalValue); + } + return true; } + } - @Override - public ArrayWritable createValue() { - return parquetReader.createValue(); - } + @Override + public Void createKey() { + return parquetReader.createKey(); + } - @Override - public long getPos() throws IOException { - return parquetReader.getPos(); - } + @Override + public ArrayWritable createValue() { + return parquetReader.createValue(); + } - @Override - public void close() throws IOException { - parquetReader.close(); - } + @Override + public long getPos() throws IOException { + return parquetReader.getPos(); + } - @Override - public float getProgress() throws IOException { - return parquetReader.getProgress(); - } + @Override + public void close() throws IOException { + parquetReader.close(); + } + + @Override + public float getProgress() throws IOException { + return parquetReader.getProgress(); + } } diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/AnnotationTest.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/AnnotationTest.java index 9247582e0..ebe3fd805 100644 --- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/AnnotationTest.java +++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/AnnotationTest.java @@ -17,22 +17,23 @@ package com.uber.hoodie.hadoop; -import org.junit.Test; -import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; + import java.lang.annotation.Annotation; +import 
org.junit.Test; public class AnnotationTest { - @Test - public void testAnnotation() { - assertTrue(HoodieInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); - Annotation[] annotations = HoodieInputFormat.class.getAnnotations(); - boolean found = false; - for (Annotation annotation : annotations) { - if ("UseFileSplitsFromInputFormat".equals(annotation.annotationType().getSimpleName())){ - found = true; - } - } - assertTrue(found); + @Test + public void testAnnotation() { + assertTrue(HoodieInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); + Annotation[] annotations = HoodieInputFormat.class.getAnnotations(); + boolean found = false; + for (Annotation annotation : annotations) { + if ("UseFileSplitsFromInputFormat".equals(annotation.annotationType().getSimpleName())) { + found = true; + } } + assertTrue(found); + } } diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/HoodieInputFormatTest.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/HoodieInputFormatTest.java index 997c91f22..5bedb3023 100644 --- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/HoodieInputFormatTest.java +++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/HoodieInputFormatTest.java @@ -16,222 +16,235 @@ package com.uber.hoodie.hadoop; +import static org.junit.Assert.assertEquals; + import com.uber.hoodie.common.util.FSUtils; +import java.io.File; +import java.io.IOException; import org.apache.avro.Schema; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.mapred.*; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; - -import static 
org.junit.Assert.assertEquals; - public class HoodieInputFormatTest { - private HoodieInputFormat inputFormat; - private JobConf jobConf; - @Before public void setUp() { - inputFormat = new HoodieInputFormat(); - jobConf = new JobConf(); - inputFormat.setConf(jobConf); - } + private HoodieInputFormat inputFormat; + private JobConf jobConf; - @Rule public TemporaryFolder basePath = new TemporaryFolder(); + @Before + public void setUp() { + inputFormat = new HoodieInputFormat(); + jobConf = new JobConf(); + inputFormat.setConf(jobConf); + } - @Test public void testInputFormatLoad() throws IOException { - // initial commit - File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); - InputFormatTestUtil.commit(basePath, "100"); + @Rule + public TemporaryFolder basePath = new TemporaryFolder(); - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + @Test + public void testInputFormatLoad() throws IOException { + // initial commit + File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); + InputFormatTestUtil.commit(basePath, "100"); - InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 10); - assertEquals(10, inputSplits.length); + // Add the paths + FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - FileStatus[] files = inputFormat.listStatus(jobConf); - assertEquals(10, files.length); - } + InputSplit[] inputSplits = inputFormat.getSplits(jobConf, 10); + assertEquals(10, inputSplits.length); - @Test public void testInputFormatUpdates() throws IOException { - // initial commit - File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); - InputFormatTestUtil.commit(basePath, "100"); + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals(10, files.length); + } - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + @Test + public void testInputFormatUpdates() throws IOException { + // initial commit + File 
partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); + InputFormatTestUtil.commit(basePath, "100"); - FileStatus[] files = inputFormat.listStatus(jobConf); - assertEquals(10, files.length); + // Add the paths + FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - // update files - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", true); - // Before the commit - files = inputFormat.listStatus(jobConf); - assertEquals(10, files.length); - ensureFilesInCommit( - "Commit 200 has not been committed. We should not see files from this commit", files, - "200", 0); - InputFormatTestUtil.commit(basePath, "200"); - files = inputFormat.listStatus(jobConf); - assertEquals(10, files.length); - ensureFilesInCommit( - "5 files have been updated to commit 200. We should see 5 files from commit 200 and 5 files from 100 commit", - files, "200", 5); - ensureFilesInCommit( - "5 files have been updated to commit 200. We should see 5 files from commit 100 and 5 files from 200 commit", - files, "100", 5); - } + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals(10, files.length); - @Test public void testIncrementalSimple() throws IOException { - // initial commit - File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); - InputFormatTestUtil.commit(basePath, "100"); + // update files + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", true); + // Before the commit + files = inputFormat.listStatus(jobConf); + assertEquals(10, files.length); + ensureFilesInCommit( + "Commit 200 has not been committed. We should not see files from this commit", files, + "200", 0); + InputFormatTestUtil.commit(basePath, "200"); + files = inputFormat.listStatus(jobConf); + assertEquals(10, files.length); + ensureFilesInCommit( + "5 files have been updated to commit 200. 
We should see 5 files from commit 200 and 5 files from 100 commit", + files, "200", 5); + ensureFilesInCommit( + "5 files have been updated to commit 200. We should see 5 files from commit 100 and 5 files from 200 commit", + files, "100", 5); + } - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + @Test + public void testIncrementalSimple() throws IOException { + // initial commit + File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); + InputFormatTestUtil.commit(basePath, "100"); - InputFormatTestUtil.setupIncremental(jobConf, "100", 1); + // Add the paths + FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - FileStatus[] files = inputFormat.listStatus(jobConf); - assertEquals( - "We should exclude commit 100 when returning incremental pull with start commit time as 100", - 0, files.length); - } + InputFormatTestUtil.setupIncremental(jobConf, "100", 1); - @Test public void testIncrementalWithMultipleCommits() throws IOException { - // initial commit - File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); - InputFormatTestUtil.commit(basePath, "100"); - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - // update files - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", false); - InputFormatTestUtil.commit(basePath, "200"); + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals( + "We should exclude commit 100 when returning incremental pull with start commit time as 100", + 0, files.length); + } - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 4, "300", false); - InputFormatTestUtil.commit(basePath, "300"); + @Test + public void testIncrementalWithMultipleCommits() throws IOException { + // initial commit + File partitionDir = InputFormatTestUtil.prepareDataset(basePath, 10, "100"); + InputFormatTestUtil.commit(basePath, "100"); + // Add the paths + FileInputFormat.setInputPaths(jobConf, 
partitionDir.getPath()); + // update files + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 5, "200", false); + InputFormatTestUtil.commit(basePath, "200"); - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 3, "400", false); - InputFormatTestUtil.commit(basePath, "400"); + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 4, "300", false); + InputFormatTestUtil.commit(basePath, "300"); - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 2, "500", false); - InputFormatTestUtil.commit(basePath, "500"); + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 3, "400", false); + InputFormatTestUtil.commit(basePath, "400"); - InputFormatTestUtil.simulateUpdates(partitionDir, "100", 1, "600", false); - InputFormatTestUtil.commit(basePath, "600"); + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 2, "500", false); + InputFormatTestUtil.commit(basePath, "500"); - InputFormatTestUtil.setupIncremental(jobConf, "100", 1); - FileStatus[] files = inputFormat.listStatus(jobConf); - assertEquals("Pulling 1 commit from 100, should get us the 5 files committed at 200", 5, - files.length); - ensureFilesInCommit("Pulling 1 commit from 100, should get us the 5 files committed at 200", - files, "200", 5); + InputFormatTestUtil.simulateUpdates(partitionDir, "100", 1, "600", false); + InputFormatTestUtil.commit(basePath, "600"); - InputFormatTestUtil.setupIncremental(jobConf, "100", 3); - files = inputFormat.listStatus(jobConf); + InputFormatTestUtil.setupIncremental(jobConf, "100", 1); + FileStatus[] files = inputFormat.listStatus(jobConf); + assertEquals("Pulling 1 commit from 100, should get us the 5 files committed at 200", 5, + files.length); + ensureFilesInCommit("Pulling 1 commit from 100, should get us the 5 files committed at 200", + files, "200", 5); - assertEquals( - "Pulling 3 commits from 100, should get us the 3 files from 400 commit, 1 file from 300 commit and 1 file from 200 commit", - 5, files.length); - 
ensureFilesInCommit("Pulling 3 commits from 100, should get us the 3 files from 400 commit", - files, "400", 3); - ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 300 commit", - files, "300", 1); - ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 200 commit", - files, "200", 1); + InputFormatTestUtil.setupIncremental(jobConf, "100", 3); + files = inputFormat.listStatus(jobConf); - InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtil.MAX_COMMIT_ALL); - files = inputFormat.listStatus(jobConf); + assertEquals( + "Pulling 3 commits from 100, should get us the 3 files from 400 commit, 1 file from 300 commit and 1 file from 200 commit", + 5, files.length); + ensureFilesInCommit("Pulling 3 commits from 100, should get us the 3 files from 400 commit", + files, "400", 3); + ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 300 commit", + files, "300", 1); + ensureFilesInCommit("Pulling 3 commits from 100, should get us the 1 files from 200 commit", + files, "200", 1); - assertEquals( - "Pulling all commits from 100, should get us the 1 file from each of 200,300,400,500,400 commits", - 5, files.length); - ensureFilesInCommit( - "Pulling all commits from 100, should get us the 1 files from 600 commit", files, "600", - 1); - ensureFilesInCommit( - "Pulling all commits from 100, should get us the 1 files from 500 commit", files, "500", - 1); - ensureFilesInCommit( - "Pulling all commits from 100, should get us the 1 files from 400 commit", files, "400", - 1); - ensureFilesInCommit( - "Pulling all commits from 100, should get us the 1 files from 300 commit", files, "300", - 1); - ensureFilesInCommit( - "Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", - 1); - } + InputFormatTestUtil.setupIncremental(jobConf, "100", HoodieHiveUtil.MAX_COMMIT_ALL); + files = inputFormat.listStatus(jobConf); - //TODO enable this after enabling predicate 
pushdown - public void testPredicatePushDown() throws IOException { - // initial commit - Schema schema = InputFormatTestUtil.readSchema("/sample1.avro"); - String commit1 = "20160628071126"; - File partitionDir = - InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, 10, commit1); - InputFormatTestUtil.commit(basePath, commit1); - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - // check whether we have 10 records at this point - ensureRecordsInCommit("We need to have 10 records at this point for commit " + commit1, commit1, 10, 10); + assertEquals( + "Pulling all commits from 100, should get us the 1 file from each of 200,300,400,500,400 commits", + 5, files.length); + ensureFilesInCommit( + "Pulling all commits from 100, should get us the 1 files from 600 commit", files, "600", + 1); + ensureFilesInCommit( + "Pulling all commits from 100, should get us the 1 files from 500 commit", files, "500", + 1); + ensureFilesInCommit( + "Pulling all commits from 100, should get us the 1 files from 400 commit", files, "400", + 1); + ensureFilesInCommit( + "Pulling all commits from 100, should get us the 1 files from 300 commit", files, "300", + 1); + ensureFilesInCommit( + "Pulling all commits from 100, should get us the 1 files from 200 commit", files, "200", + 1); + } - // update 2 records in the original parquet file and save it as commit 200 - String commit2 = "20160629193623"; - InputFormatTestUtil.simulateParquetUpdates(partitionDir, schema, commit1, 10, 2, commit2); - InputFormatTestUtil.commit(basePath, commit2); + //TODO enable this after enabling predicate pushdown + public void testPredicatePushDown() throws IOException { + // initial commit + Schema schema = InputFormatTestUtil.readSchema("/sample1.avro"); + String commit1 = "20160628071126"; + File partitionDir = + InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, 10, commit1); + InputFormatTestUtil.commit(basePath, commit1); + // Add the paths + 
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + // check whether we have 10 records at this point + ensureRecordsInCommit("We need to have 10 records at this point for commit " + commit1, commit1, + 10, 10); - InputFormatTestUtil.setupIncremental(jobConf, commit1, 1); - // check whether we have 2 records at this point - ensureRecordsInCommit( - "We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2, 2, 2); - // Make sure we have the 10 records if we roll back the stattime - InputFormatTestUtil.setupIncremental(jobConf, "0", 2); - ensureRecordsInCommit( - "We need to have 8 records that was modified at commit " + commit1 + " and no more", commit1, 8, 10); - ensureRecordsInCommit( - "We need to have 2 records that was modified at commit " + commit2 + " and no more", commit2, 2, 10); - } + // update 2 records in the original parquet file and save it as commit 200 + String commit2 = "20160629193623"; + InputFormatTestUtil.simulateParquetUpdates(partitionDir, schema, commit1, 10, 2, commit2); + InputFormatTestUtil.commit(basePath, commit2); - private void ensureRecordsInCommit(String msg, String commit, - int expectedNumberOfRecordsInCommit, int totalExpected) throws IOException { - int actualCount = 0; - int totalCount = 0; - InputSplit[] splits = inputFormat.getSplits(jobConf, 1); - for(InputSplit split:splits) { - RecordReader - recordReader = inputFormat.getRecordReader(split, jobConf, null); - Void key = recordReader.createKey(); - ArrayWritable writable = recordReader.createValue(); + InputFormatTestUtil.setupIncremental(jobConf, commit1, 1); + // check whether we have 2 records at this point + ensureRecordsInCommit( + "We need to have 2 records that was modified at commit " + commit2 + " and no more", + commit2, 2, 2); + // Make sure we have the 10 records if we roll back the stattime + InputFormatTestUtil.setupIncremental(jobConf, "0", 2); + ensureRecordsInCommit( + "We need to have 8 records that was 
modified at commit " + commit1 + " and no more", + commit1, 8, 10); + ensureRecordsInCommit( + "We need to have 2 records that was modified at commit " + commit2 + " and no more", + commit2, 2, 10); + } - while(recordReader.next(key, writable)) { - // writable returns an array with [field1, field2, _hoodie_commit_time, _hoodie_commit_seqno] - // Take the commit time and compare with the one we are interested in - if(commit.equals((writable.get()[2]).toString())) { - actualCount++; - } - totalCount++; - } + private void ensureRecordsInCommit(String msg, String commit, + int expectedNumberOfRecordsInCommit, int totalExpected) throws IOException { + int actualCount = 0; + int totalCount = 0; + InputSplit[] splits = inputFormat.getSplits(jobConf, 1); + for (InputSplit split : splits) { + RecordReader + recordReader = inputFormat.getRecordReader(split, jobConf, null); + Void key = recordReader.createKey(); + ArrayWritable writable = recordReader.createValue(); + + while (recordReader.next(key, writable)) { + // writable returns an array with [field1, field2, _hoodie_commit_time, _hoodie_commit_seqno] + // Take the commit time and compare with the one we are interested in + if (commit.equals((writable.get()[2]).toString())) { + actualCount++; } - assertEquals(msg, expectedNumberOfRecordsInCommit, actualCount); - assertEquals(msg, totalExpected, totalCount); + totalCount++; + } } + assertEquals(msg, expectedNumberOfRecordsInCommit, actualCount); + assertEquals(msg, totalExpected, totalCount); + } - public static void ensureFilesInCommit(String msg, FileStatus[] files, String commit, - int expected) { - int count = 0; - for (FileStatus file : files) { - String commitTs = FSUtils.getCommitTime(file.getPath().getName()); - if (commit.equals(commitTs)) { - count++; - } - } - assertEquals(msg, expected, count); + public static void ensureFilesInCommit(String msg, FileStatus[] files, String commit, + int expected) { + int count = 0; + for (FileStatus file : files) { + String 
commitTs = FSUtils.getCommitTime(file.getPath().getName()); + if (commit.equals(commitTs)) { + count++; + } } + assertEquals(msg, expected, count); + } } diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/InputFormatTestUtil.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/InputFormatTestUtil.java index ae57a4fa5..ac14e6484 100644 --- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/InputFormatTestUtil.java +++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/InputFormatTestUtil.java @@ -16,20 +16,10 @@ package com.uber.hoodie.hadoop; -import com.uber.hoodie.avro.MercifulJsonConverter; import com.uber.hoodie.common.model.HoodieRecord; import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.SchemaTestUtil; -import com.uber.hoodie.common.util.TestRecord; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; -import org.apache.parquet.avro.AvroParquetWriter; -import org.junit.rules.TemporaryFolder; - import java.io.File; import java.io.FilenameFilter; import java.io.IOException; @@ -37,123 +27,139 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobConf; +import org.apache.parquet.avro.AvroParquetWriter; +import org.junit.rules.TemporaryFolder; public class InputFormatTestUtil { - public static File prepareDataset(TemporaryFolder basePath, int numberOfFiles, - String commitNumber) throws IOException { - basePath.create(); - HoodieTestUtils.init(basePath.getRoot().toString()); - File partitionPath = basePath.newFolder("2016", "05", "01"); - for (int i = 0; i < numberOfFiles; i++) { - File dataFile = - new 
File(partitionPath, FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i)); - dataFile.createNewFile(); + + public static File prepareDataset(TemporaryFolder basePath, int numberOfFiles, + String commitNumber) throws IOException { + basePath.create(); + HoodieTestUtils.init(basePath.getRoot().toString()); + File partitionPath = basePath.newFolder("2016", "05", "01"); + for (int i = 0; i < numberOfFiles; i++) { + File dataFile = + new File(partitionPath, FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i)); + dataFile.createNewFile(); + } + return partitionPath; + } + + public static void simulateUpdates(File directory, final String originalCommit, + int numberOfFilesUpdated, + String newCommit, boolean randomize) throws IOException { + List dataFiles = Arrays.asList(directory.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + String commitTs = FSUtils.getCommitTime(name); + return originalCommit.equals(commitTs); + } + })); + if (randomize) { + Collections.shuffle(dataFiles); + } + List toUpdateList = + dataFiles.subList(0, Math.min(numberOfFilesUpdated, dataFiles.size())); + for (File file : toUpdateList) { + String fileId = FSUtils.getFileId(file.getName()); + File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, 1, fileId)); + dataFile.createNewFile(); + } + } + + public static void commit(TemporaryFolder basePath, String commitNumber) throws IOException { + // create the commit + new File(basePath.getRoot().toString() + "/.hoodie/", commitNumber + ".commit").createNewFile(); + } + + public static void setupIncremental(JobConf jobConf, String startCommit, + int numberOfCommitsToPull) { + String modePropertyName = String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, + HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.set(modePropertyName, HoodieHiveUtil.INCREMENTAL_SCAN_MODE); + + String startCommitTimestampName = String + .format(HoodieHiveUtil.HOODIE_START_COMMIT_PATTERN, 
HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.set(startCommitTimestampName, startCommit); + + String maxCommitPulls = String + .format(HoodieHiveUtil.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.setInt(maxCommitPulls, numberOfCommitsToPull); + } + + public static Schema readSchema(String location) throws IOException { + return new Schema.Parser().parse(InputFormatTestUtil.class.getResourceAsStream(location)); + } + + public static File prepareParquetDataset(TemporaryFolder basePath, Schema schema, + int numberOfFiles, int numberOfRecords, + String commitNumber) throws IOException { + basePath.create(); + HoodieTestUtils.init(basePath.getRoot().toString()); + File partitionPath = basePath.newFolder("2016", "05", "01"); + AvroParquetWriter parquetWriter; + for (int i = 0; i < numberOfFiles; i++) { + String fileId = FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i); + File dataFile = + new File(partitionPath, fileId); + // dataFile.createNewFile(); + parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), + schema); + try { + for (GenericRecord record : generateAvroRecords(schema, numberOfRecords, commitNumber, + fileId)) { + parquetWriter.write(record); } - return partitionPath; + } finally { + parquetWriter.close(); + } } + return partitionPath; - public static void simulateUpdates(File directory, final String originalCommit, int numberOfFilesUpdated, - String newCommit, boolean randomize) throws IOException { - List dataFiles = Arrays.asList(directory.listFiles(new FilenameFilter() { - @Override public boolean accept(File dir, String name) { - String commitTs = FSUtils.getCommitTime(name); - return originalCommit.equals(commitTs); - } - })); - if(randomize) { - Collections.shuffle(dataFiles); - } - List toUpdateList = - dataFiles.subList(0, Math.min(numberOfFilesUpdated, dataFiles.size())); - for (File file : toUpdateList) { - String fileId = FSUtils.getFileId(file.getName()); - File dataFile = new 
File(directory, FSUtils.makeDataFileName(newCommit, 1, fileId)); - dataFile.createNewFile(); + } + + private static Iterable generateAvroRecords(Schema schema, + int numberOfRecords, String commitTime, String fileId) throws IOException { + List records = new ArrayList<>(numberOfRecords); + for (int i = 0; i < numberOfRecords; i++) { + records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, commitTime, fileId)); + } + return records; + } + + public static void simulateParquetUpdates(File directory, Schema schema, String originalCommit, + int totalNumberOfRecords, int numberOfRecordsToUpdate, + String newCommit) throws IOException { + File fileToUpdate = directory.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith("parquet"); + } + })[0]; + String fileId = FSUtils.getFileId(fileToUpdate.getName()); + File dataFile = new File(directory, FSUtils.makeDataFileName(newCommit, 1, fileId)); + AvroParquetWriter parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), + schema); + try { + for (GenericRecord record : generateAvroRecords(schema, totalNumberOfRecords, + originalCommit, fileId)) { + if (numberOfRecordsToUpdate > 0) { + // update this record + record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, newCommit); + String oldSeqNo = (String) record.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD); + record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, + oldSeqNo.replace(originalCommit, newCommit)); + numberOfRecordsToUpdate--; } + parquetWriter.write(record); + } + } finally { + parquetWriter.close(); } - public static void commit(TemporaryFolder basePath, String commitNumber) throws IOException { - // create the commit - new File(basePath.getRoot().toString() + "/.hoodie/", commitNumber + ".commit").createNewFile(); - } - - public static void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) { - String modePropertyName = 
String.format(HoodieHiveUtil.HOODIE_CONSUME_MODE_PATTERN, - HoodieTestUtils.RAW_TRIPS_TEST_NAME); - jobConf.set(modePropertyName, HoodieHiveUtil.INCREMENTAL_SCAN_MODE); - - String startCommitTimestampName = String.format(HoodieHiveUtil.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); - jobConf.set(startCommitTimestampName, startCommit); - - String maxCommitPulls = String.format(HoodieHiveUtil.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); - jobConf.setInt(maxCommitPulls, numberOfCommitsToPull); - } - - public static Schema readSchema(String location) throws IOException { - return new Schema.Parser().parse(InputFormatTestUtil.class.getResourceAsStream(location)); - } - - public static File prepareParquetDataset(TemporaryFolder basePath, Schema schema, int numberOfFiles, int numberOfRecords, - String commitNumber) throws IOException { - basePath.create(); - HoodieTestUtils.init(basePath.getRoot().toString()); - File partitionPath = basePath.newFolder("2016", "05", "01"); - AvroParquetWriter parquetWriter; - for (int i = 0; i < numberOfFiles; i++) { - String fileId = FSUtils.makeDataFileName(commitNumber, 1, "fileid" + i); - File dataFile = - new File(partitionPath, fileId); - // dataFile.createNewFile(); - parquetWriter = new AvroParquetWriter(new Path(dataFile.getAbsolutePath()), - schema); - try { - for (GenericRecord record : generateAvroRecords(schema, numberOfRecords, commitNumber, fileId)) { - parquetWriter.write(record); - } - } finally { - parquetWriter.close(); - } - } - return partitionPath; - - } - - private static Iterable generateAvroRecords(Schema schema, int numberOfRecords, String commitTime, String fileId) throws IOException { - List records = new ArrayList<>(numberOfRecords); - for(int i=0;i 0) { - // update this record - record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, newCommit); - String oldSeqNo = (String) record.get(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD); - 
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, - oldSeqNo.replace(originalCommit, newCommit)); - numberOfRecordsToUpdate--; - } - parquetWriter.write(record); - } - } finally { - parquetWriter.close(); - } - - } + } } diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java index 7470e6bbb..e64f918d4 100644 --- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java +++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestHoodieROTablePathFilter.java @@ -15,63 +15,66 @@ */ package com.uber.hoodie.hadoop; -import com.uber.hoodie.common.model.HoodieTestUtils; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.table.HoodieTableMetaClient; +import java.io.File; +import java.io.IOException; import org.apache.hadoop.fs.Path; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; - -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - /** */ public class TestHoodieROTablePathFilter { - @Test - public void testHoodiePaths() throws IOException { - // Create a temp folder as the base path - HoodieTableMetaClient metaClient = HoodieTestUtils.initOnTemp(); - String basePath = metaClient.getBasePath(); + @Test + public void testHoodiePaths() throws IOException { + // Create a temp folder as the base path + HoodieTableMetaClient metaClient = HoodieTestUtils.initOnTemp(); + String basePath = metaClient.getBasePath(); - HoodieTestUtils.createCommitFiles(basePath, "001", "002"); - HoodieTestUtils.createInflightCommitFiles(basePath, "003"); + HoodieTestUtils.createCommitFiles(basePath, "001", "002"); + HoodieTestUtils.createInflightCommitFiles(basePath, "003"); - 
HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f1"); - HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f2"); - HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f3"); - HoodieTestUtils.createDataFile(basePath, "2017/01/01", "002", "f2"); - HoodieTestUtils.createDataFile(basePath, "2017/01/01", "003", "f3"); + HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f1"); + HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f2"); + HoodieTestUtils.createDataFile(basePath, "2017/01/01", "001", "f3"); + HoodieTestUtils.createDataFile(basePath, "2017/01/01", "002", "f2"); + HoodieTestUtils.createDataFile(basePath, "2017/01/01", "003", "f3"); - HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter(); - Path partitionPath = new Path("file://" + basePath + File.separator + "2017/01/01"); - assertTrue("Directories should be accepted", pathFilter.accept(partitionPath)); + HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter(); + Path partitionPath = new Path("file://" + basePath + File.separator + "2017/01/01"); + assertTrue("Directories should be accepted", pathFilter.accept(partitionPath)); - assertTrue(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f1")))); - assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f2")))); - assertTrue(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f3")))); - assertTrue(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "002", "f2")))); - assertFalse(pathFilter.accept(new Path("file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3")))); - } + assertTrue(pathFilter.accept(new Path( + "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f1")))); + assertFalse(pathFilter.accept(new Path( + 
"file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f2")))); + assertTrue(pathFilter.accept(new Path( + "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "001", "f3")))); + assertTrue(pathFilter.accept(new Path( + "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "002", "f2")))); + assertFalse(pathFilter.accept(new Path( + "file:///" + HoodieTestUtils.getDataFilePath(basePath, "2017/01/01", "003", "f3")))); + } - @Test - public void testNonHoodiePaths() throws IOException { - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - String basePath = folder.getRoot().getAbsolutePath(); - HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter(); + @Test + public void testNonHoodiePaths() throws IOException { + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + String basePath = folder.getRoot().getAbsolutePath(); + HoodieROTablePathFilter pathFilter = new HoodieROTablePathFilter(); - String path = basePath + File.separator + "nonhoodiefolder"; - new File(path).mkdirs(); - assertTrue(pathFilter.accept(new Path("file:///" + path))); + String path = basePath + File.separator + "nonhoodiefolder"; + new File(path).mkdirs(); + assertTrue(pathFilter.accept(new Path("file:///" + path))); - path = basePath + File.separator + "nonhoodiefolder/somefile"; - new File(path).createNewFile(); - assertTrue(pathFilter.accept(new Path("file:///" + path))); - } + path = basePath + File.separator + "nonhoodiefolder/somefile"; + new File(path).createNewFile(); + assertTrue(pathFilter.accept(new Path("file:///" + path))); + } } diff --git a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java index 73595f6eb..84f02b868 100644 --- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java +++ 
b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java @@ -19,6 +19,8 @@ package com.uber.hoodie.hadoop.realtime; +import static org.junit.Assert.assertTrue; + import com.google.common.collect.Maps; import com.uber.hoodie.common.model.HoodieLogFile; import com.uber.hoodie.common.model.HoodieTableType; @@ -30,6 +32,13 @@ import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.HoodieAvroUtils; import com.uber.hoodie.common.util.SchemaTestUtil; import com.uber.hoodie.hadoop.InputFormatTestUtil; +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; @@ -52,211 +61,234 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertTrue; - public class HoodieRealtimeRecordReaderTest { - private JobConf jobConf; + private JobConf jobConf; - @Before - public void setUp() { - jobConf = new JobConf(); + @Before + public void setUp() { + jobConf = new JobConf(); + } + + @Rule + public TemporaryFolder basePath = new TemporaryFolder(); + + private HoodieLogFormat.Writer writeLogFile(File partitionDir, Schema schema, String fileId, + String baseCommit, String newCommit, int numberOfRecords) + throws InterruptedException, IOException { + HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(new Path(partitionDir.getPath())) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(fileId) + .overBaseCommit(baseCommit).withFs(FSUtils.getFs()).build(); + List records = new ArrayList<>(); 
+ for (int i = 0; i < numberOfRecords; i++) { + records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, newCommit, "fileid0")); } + Schema writeSchema = records.get(0).getSchema(); + Map metadata = Maps.newHashMap(); + metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, newCommit); + HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, writeSchema, metadata); + writer = writer.appendBlock(dataBlock); + long size = writer.getCurrentSize(); + return writer; + } - @Rule - public TemporaryFolder basePath = new TemporaryFolder(); + @Test + public void testReader() throws Exception { + // initial commit + Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema()); + HoodieTestUtils + .initTableType(basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); + String commitTime = "100"; + File partitionDir = InputFormatTestUtil + .prepareParquetDataset(basePath, schema, 1, 100, commitTime); + InputFormatTestUtil.commit(basePath, commitTime); + // Add the paths + FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - private HoodieLogFormat.Writer writeLogFile(File partitionDir, Schema schema, String fileId, - String baseCommit, String newCommit, int numberOfRecords) throws InterruptedException,IOException { - HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(new Path(partitionDir.getPath())) - .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(fileId) - .overBaseCommit(baseCommit).withFs(FSUtils.getFs()).build(); - List records = new ArrayList<>(); - for(int i=0; i < numberOfRecords; i++) { - records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, newCommit, "fileid0")); - } - Schema writeSchema = records.get(0).getSchema(); - Map metadata = Maps.newHashMap(); - metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, newCommit); - HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, writeSchema, metadata); - writer = 
writer.appendBlock(dataBlock); - long size = writer.getCurrentSize(); - return writer; - } + // update files or generate new log file + String newCommitTime = "101"; + HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, + newCommitTime, 100); + long size = writer.getCurrentSize(); + writer.close(); + assertTrue("block - size should be > 0", size > 0); - @Test - public void testReader() throws Exception { - // initial commit - Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema()); - HoodieTestUtils.initTableType(basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); - String commitTime = "100"; - File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, 100, commitTime); - InputFormatTestUtil.commit(basePath, commitTime); - // Add the paths - FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + //create a split with baseFile (parquet file written earlier) and new log file(s) + String logFilePath = writer.getLogFile().getPath().toString(); + HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(new FileSplit(new Path(partitionDir + + "/fileid0_1_" + commitTime + ".parquet"), 0, 1, jobConf), basePath.getRoot().getPath(), + Arrays.asList(logFilePath), newCommitTime); - // update files or generate new log file - String newCommitTime = "101"; - HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, 100); - long size = writer.getCurrentSize(); - writer.close(); - assertTrue("block - size should be > 0", size > 0); - - //create a split with baseFile (parquet file written earlier) and new log file(s) - String logFilePath = writer.getLogFile().getPath().toString(); - HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(new FileSplit(new Path(partitionDir - + "/fileid0_1_" + commitTime + ".parquet"),0,1,jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime); - - 
//create a RecordReader to be used by HoodieRealtimeRecordReader - RecordReader reader = - new MapredParquetInputFormat(). - getRecordReader(new FileSplit(split.getPath(), 0, - FSUtils.getFs().getLength(split.getPath()), (String[]) null), jobConf, null); - JobConf jobConf = new JobConf(); - List fields = schema.getFields(); - String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(",")); - String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(",")); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); - jobConf.set("partition_columns", "datestr"); - - //validate record reader compaction - HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader); - - //use reader to read base Parquet File and log file, merge in flight and return latest commit - //here all 100 records should be updated, see above - Void key = recordReader.createKey(); - ArrayWritable value = recordReader.createValue(); - while(recordReader.next(key, value)) { - Writable[] values = value.get(); - //check if the record written is with latest commit, here "101" - Assert.assertEquals(values[0].toString(), newCommitTime); - key = recordReader.createKey(); - value = recordReader.createValue(); - } - } - - @Test - public void testReaderWithNestedAndComplexSchema() throws Exception { - // initial commit - Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema()); - HoodieTestUtils.initTableType(basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); - String commitTime = "100"; - int numberOfRecords = 100; - int numberOfLogRecords = numberOfRecords / 2; - File partitionDir = InputFormatTestUtil.prepareParquetDataset(basePath, schema, 1, numberOfRecords, commitTime); - InputFormatTestUtil.commit(basePath, commitTime); - // Add the paths - 
FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); - - // update files or generate new log file - String newCommitTime = "101"; - HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, newCommitTime, numberOfLogRecords); - long size = writer.getCurrentSize(); - writer.close(); - assertTrue("block - size should be > 0", size > 0); - - //create a split with baseFile (parquet file written earlier) and new log file(s) - String logFilePath = writer.getLogFile().getPath().toString(); - HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(new FileSplit(new Path(partitionDir - + "/fileid0_1_" + commitTime + ".parquet"),0,1,jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime); - - //create a RecordReader to be used by HoodieRealtimeRecordReader - RecordReader reader = - new MapredParquetInputFormat(). + //create a RecordReader to be used by HoodieRealtimeRecordReader + RecordReader reader = + new MapredParquetInputFormat(). 
getRecordReader(new FileSplit(split.getPath(), 0, - FSUtils.getFs().getLength(split.getPath()), (String[]) null), jobConf, null); - JobConf jobConf = new JobConf(); - List fields = schema.getFields(); + FSUtils.getFs().getLength(split.getPath()), (String[]) null), jobConf, null); + JobConf jobConf = new JobConf(); + List fields = schema.getFields(); + String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(",")); + String postions = fields.stream().map(f -> String.valueOf(f.pos())) + .collect(Collectors.joining(",")); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions); + jobConf.set("partition_columns", "datestr"); - String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(",")); - String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(",")); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); - jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions); - jobConf.set("partition_columns", "datestr"); + //validate record reader compaction + HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, + reader); - // validate record reader compaction - HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader); - - // use reader to read base Parquet File and log file, merge in flight and return latest commit - // here the first 50 records should be updated, see above - Void key = recordReader.createKey(); - ArrayWritable value = recordReader.createValue(); - int numRecordsRead = 0; - while (recordReader.next(key, value)) { - int currentRecordNo = numRecordsRead; - ++numRecordsRead; - Writable[] values = value.get(); - String recordCommitTime; - //check if the record written is with latest commit, here "101" - if (numRecordsRead > numberOfLogRecords) { - recordCommitTime = commitTime; - } 
else { - recordCommitTime = newCommitTime; - } - String recordCommitTimeSuffix = "@" + recordCommitTime; - - Assert.assertEquals(values[0].toString(), recordCommitTime); - key = recordReader.createKey(); - value = recordReader.createValue(); - - // Assert type STRING - Assert.assertEquals("test value for field: field1", values[5].toString(), "field" + currentRecordNo); - Assert.assertEquals("test value for field: field2",values[6].toString(), "field" + currentRecordNo + recordCommitTimeSuffix); - Assert.assertEquals("test value for field: name", values[7].toString(), "name" + currentRecordNo); - - // Assert type INT - IntWritable intWritable = (IntWritable)values[8]; - Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(), currentRecordNo + recordCommitTime.hashCode()); - - // Assert type LONG - LongWritable longWritable = (LongWritable)values[9]; - Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(), currentRecordNo + recordCommitTime.hashCode()); - - // Assert type FLOAT - FloatWritable floatWritable = (FloatWritable)values[10]; - Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(), (float)((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0); - - // Assert type DOUBLE - DoubleWritable doubleWritable = (DoubleWritable)values[11]; - Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(), (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0); - - // Assert type MAP - ArrayWritable mapItem = (ArrayWritable)values[12]; - Writable[] mapItemValues = mapItem.get(); - ArrayWritable mapItemValue1 = (ArrayWritable)mapItemValues[0]; - ArrayWritable mapItemValue2 = (ArrayWritable)mapItemValues[1]; - Assert.assertEquals("test value for field: tags", mapItemValue1.get()[0].toString(), "mapItem1"); - Assert.assertEquals("test value for field: tags", mapItemValue2.get()[0].toString(), "mapItem2"); - ArrayWritable mapItemValue1value = 
(ArrayWritable)mapItemValue1.get()[1]; - ArrayWritable mapItemValue2value = (ArrayWritable)mapItemValue2.get()[1]; - Assert.assertEquals("test value for field: tags", mapItemValue1value.get().length, 2); - Assert.assertEquals("test value for field: tags", mapItemValue2value.get().length, 2); - Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1", mapItemValue1value.get()[0].toString(), "item" + currentRecordNo); - Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1", mapItemValue2value.get()[0].toString(), "item2" + currentRecordNo); - Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2", mapItemValue1value.get()[1].toString(), "item" + currentRecordNo + recordCommitTimeSuffix); - Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2", mapItemValue2value.get()[1].toString(), "item2" + currentRecordNo + recordCommitTimeSuffix); - - // Assert type RECORD - ArrayWritable recordItem = (ArrayWritable)values[13]; - Writable[] nestedRecord = recordItem.get(); - Assert.assertEquals("test value for field: testNestedRecord.isAdmin", ((BooleanWritable)nestedRecord[0]).get(), false); - Assert.assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(), "UserId" + currentRecordNo + recordCommitTimeSuffix); - - // Assert type ARRAY - ArrayWritable arrayValue = (ArrayWritable)values[14]; - Writable[] arrayValues = arrayValue.get(); - for (int i = 0; i < arrayValues.length; i++) { - Assert.assertEquals("test value for field: stringArray", arrayValues[i].toString(), "stringArray" + i + recordCommitTimeSuffix); - } - } + //use reader to read base Parquet File and log file, merge in flight and return latest commit + //here all 100 records should be updated, see above + Void key = recordReader.createKey(); + ArrayWritable value = recordReader.createValue(); + while (recordReader.next(key, value)) { + Writable[] values = value.get(); + //check if the record written is with latest commit, here 
"101" + Assert.assertEquals(values[0].toString(), newCommitTime); + key = recordReader.createKey(); + value = recordReader.createValue(); } + } + + @Test + public void testReaderWithNestedAndComplexSchema() throws Exception { + // initial commit + Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getComplexEvolvedSchema()); + HoodieTestUtils + .initTableType(basePath.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ); + String commitTime = "100"; + int numberOfRecords = 100; + int numberOfLogRecords = numberOfRecords / 2; + File partitionDir = InputFormatTestUtil + .prepareParquetDataset(basePath, schema, 1, numberOfRecords, commitTime); + InputFormatTestUtil.commit(basePath, commitTime); + // Add the paths + FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); + + // update files or generate new log file + String newCommitTime = "101"; + HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime, + newCommitTime, numberOfLogRecords); + long size = writer.getCurrentSize(); + writer.close(); + assertTrue("block - size should be > 0", size > 0); + + //create a split with baseFile (parquet file written earlier) and new log file(s) + String logFilePath = writer.getLogFile().getPath().toString(); + HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(new FileSplit(new Path(partitionDir + + "/fileid0_1_" + commitTime + ".parquet"), 0, 1, jobConf), basePath.getRoot().getPath(), + Arrays.asList(logFilePath), newCommitTime); + + //create a RecordReader to be used by HoodieRealtimeRecordReader + RecordReader reader = + new MapredParquetInputFormat(). 
+ getRecordReader(new FileSplit(split.getPath(), 0, + FSUtils.getFs().getLength(split.getPath()), (String[]) null), jobConf, null); + JobConf jobConf = new JobConf(); + List fields = schema.getFields(); + + String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(",")); + String positions = fields.stream().map(f -> String.valueOf(f.pos())) + .collect(Collectors.joining(",")); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names); + jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions); + jobConf.set("partition_columns", "datestr"); + + // validate record reader compaction + HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, + reader); + + // use reader to read base Parquet File and log file, merge in flight and return latest commit + // here the first 50 records should be updated, see above + Void key = recordReader.createKey(); + ArrayWritable value = recordReader.createValue(); + int numRecordsRead = 0; + while (recordReader.next(key, value)) { + int currentRecordNo = numRecordsRead; + ++numRecordsRead; + Writable[] values = value.get(); + String recordCommitTime; + //check if the record written is with latest commit, here "101" + if (numRecordsRead > numberOfLogRecords) { + recordCommitTime = commitTime; + } else { + recordCommitTime = newCommitTime; + } + String recordCommitTimeSuffix = "@" + recordCommitTime; + + Assert.assertEquals(values[0].toString(), recordCommitTime); + key = recordReader.createKey(); + value = recordReader.createValue(); + + // Assert type STRING + Assert.assertEquals("test value for field: field1", values[5].toString(), + "field" + currentRecordNo); + Assert.assertEquals("test value for field: field2", values[6].toString(), + "field" + currentRecordNo + recordCommitTimeSuffix); + Assert.assertEquals("test value for field: name", values[7].toString(), + "name" + currentRecordNo); + + // Assert type INT + IntWritable intWritable = (IntWritable) 
values[8]; + Assert.assertEquals("test value for field: favoriteIntNumber", intWritable.get(), + currentRecordNo + recordCommitTime.hashCode()); + + // Assert type LONG + LongWritable longWritable = (LongWritable) values[9]; + Assert.assertEquals("test value for field: favoriteNumber", longWritable.get(), + currentRecordNo + recordCommitTime.hashCode()); + + // Assert type FLOAT + FloatWritable floatWritable = (FloatWritable) values[10]; + Assert.assertEquals("test value for field: favoriteFloatNumber", floatWritable.get(), + (float) ((currentRecordNo + recordCommitTime.hashCode()) / 1024.0), 0); + + // Assert type DOUBLE + DoubleWritable doubleWritable = (DoubleWritable) values[11]; + Assert.assertEquals("test value for field: favoriteDoubleNumber", doubleWritable.get(), + (currentRecordNo + recordCommitTime.hashCode()) / 1024.0, 0); + + // Assert type MAP + ArrayWritable mapItem = (ArrayWritable) values[12]; + Writable[] mapItemValues = mapItem.get(); + ArrayWritable mapItemValue1 = (ArrayWritable) mapItemValues[0]; + ArrayWritable mapItemValue2 = (ArrayWritable) mapItemValues[1]; + Assert.assertEquals("test value for field: tags", mapItemValue1.get()[0].toString(), + "mapItem1"); + Assert.assertEquals("test value for field: tags", mapItemValue2.get()[0].toString(), + "mapItem2"); + ArrayWritable mapItemValue1value = (ArrayWritable) mapItemValue1.get()[1]; + ArrayWritable mapItemValue2value = (ArrayWritable) mapItemValue2.get()[1]; + Assert.assertEquals("test value for field: tags", mapItemValue1value.get().length, 2); + Assert.assertEquals("test value for field: tags", mapItemValue2value.get().length, 2); + Assert.assertEquals("test value for field: tags[\"mapItem1\"].item1", + mapItemValue1value.get()[0].toString(), "item" + currentRecordNo); + Assert.assertEquals("test value for field: tags[\"mapItem2\"].item1", + mapItemValue2value.get()[0].toString(), "item2" + currentRecordNo); + Assert.assertEquals("test value for field: tags[\"mapItem1\"].item2", + 
mapItemValue1value.get()[1].toString(), + "item" + currentRecordNo + recordCommitTimeSuffix); + Assert.assertEquals("test value for field: tags[\"mapItem2\"].item2", + mapItemValue2value.get()[1].toString(), + "item2" + currentRecordNo + recordCommitTimeSuffix); + + // Assert type RECORD + ArrayWritable recordItem = (ArrayWritable) values[13]; + Writable[] nestedRecord = recordItem.get(); + Assert.assertEquals("test value for field: testNestedRecord.isAdmin", + ((BooleanWritable) nestedRecord[0]).get(), false); + Assert + .assertEquals("test value for field: testNestedRecord.userId", nestedRecord[1].toString(), + "UserId" + currentRecordNo + recordCommitTimeSuffix); + + // Assert type ARRAY + ArrayWritable arrayValue = (ArrayWritable) values[14]; + Writable[] arrayValues = arrayValue.get(); + for (int i = 0; i < arrayValues.length; i++) { + Assert.assertEquals("test value for field: stringArray", arrayValues[i].toString(), + "stringArray" + i + recordCommitTimeSuffix); + } + } + } } diff --git a/hoodie-hadoop-mr/src/test/resources/log4j-surefire.properties b/hoodie-hadoop-mr/src/test/resources/log4j-surefire.properties index 1c03f27e6..3613e7d12 100644 --- a/hoodie-hadoop-mr/src/test/resources/log4j-surefire.properties +++ b/hoodie-hadoop-mr/src/test/resources/log4j-surefire.properties @@ -13,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - log4j.rootLogger=WARN, A1 log4j.category.com.uber=INFO log4j.category.org.apache.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. diff --git a/hoodie-hive/pom.xml b/hoodie-hive/pom.xml index 5fd1f6295..dfefe9bfb 100644 --- a/hoodie-hive/pom.xml +++ b/hoodie-hive/pom.xml @@ -15,7 +15,9 @@ ~ limitations under the License. 
--> - + hoodie com.uber.hoodie diff --git a/hoodie-hive/src/assembly/src.xml b/hoodie-hive/src/assembly/src.xml index adb5044b1..41f7b276d 100644 --- a/hoodie-hive/src/assembly/src.xml +++ b/hoodie-hive/src/assembly/src.xml @@ -15,8 +15,8 @@ --> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd"> jar-with-dependencies jar diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncConfig.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncConfig.java index 4f40355de..5379580ef 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncConfig.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncConfig.java @@ -19,7 +19,6 @@ package com.uber.hoodie.hive; import com.beust.jcommander.Parameter; - import java.io.Serializable; import java.util.ArrayList; import java.util.List; diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncTool.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncTool.java index 1268e69e8..088e24a9c 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncTool.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HiveSyncTool.java @@ -26,6 +26,10 @@ import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat; import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent; import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent.PartitionEventType; import com.uber.hoodie.hive.util.SchemaUtil; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Partition; @@ -35,20 +39,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import parquet.schema.MessageType; -import java.util.List; -import java.util.Map; -import java.util.Optional; 
-import java.util.stream.Collectors; - /** - * Tool to sync a hoodie HDFS dataset with a hive metastore table. - * Either use it as a api HiveSyncTool.syncHoodieTable(HiveSyncConfig) - * or as a command line java -cp hoodie-hive.jar HiveSyncTool [args] + * Tool to sync a hoodie HDFS dataset with a hive metastore table. Either use it as a api + * HiveSyncTool.syncHoodieTable(HiveSyncConfig) or as a command line java -cp hoodie-hive.jar + * HiveSyncTool [args] * - * This utility will get the schema from the latest commit and will sync hive table schema - * Also this will sync the partitions incrementally - * (all the partitions modified since the last commit) + * This utility will get the schema from the latest commit and will sync hive table schema Also this + * will sync the partitions incrementally (all the partitions modified since the last commit) */ @SuppressWarnings("WeakerAccess") public class HiveSyncTool { @@ -64,7 +62,7 @@ public class HiveSyncTool { } public void syncHoodieTable() { - switch(hoodieHiveClient.getTableType()) { + switch (hoodieHiveClient.getTableType()) { case COPY_ON_WRITE: syncHoodieTable(false); break; @@ -125,15 +123,15 @@ public class HiveSyncTool { // Check and sync schema if (!tableExists) { LOG.info("Table " + cfg.tableName + " is not found. 
Creating it"); - if(!isRealTime) { + if (!isRealTime) { // TODO - RO Table for MOR only after major compaction (UnboundedCompaction is default for now) hoodieHiveClient.createTable(schema, HoodieInputFormat.class.getName(), - MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); + MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); } else { - // Custom serde will not work with ALTER TABLE REPLACE COLUMNS - // https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488 - hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(), - MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); + // Custom serde will not work with ALTER TABLE REPLACE COLUMNS + // https://github.com/apache/hive/blob/release-1.1.0/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java#L3488 + hoodieHiveClient.createTable(schema, HoodieRealtimeInputFormat.class.getName(), + MapredParquetOutputFormat.class.getName(), ParquetHiveSerDe.class.getName()); } } else { // Check if the dataset schema has evolved diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveClient.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveClient.java index 5f0dc9337..6fd5019df 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveClient.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveClient.java @@ -198,8 +198,8 @@ public class HoodieHiveClient { } /** - * Iterate over the storage partitions and find if there are any new partitions that need - * to be added or updated. Generate a list of PartitionEvent based on the changes required. + * Iterate over the storage partitions and find if there are any new partitions that need to be + * added or updated. Generate a list of PartitionEvent based on the changes required. 
*/ List getPartitionEvents(List tablePartitions, List partitionStoragePartitions) { @@ -297,9 +297,9 @@ public class HoodieHiveClient { } /** - * Gets the schema for a hoodie dataset. - * Depending on the type of table, read from any file written in the latest commit. - * We will assume that the schema has not changed within a single atomic write. + * Gets the schema for a hoodie dataset. Depending on the type of table, read from any file + * written in the latest commit. We will assume that the schema has not changed within a single + * atomic write. * * @return Parquet schema for this dataset */ @@ -313,7 +313,8 @@ public class HoodieHiveClient { .orElseThrow(() -> new InvalidDatasetException(syncConfig.basePath)); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata .fromBytes(activeTimeline.getInstantDetails(lastCommit).get()); - String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny() + String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values() + .stream().findAny() .orElseThrow(() -> new IllegalArgumentException( "Could not find any data file written for commit " + lastCommit + ", could not get schema for dataset " + metaClient.getBasePath())); @@ -330,7 +331,8 @@ public class HoodieHiveClient { lastDeltaCommitAfterCompaction = metaClient.getActiveTimeline() .getDeltaCommitTimeline() .filterCompletedInstants() - .findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE).lastInstant(); + .findInstantsAfter(lastCompactionCommit.get().getTimestamp(), Integer.MAX_VALUE) + .lastInstant(); } LOG.info("Found the last delta commit after last compaction as " + lastDeltaCommitAfterCompaction); @@ -340,8 +342,9 @@ public class HoodieHiveClient { // read from the log file wrote commitMetadata = HoodieCommitMetadata .fromBytes(activeTimeline.getInstantDetails(lastDeltaCommit).get()); - filePath = 
commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().filter(s -> s.contains( - HoodieLogFile.DELTA_EXTENSION)).findAny() + filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values() + .stream().filter(s -> s.contains( + HoodieLogFile.DELTA_EXTENSION)).findAny() .orElseThrow(() -> new IllegalArgumentException( "Could not find any data file written for commit " + lastDeltaCommit + ", could not get schema for dataset " + metaClient.getBasePath())); @@ -361,10 +364,6 @@ public class HoodieHiveClient { /** * Read schema from a data file from the last compaction commit done. - * - * @param lastCompactionCommitOpt - * @return - * @throws IOException */ @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private MessageType readSchemaFromLastCompaction(Optional lastCompactionCommitOpt) @@ -377,7 +376,8 @@ public class HoodieHiveClient { // Read from the compacted file wrote HoodieCompactionMetadata compactionMetadata = HoodieCompactionMetadata .fromBytes(activeTimeline.getInstantDetails(lastCompactionCommit).get()); - String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny() + String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values() + .stream().findAny() .orElseThrow(() -> new IllegalArgumentException( "Could not find any data file written for compaction " + lastCompactionCommit + ", could not get schema for dataset " + metaClient.getBasePath())); @@ -386,11 +386,6 @@ public class HoodieHiveClient { /** * Read the schema from the log file on path - * - * @param lastCompactionCommitOpt - * @param path - * @return - * @throws IOException */ @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private MessageType readSchemaFromLogFile(Optional lastCompactionCommitOpt, @@ -422,7 +417,8 @@ public class HoodieHiveClient { + ". 
File does not exist."); } ParquetMetadata fileFooter = - ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER); + ParquetFileReader + .readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER); return fileFooter.getFileMetaData().getSchema(); } @@ -530,7 +526,7 @@ public class HoodieHiveClient { if (connection != null) { connection.close(); } - if(client != null) { + if (client != null) { client.close(); } } catch (SQLException e) { diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveSyncException.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveSyncException.java index 8419fdfa7..d490ba061 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveSyncException.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/HoodieHiveSyncException.java @@ -18,23 +18,23 @@ package com.uber.hoodie.hive; public class HoodieHiveSyncException extends RuntimeException { - public HoodieHiveSyncException() { - super(); - } + public HoodieHiveSyncException() { + super(); + } - public HoodieHiveSyncException(String message) { - super(message); - } + public HoodieHiveSyncException(String message) { + super(message); + } - public HoodieHiveSyncException(String message, Throwable t) { - super(message, t); - } + public HoodieHiveSyncException(String message, Throwable t) { + super(message, t); + } - public HoodieHiveSyncException(Throwable t) { - super(t); - } + public HoodieHiveSyncException(Throwable t) { + super(t); + } - protected static String format(String message, Object... args) { - return String.format(String.valueOf(message), (Object[]) args); - } + protected static String format(String message, Object... 
args) { + return String.format(String.valueOf(message), (Object[]) args); + } } diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/PartitionValueExtractor.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/PartitionValueExtractor.java index 8ef9a88fd..794c262e3 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/PartitionValueExtractor.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/PartitionValueExtractor.java @@ -21,11 +21,13 @@ package com.uber.hoodie.hive; import java.util.List; /** - * HDFS Path contain hive partition values for the keys it is partitioned on. - * This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path. + * HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not + * straight forward and requires a pluggable implementation to extract the partition value from HDFS + * path. * * e.g. Hive table partitioned by datestr=yyyy-mm-dd and hdfs path /app/hoodie/dataset1/YYYY=[yyyy]/MM=[mm]/DD=[dd] */ public interface PartitionValueExtractor { + List extractPartitionValuesInPath(String partitionPath); } diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/SchemaDifference.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/SchemaDifference.java index 7435e803c..2e3b6c406 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/SchemaDifference.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/SchemaDifference.java @@ -21,88 +21,92 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import parquet.schema.MessageType; - import java.util.List; import java.util.Map; +import parquet.schema.MessageType; /** * Represents the schema difference between the storage schema and hive table schema */ public class SchemaDifference { + + private final MessageType storageSchema; + private final 
Map tableSchema; + private final List deleteColumns; + private final Map updateColumnTypes; + private final Map addColumnTypes; + + private SchemaDifference(MessageType storageSchema, Map tableSchema, + List deleteColumns, Map updateColumnTypes, + Map addColumnTypes) { + this.storageSchema = storageSchema; + this.tableSchema = tableSchema; + this.deleteColumns = ImmutableList.copyOf(deleteColumns); + this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes); + this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes); + } + + public List getDeleteColumns() { + return deleteColumns; + } + + public Map getUpdateColumnTypes() { + return updateColumnTypes; + } + + public Map getAddColumnTypes() { + return addColumnTypes; + } + + @Override + public String toString() { + return Objects.toStringHelper(this).add("deleteColumns", deleteColumns) + .add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes) + .toString(); + } + + public static Builder newBuilder(MessageType storageSchema, Map tableSchema) { + return new Builder(storageSchema, tableSchema); + } + + public boolean isEmpty() { + return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty(); + } + + public static class Builder { + private final MessageType storageSchema; private final Map tableSchema; - private final List deleteColumns; - private final Map updateColumnTypes; - private final Map addColumnTypes; + private List deleteColumns; + private Map updateColumnTypes; + private Map addColumnTypes; - private SchemaDifference(MessageType storageSchema, Map tableSchema, - List deleteColumns, Map updateColumnTypes, Map addColumnTypes) { - this.storageSchema = storageSchema; - this.tableSchema = tableSchema; - this.deleteColumns = ImmutableList.copyOf(deleteColumns); - this.updateColumnTypes = ImmutableMap.copyOf(updateColumnTypes); - this.addColumnTypes = ImmutableMap.copyOf(addColumnTypes); + public Builder(MessageType storageSchema, Map tableSchema) { + 
this.storageSchema = storageSchema; + this.tableSchema = tableSchema; + deleteColumns = Lists.newArrayList(); + updateColumnTypes = Maps.newHashMap(); + addColumnTypes = Maps.newHashMap(); } - public List getDeleteColumns() { - return deleteColumns; + public Builder deleteTableColumn(String column) { + deleteColumns.add(column); + return this; } - public Map getUpdateColumnTypes() { - return updateColumnTypes; + public Builder updateTableColumn(String column, String storageColumnType) { + updateColumnTypes.put(column, storageColumnType); + return this; } - public Map getAddColumnTypes() { - return addColumnTypes; + public Builder addTableColumn(String name, String type) { + addColumnTypes.put(name, type); + return this; } - @Override public String toString() { - return Objects.toStringHelper(this).add("deleteColumns", deleteColumns) - .add("updateColumnTypes", updateColumnTypes).add("addColumnTypes", addColumnTypes) - .toString(); - } - - public static Builder newBuilder(MessageType storageSchema, Map tableSchema) { - return new Builder(storageSchema, tableSchema); - } - - public boolean isEmpty() { - return deleteColumns.isEmpty() && updateColumnTypes.isEmpty() && addColumnTypes.isEmpty(); - } - - public static class Builder { - private final MessageType storageSchema; - private final Map tableSchema; - private List deleteColumns; - private Map updateColumnTypes; - private Map addColumnTypes; - - public Builder(MessageType storageSchema, Map tableSchema) { - this.storageSchema = storageSchema; - this.tableSchema = tableSchema; - deleteColumns = Lists.newArrayList(); - updateColumnTypes = Maps.newHashMap(); - addColumnTypes = Maps.newHashMap(); - } - - public Builder deleteTableColumn(String column) { - deleteColumns.add(column); - return this; - } - - public Builder updateTableColumn(String column, String storageColumnType) { - updateColumnTypes.put(column, storageColumnType); - return this; - } - - public Builder addTableColumn(String name, String type) { - 
addColumnTypes.put(name, type); - return this; - } - - public SchemaDifference build() { - return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes, addColumnTypes); - } + public SchemaDifference build() { + return new SchemaDifference(storageSchema, tableSchema, deleteColumns, updateColumnTypes, + addColumnTypes); } + } } diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/SlashEncodedDayPartitionValueExtractor.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/SlashEncodedDayPartitionValueExtractor.java index b3071641b..956bbb8b2 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/SlashEncodedDayPartitionValueExtractor.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/SlashEncodedDayPartitionValueExtractor.java @@ -23,9 +23,11 @@ import java.util.List; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; + /** - * HDFS Path contain hive partition values for the keys it is partitioned on. - * This mapping is not straight forward and requires a pluggable implementation to extract the partition value from HDFS path. + * HDFS Path contain hive partition values for the keys it is partitioned on. This mapping is not + * straight forward and requires a pluggable implementation to extract the partition value from HDFS + * path. 
* * This implementation extracts datestr=yyyy-mm-dd from path of type /yyyy/mm/dd */ diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/ColumnNameXLator.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/ColumnNameXLator.java index 64049c68e..a06494fe2 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/ColumnNameXLator.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/ColumnNameXLator.java @@ -17,32 +17,32 @@ package com.uber.hoodie.hive.util; import com.google.common.collect.Maps; - import java.util.Iterator; import java.util.Map; public class ColumnNameXLator { - private static Map xformMap = Maps.newHashMap(); - public static String translateNestedColumn(String colName) { - Map.Entry entry; - for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext(); - colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) { - entry = (Map.Entry) i$.next(); - } + private static Map xformMap = Maps.newHashMap(); - return colName; + public static String translateNestedColumn(String colName) { + Map.Entry entry; + for (Iterator i$ = xformMap.entrySet().iterator(); i$.hasNext(); + colName = colName.replaceAll((String) entry.getKey(), (String) entry.getValue())) { + entry = (Map.Entry) i$.next(); } - public static String translateColumn(String colName) { - return colName; - } + return colName; + } - public static String translate(String colName, boolean nestedColumn) { - return !nestedColumn ? translateColumn(colName) : translateNestedColumn(colName); - } + public static String translateColumn(String colName) { + return colName; + } - static { - xformMap.put("\\$", "_dollar_"); - } + public static String translate(String colName, boolean nestedColumn) { + return !nestedColumn ? 
translateColumn(colName) : translateNestedColumn(colName); + } + + static { + xformMap.put("\\$", "_dollar_"); + } } diff --git a/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/SchemaUtil.java b/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/SchemaUtil.java index 2a05ed1cf..9f16c777f 100644 --- a/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/SchemaUtil.java +++ b/hoodie-hive/src/main/java/com/uber/hoodie/hive/util/SchemaUtil.java @@ -21,6 +21,10 @@ import com.google.common.collect.Sets; import com.uber.hoodie.hive.HiveSyncConfig; import com.uber.hoodie.hive.HoodieHiveSyncException; import com.uber.hoodie.hive.SchemaDifference; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import parquet.schema.DecimalMetadata; @@ -30,404 +34,386 @@ import parquet.schema.OriginalType; import parquet.schema.PrimitiveType; import parquet.schema.Type; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; - /** * Schema Utilities */ public class SchemaUtil { - private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class); - /** - * Get the schema difference between the storage schema and hive table schema - * - * @param storageSchema - * @param tableSchema - * @param partitionKeys - * @return - */ - public static SchemaDifference getSchemaDifference(MessageType storageSchema, - Map tableSchema, List partitionKeys) { - Map newTableSchema; - try { - newTableSchema = convertParquetSchemaToHiveSchema(storageSchema); - } catch (IOException e) { - throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema", - e); + private static Logger LOG = LoggerFactory.getLogger(SchemaUtil.class); + + /** + * Get the schema difference between the storage schema and hive table schema + */ + public static SchemaDifference getSchemaDifference(MessageType storageSchema, + Map tableSchema, List partitionKeys) { 
+ Map newTableSchema; + try { + newTableSchema = convertParquetSchemaToHiveSchema(storageSchema); + } catch (IOException e) { + throw new HoodieHiveSyncException("Failed to convert parquet schema to hive schema", + e); + } + LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema); + SchemaDifference.Builder schemaDiffBuilder = + SchemaDifference.newBuilder(storageSchema, tableSchema); + Set tableColumns = Sets.newHashSet(); + + for (Map.Entry field : tableSchema.entrySet()) { + String fieldName = field.getKey().toLowerCase(); + String tickSurroundedFieldName = tickSurround(fieldName); + if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys + .contains(fieldName)) { + schemaDiffBuilder.deleteTableColumn(fieldName); + } else { + // check type + String tableColumnType = field.getValue(); + if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) { + if (partitionKeys.contains(fieldName)) { + // Partition key does not have to be part of the storage schema + continue; + } + // We will log this and continue. 
Hive schema is a superset of all parquet schemas + LOG.warn("Ignoring table column " + fieldName + + " as its not present in the parquet schema"); + continue; } - LOG.info("Getting schema difference for " + tableSchema + "\r\n\r\n" + newTableSchema); - SchemaDifference.Builder schemaDiffBuilder = - SchemaDifference.newBuilder(storageSchema, tableSchema); - Set tableColumns = Sets.newHashSet(); + tableColumnType = tableColumnType.replaceAll("\\s+", ""); - for (Map.Entry field : tableSchema.entrySet()) { - String fieldName = field.getKey().toLowerCase(); - String tickSurroundedFieldName = tickSurround(fieldName); - if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName) && !partitionKeys.contains(fieldName)) { - schemaDiffBuilder.deleteTableColumn(fieldName); - } else { - // check type - String tableColumnType = field.getValue(); - if (!isFieldExistsInSchema(newTableSchema, tickSurroundedFieldName)) { - if (partitionKeys.contains(fieldName)) { - // Partition key does not have to be part of the storage schema - continue; - } - // We will log this and continue. 
Hive schema is a superset of all parquet schemas - LOG.warn("Ignoring table column " + fieldName - + " as its not present in the parquet schema"); - continue; - } - tableColumnType = tableColumnType.replaceAll("\\s+", ""); + String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName); + expectedType = expectedType.replaceAll("\\s+", ""); + expectedType = expectedType.replaceAll("`", ""); - String expectedType = getExpectedType(newTableSchema, tickSurroundedFieldName); - expectedType = expectedType.replaceAll("\\s+", ""); - expectedType = expectedType.replaceAll("`", ""); - - if (!tableColumnType.equalsIgnoreCase(expectedType)) { - // check for incremental datasets, the schema type change is allowed as per evolution rules - if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) { - throw new HoodieHiveSyncException( - "Could not convert field Type from " + tableColumnType + " to " - + expectedType + " for field " + fieldName); - } - schemaDiffBuilder.updateTableColumn(fieldName, - getExpectedType(newTableSchema, tickSurroundedFieldName)); - } - } - tableColumns.add(tickSurroundedFieldName); + if (!tableColumnType.equalsIgnoreCase(expectedType)) { + // check for incremental datasets, the schema type change is allowed as per evolution rules + if (!isSchemaTypeUpdateAllowed(tableColumnType, expectedType)) { + throw new HoodieHiveSyncException( + "Could not convert field Type from " + tableColumnType + " to " + + expectedType + " for field " + fieldName); + } + schemaDiffBuilder.updateTableColumn(fieldName, + getExpectedType(newTableSchema, tickSurroundedFieldName)); } - - for (Map.Entry entry : newTableSchema.entrySet()) { - if (!tableColumns.contains(entry.getKey().toLowerCase())) { - schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue()); - } - } - LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString()); - - return schemaDiffBuilder.build(); + } + tableColumns.add(tickSurroundedFieldName); } - private 
static String getExpectedType(Map newTableSchema, String fieldName) { - for (Map.Entry entry : newTableSchema.entrySet()) { - if (entry.getKey().toLowerCase().equals(fieldName)) { - return entry.getValue(); - } - } - return null; + for (Map.Entry entry : newTableSchema.entrySet()) { + if (!tableColumns.contains(entry.getKey().toLowerCase())) { + schemaDiffBuilder.addTableColumn(entry.getKey(), entry.getValue()); + } } + LOG.info("Difference between schemas: " + schemaDiffBuilder.build().toString()); - private static boolean isFieldExistsInSchema(Map newTableSchema, - String fieldName) { - for (String entry : newTableSchema.keySet()) { - if (entry.toLowerCase().equals(fieldName)) { - return true; - } - } - return false; + return schemaDiffBuilder.build(); + } + + private static String getExpectedType(Map newTableSchema, String fieldName) { + for (Map.Entry entry : newTableSchema.entrySet()) { + if (entry.getKey().toLowerCase().equals(fieldName)) { + return entry.getValue(); + } } + return null; + } + + private static boolean isFieldExistsInSchema(Map newTableSchema, + String fieldName) { + for (String entry : newTableSchema.keySet()) { + if (entry.toLowerCase().equals(fieldName)) { + return true; + } + } + return false; + } - /** - * Returns equivalent Hive table schema read from a parquet file - * - * @param messageType : Parquet Schema - * @return : Hive Table schema read from parquet file MAP[String,String] - * @throws IOException - */ - public static Map convertParquetSchemaToHiveSchema(MessageType messageType) - throws IOException { - Map schema = Maps.newLinkedHashMap(); - List parquetFields = messageType.getFields(); - for (Type parquetType : parquetFields) { - StringBuilder result = new StringBuilder(); - String key = parquetType.getName(); - if (parquetType.isRepetition(Type.Repetition.REPEATED)) { - result.append(createHiveArray(parquetType, "")); - } else { - result.append(convertField(parquetType)); + /** + * Returns equivalent Hive table schema read 
from a parquet file + * + * @param messageType : Parquet Schema + * @return : Hive Table schema read from parquet file MAP[String,String] + */ + public static Map convertParquetSchemaToHiveSchema(MessageType messageType) + throws IOException { + Map schema = Maps.newLinkedHashMap(); + List parquetFields = messageType.getFields(); + for (Type parquetType : parquetFields) { + StringBuilder result = new StringBuilder(); + String key = parquetType.getName(); + if (parquetType.isRepetition(Type.Repetition.REPEATED)) { + result.append(createHiveArray(parquetType, "")); + } else { + result.append(convertField(parquetType)); + } + + schema.put(hiveCompatibleFieldName(key, false), result.toString()); + } + return schema; + } + + /** + * Convert one field data type of parquet schema into an equivalent Hive schema + * + * @param parquetType : Single paruet field + * @return : Equivalent sHive schema + */ + private static String convertField(final Type parquetType) { + StringBuilder field = new StringBuilder(); + if (parquetType.isPrimitive()) { + final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = + parquetType.asPrimitiveType().getPrimitiveTypeName(); + final OriginalType originalType = parquetType.getOriginalType(); + if (originalType == OriginalType.DECIMAL) { + final DecimalMetadata decimalMetadata = + parquetType.asPrimitiveType().getDecimalMetadata(); + return field.append("DECIMAL(").append(decimalMetadata.getPrecision()). 
+ append(" , ").append(decimalMetadata.getScale()).append(")").toString(); + } + // TODO - fix the method naming here + return parquetPrimitiveTypeName + .convert(new PrimitiveType.PrimitiveTypeNameConverter() { + @Override + public String convertBOOLEAN( + PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "boolean"; } - schema.put(hiveCompatibleFieldName(key, false), result.toString()); - } - return schema; - } - - /** - * Convert one field data type of parquet schema into an equivalent Hive - * schema - * - * @param parquetType : Single paruet field - * @return : Equivalent sHive schema - */ - private static String convertField(final Type parquetType) { - StringBuilder field = new StringBuilder(); - if (parquetType.isPrimitive()) { - final PrimitiveType.PrimitiveTypeName parquetPrimitiveTypeName = - parquetType.asPrimitiveType().getPrimitiveTypeName(); - final OriginalType originalType = parquetType.getOriginalType(); - if (originalType == OriginalType.DECIMAL) { - final DecimalMetadata decimalMetadata = - parquetType.asPrimitiveType().getDecimalMetadata(); - return field.append("DECIMAL(").append(decimalMetadata.getPrecision()). 
- append(" , ").append(decimalMetadata.getScale()).append(")").toString(); + @Override + public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "int"; } - // TODO - fix the method naming here - return parquetPrimitiveTypeName - .convert(new PrimitiveType.PrimitiveTypeNameConverter() { - @Override - public String convertBOOLEAN( - PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "boolean"; - } - @Override - public String convertINT32(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "int"; - } - - @Override - public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "bigint"; - } - - @Override - public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "timestamp-millis"; - } - - @Override - public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "float"; - } - - @Override - public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "double"; - } - - @Override - public String convertFIXED_LEN_BYTE_ARRAY( - PrimitiveType.PrimitiveTypeName primitiveTypeName) { - return "binary"; - } - - @Override - public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { - if (originalType == OriginalType.UTF8 - || originalType == OriginalType.ENUM) { - return "string"; - } else { - return "binary"; - } - } - }); - } else { - GroupType parquetGroupType = parquetType.asGroupType(); - OriginalType originalType = parquetGroupType.getOriginalType(); - if (originalType != null) { - switch (originalType) { - case LIST: - if (parquetGroupType.getFieldCount() != 1) { - throw new UnsupportedOperationException( - "Invalid list type " + parquetGroupType); - } - Type elementType = parquetGroupType.getType(0); - if (!elementType.isRepetition(Type.Repetition.REPEATED)) { - throw new UnsupportedOperationException( - "Invalid list type " + parquetGroupType); - } - return 
createHiveArray(elementType, parquetGroupType.getName()); - case MAP: - if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0) - .isPrimitive()) { - throw new UnsupportedOperationException( - "Invalid map type " + parquetGroupType); - } - GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); - if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || - !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || - mapKeyValType.getFieldCount() != 2) { - throw new UnsupportedOperationException( - "Invalid map type " + parquetGroupType); - } - Type keyType = mapKeyValType.getType(0); - if (!keyType.isPrimitive() || - !keyType.asPrimitiveType().getPrimitiveTypeName() - .equals(PrimitiveType.PrimitiveTypeName.BINARY) || - !keyType.getOriginalType().equals(OriginalType.UTF8)) { - throw new UnsupportedOperationException( - "Map key type must be binary (UTF8): " + keyType); - } - Type valueType = mapKeyValType.getType(1); - return createHiveMap(convertField(keyType), convertField(valueType)); - case ENUM: - case UTF8: - return "string"; - case MAP_KEY_VALUE: - // MAP_KEY_VALUE was supposed to be used to annotate key and - // value group levels in a - // MAP. However, that is always implied by the structure of - // MAP. Hence, PARQUET-113 - // dropped the requirement for having MAP_KEY_VALUE. 
- default: - throw new UnsupportedOperationException( - "Cannot convert Parquet type " + parquetType); - } - } else { - // if no original type then it's a record - return createHiveStruct(parquetGroupType.getFields()); + @Override + public String convertINT64(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "bigint"; } - } - } - /** - * Return a 'struct' Hive schema from a list of Parquet fields - * - * @param parquetFields : list of parquet fields - * @return : Equivalent 'struct' Hive schema - */ - private static String createHiveStruct(List parquetFields) { - StringBuilder struct = new StringBuilder(); - struct.append("STRUCT< "); - for (Type field : parquetFields) { - //TODO: struct field name is only translated to support special char($) - //We will need to extend it to other collection type - struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : "); - struct.append(convertField(field)).append(", "); - } - struct.delete(struct.length() - 2, struct.length()); // Remove the last - // ", " - struct.append(">"); - String finalStr = struct.toString(); - // Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema. - // HDrone sync should not fail because of this. 
- finalStr = finalStr.replaceAll("-", "_"); - return finalStr; - } - - - private static String hiveCompatibleFieldName(String fieldName, boolean isNested) { - String result = fieldName; - if (isNested) { - result = ColumnNameXLator.translateNestedColumn(fieldName); - } - return tickSurround(result); - } - - private static String tickSurround(String result) { - if (!result.startsWith("`")) { - result = "`" + result; - } - if (!result.endsWith("`")) { - result = result + "`"; - } - return result; - } - - /** - * Create a 'Map' schema from Parquet map field - * - * @param keyType - * @param valueType - * @return - */ - private static String createHiveMap(String keyType, String valueType) { - return "MAP< " + keyType + ", " + valueType + ">"; - } - - /** - * Create an Array Hive schema from equivalent parquet list type - * - * @param elementType - * @param elementName - * @return - */ - private static String createHiveArray(Type elementType, String elementName) { - StringBuilder array = new StringBuilder(); - array.append("ARRAY< "); - if (elementType.isPrimitive()) { - array.append(convertField(elementType)); - } else { - final GroupType groupType = elementType.asGroupType(); - final List groupFields = groupType.getFields(); - if (groupFields.size() > 1 || (groupFields.size() == 1 && ( - elementType.getName().equals("array") || elementType.getName() - .equals(elementName + "_tuple")))) { - array.append(convertField(elementType)); - } else { - array.append(convertField(groupType.getFields().get(0))); + @Override + public String convertINT96(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "timestamp-millis"; } + + @Override + public String convertFLOAT(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "float"; + } + + @Override + public String convertDOUBLE(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + return "double"; + } + + @Override + public String convertFIXED_LEN_BYTE_ARRAY( + PrimitiveType.PrimitiveTypeName primitiveTypeName) { 
+ return "binary"; + } + + @Override + public String convertBINARY(PrimitiveType.PrimitiveTypeName primitiveTypeName) { + if (originalType == OriginalType.UTF8 + || originalType == OriginalType.ENUM) { + return "string"; + } else { + return "binary"; + } + } + }); + } else { + GroupType parquetGroupType = parquetType.asGroupType(); + OriginalType originalType = parquetGroupType.getOriginalType(); + if (originalType != null) { + switch (originalType) { + case LIST: + if (parquetGroupType.getFieldCount() != 1) { + throw new UnsupportedOperationException( + "Invalid list type " + parquetGroupType); + } + Type elementType = parquetGroupType.getType(0); + if (!elementType.isRepetition(Type.Repetition.REPEATED)) { + throw new UnsupportedOperationException( + "Invalid list type " + parquetGroupType); + } + return createHiveArray(elementType, parquetGroupType.getName()); + case MAP: + if (parquetGroupType.getFieldCount() != 1 || parquetGroupType.getType(0) + .isPrimitive()) { + throw new UnsupportedOperationException( + "Invalid map type " + parquetGroupType); + } + GroupType mapKeyValType = parquetGroupType.getType(0).asGroupType(); + if (!mapKeyValType.isRepetition(Type.Repetition.REPEATED) || + !mapKeyValType.getOriginalType().equals(OriginalType.MAP_KEY_VALUE) || + mapKeyValType.getFieldCount() != 2) { + throw new UnsupportedOperationException( + "Invalid map type " + parquetGroupType); + } + Type keyType = mapKeyValType.getType(0); + if (!keyType.isPrimitive() || + !keyType.asPrimitiveType().getPrimitiveTypeName() + .equals(PrimitiveType.PrimitiveTypeName.BINARY) || + !keyType.getOriginalType().equals(OriginalType.UTF8)) { + throw new UnsupportedOperationException( + "Map key type must be binary (UTF8): " + keyType); + } + Type valueType = mapKeyValType.getType(1); + return createHiveMap(convertField(keyType), convertField(valueType)); + case ENUM: + case UTF8: + return "string"; + case MAP_KEY_VALUE: + // MAP_KEY_VALUE was supposed to be used to annotate key and + // 
value group levels in a + // MAP. However, that is always implied by the structure of + // MAP. Hence, PARQUET-113 + // dropped the requirement for having MAP_KEY_VALUE. + default: + throw new UnsupportedOperationException( + "Cannot convert Parquet type " + parquetType); } - array.append(">"); - return array.toString(); + } else { + // if no original type then it's a record + return createHiveStruct(parquetGroupType.getFields()); + } + } + } + + /** + * Return a 'struct' Hive schema from a list of Parquet fields + * + * @param parquetFields : list of parquet fields + * @return : Equivalent 'struct' Hive schema + */ + private static String createHiveStruct(List parquetFields) { + StringBuilder struct = new StringBuilder(); + struct.append("STRUCT< "); + for (Type field : parquetFields) { + //TODO: struct field name is only translated to support special char($) + //We will need to extend it to other collection type + struct.append(hiveCompatibleFieldName(field.getName(), true)).append(" : "); + struct.append(convertField(field)).append(", "); + } + struct.delete(struct.length() - 2, struct.length()); // Remove the last + // ", " + struct.append(">"); + String finalStr = struct.toString(); + // Struct cannot have - in them. userstore_udr_entities has uuid in struct. This breaks the schema. + // HDrone sync should not fail because of this. 
+ finalStr = finalStr.replaceAll("-", "_"); + return finalStr; + } + + + private static String hiveCompatibleFieldName(String fieldName, boolean isNested) { + String result = fieldName; + if (isNested) { + result = ColumnNameXLator.translateNestedColumn(fieldName); + } + return tickSurround(result); + } + + private static String tickSurround(String result) { + if (!result.startsWith("`")) { + result = "`" + result; + } + if (!result.endsWith("`")) { + result = result + "`"; + } + return result; + } + + /** + * Create a 'Map' schema from Parquet map field + */ + private static String createHiveMap(String keyType, String valueType) { + return "MAP< " + keyType + ", " + valueType + ">"; + } + + /** + * Create an Array Hive schema from equivalent parquet list type + */ + private static String createHiveArray(Type elementType, String elementName) { + StringBuilder array = new StringBuilder(); + array.append("ARRAY< "); + if (elementType.isPrimitive()) { + array.append(convertField(elementType)); + } else { + final GroupType groupType = elementType.asGroupType(); + final List groupFields = groupType.getFields(); + if (groupFields.size() > 1 || (groupFields.size() == 1 && ( + elementType.getName().equals("array") || elementType.getName() + .equals(elementName + "_tuple")))) { + array.append(convertField(elementType)); + } else { + array.append(convertField(groupType.getFields().get(0))); + } + } + array.append(">"); + return array.toString(); + } + + public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) { + if (prevType == null || prevType.trim().isEmpty() || + newType == null || newType.trim().isEmpty()) { + return false; + } + prevType = prevType.toLowerCase(); + newType = newType.toLowerCase(); + if (prevType.equals(newType)) { + return true; + } else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) { + return true; + } else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) { + return 
true; + } else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) { + return true; + } + return false; + } + + public static String generateSchemaString(MessageType storageSchema) throws IOException { + Map hiveSchema = convertParquetSchemaToHiveSchema(storageSchema); + StringBuilder columns = new StringBuilder(); + for (Map.Entry hiveSchemaEntry : hiveSchema.entrySet()) { + columns.append(hiveSchemaEntry.getKey()).append(" "); + columns.append(hiveSchemaEntry.getValue()).append(", "); + } + // Remove the last ", " + columns.delete(columns.length() - 2, columns.length()); + return columns.toString(); + } + + public static String generateCreateDDL(MessageType storageSchema, + HiveSyncConfig config, String inputFormatClass, + String outputFormatClass, String serdeClass) throws IOException { + Map hiveSchema = convertParquetSchemaToHiveSchema(storageSchema); + String columns = generateSchemaString(storageSchema); + + StringBuilder partitionFields = new StringBuilder(); + for (String partitionKey : config.partitionFields) { + partitionFields.append(partitionKey).append(" ") + .append(getPartitionKeyType(hiveSchema, partitionKey)); } - public static boolean isSchemaTypeUpdateAllowed(String prevType, String newType) { - if (prevType == null || prevType.trim().isEmpty() || - newType == null || newType.trim().isEmpty()) { - return false; - } - prevType = prevType.toLowerCase(); - newType = newType.toLowerCase(); - if (prevType.equals(newType)) { - return true; - } else if (prevType.equalsIgnoreCase("int") && newType.equalsIgnoreCase("bigint")) { - return true; - } else if (prevType.equalsIgnoreCase("float") && newType.equalsIgnoreCase("double")) { - return true; - } else if (prevType.contains("struct") && newType.toLowerCase().contains("struct")) { - return true; - } - return false; + StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS "); + sb = sb.append(config.databaseName).append(".").append(config.tableName); + sb = 
sb.append("( ").append(columns).append(")"); + if (!config.partitionFields.isEmpty()) { + sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")"); } + sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'"); + sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'"); + sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '") + .append(config.basePath).append("'"); + return sb.toString(); + } - public static String generateSchemaString(MessageType storageSchema) throws IOException { - Map hiveSchema = convertParquetSchemaToHiveSchema(storageSchema); - StringBuilder columns = new StringBuilder(); - for (Map.Entry hiveSchemaEntry : hiveSchema.entrySet()) { - columns.append(hiveSchemaEntry.getKey()).append(" "); - columns.append(hiveSchemaEntry.getValue()).append(", "); - } - // Remove the last ", " - columns.delete(columns.length() - 2, columns.length()); - return columns.toString(); - } - - public static String generateCreateDDL(MessageType storageSchema, - HiveSyncConfig config, String inputFormatClass, - String outputFormatClass, String serdeClass) throws IOException { - Map hiveSchema = convertParquetSchemaToHiveSchema(storageSchema); - String columns = generateSchemaString(storageSchema); - - StringBuilder partitionFields = new StringBuilder(); - for (String partitionKey : config.partitionFields) { - partitionFields.append(partitionKey).append(" ") - .append(getPartitionKeyType(hiveSchema, partitionKey)); - } - - StringBuilder sb = new StringBuilder("CREATE EXTERNAL TABLE IF NOT EXISTS "); - sb = sb.append(config.databaseName).append(".").append(config.tableName); - sb = sb.append("( ").append(columns).append(")"); - if (!config.partitionFields.isEmpty()) { - sb = sb.append(" PARTITIONED BY (").append(partitionFields).append(")"); - } - sb = sb.append(" ROW FORMAT SERDE '").append(serdeClass).append("'"); - sb = sb.append(" STORED AS INPUTFORMAT '").append(inputFormatClass).append("'"); 
- sb = sb.append(" OUTPUTFORMAT '").append(outputFormatClass).append("' LOCATION '") - .append(config.basePath).append("'"); - return sb.toString(); - } - - private static String getPartitionKeyType(Map hiveSchema, String partitionKey) { - if (hiveSchema.containsKey(partitionKey)) { - return hiveSchema.get(partitionKey); - } - // Default the unknown partition fields to be String - // TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that - return "String"; + private static String getPartitionKeyType(Map hiveSchema, String partitionKey) { + if (hiveSchema.containsKey(partitionKey)) { + return hiveSchema.get(partitionKey); } + // Default the unknown partition fields to be String + // TODO - all partition fields should be part of the schema. datestr is treated as special. Dont do that + return "String"; + } } diff --git a/hoodie-hive/src/test/java/com/uber/hoodie/hive/HiveSyncToolTest.java b/hoodie-hive/src/test/java/com/uber/hoodie/hive/HiveSyncToolTest.java index 398d6e0a8..5250a660d 100644 --- a/hoodie-hive/src/test/java/com/uber/hoodie/hive/HiveSyncToolTest.java +++ b/hoodie-hive/src/test/java/com/uber/hoodie/hive/HiveSyncToolTest.java @@ -18,7 +18,9 @@ package com.uber.hoodie.hive; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import com.uber.hoodie.common.util.SchemaTestUtil; import com.uber.hoodie.hive.HoodieHiveClient.PartitionEvent; @@ -52,9 +54,8 @@ public class HiveSyncToolTest { } /** - * Testing converting array types to Hive field declaration strings, - * according to the Parquet-113 spec: - * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists + * Testing converting array types to Hive field declaration strings, according to the Parquet-113 + * spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists */ @Test public void 
testSchemaConvertArray() throws IOException { @@ -274,7 +275,8 @@ public class HiveSyncToolTest { assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + " should exist after sync completes", hiveClient.doesTableExist()); assertEquals("Hive Schema should match the dataset schema + partition field", - hiveClient.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1); + hiveClient.getTableSchema().size(), + SchemaTestUtil.getSimpleSchema().getFields().size() + 1); assertEquals("Table partitions should match the number of partitions we wrote", 5, hiveClient.scanTablePartitions().size()); assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", @@ -296,7 +298,8 @@ public class HiveSyncToolTest { TestUtil.getHiveConf(), TestUtil.fileSystem); assertEquals("Hive Schema should match the evolved dataset schema + partition field", - hiveClient.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1); + hiveClient.getTableSchema().size(), + SchemaTestUtil.getEvolvedSchema().getFields().size() + 1); // Sync should add the one partition assertEquals("The 2 partitions we wrote should be added to hive", 6, hiveClient.scanTablePartitions().size()); @@ -307,33 +310,37 @@ public class HiveSyncToolTest { @Test public void testSyncMergeOnReadRT() - throws IOException, InitializationError, URISyntaxException, TException, InterruptedException { + throws IOException, InitializationError, URISyntaxException, TException, InterruptedException { String commitTime = "100"; String deltaCommitTime = "101"; String roTablename = TestUtil.hiveSyncConfig.tableName; - TestUtil.hiveSyncConfig.tableName = TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE; + TestUtil.hiveSyncConfig.tableName = + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE; TestUtil.createMORDataset(commitTime, deltaCommitTime, 5); HoodieHiveClient hiveClientRT = new 
HoodieHiveClient(TestUtil.hiveSyncConfig, - TestUtil.getHiveConf(), TestUtil.fileSystem); + TestUtil.getHiveConf(), TestUtil.fileSystem); - assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should not exist initially", - hiveClientRT.doesTableExist()); + assertFalse("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + + " should not exist initially", + hiveClientRT.doesTableExist()); // Lets do the sync HiveSyncTool tool = new HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), - TestUtil.fileSystem); + TestUtil.fileSystem); tool.syncHoodieTable(); - assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + " should exist after sync completes", - hiveClientRT.doesTableExist()); + assertTrue("Table " + TestUtil.hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE + + " should exist after sync completes", + hiveClientRT.doesTableExist()); assertEquals("Hive Schema should match the dataset schema + partition field", - hiveClientRT.getTableSchema().size(), SchemaTestUtil.getSimpleSchema().getFields().size() + 1); + hiveClientRT.getTableSchema().size(), + SchemaTestUtil.getSimpleSchema().getFields().size() + 1); assertEquals("Table partitions should match the number of partitions we wrote", 5, - hiveClientRT.scanTablePartitions().size()); + hiveClientRT.scanTablePartitions().size()); assertEquals("The last commit that was sycned should be updated in the TBLPROPERTIES", - deltaCommitTime, - hiveClientRT.getLastCommitTimeSynced().get()); + deltaCommitTime, + hiveClientRT.getLastCommitTimeSynced().get()); // Now lets create more parititions and these are the only ones which needs to be synced DateTime dateTime = DateTime.now().plusDays(6); @@ -344,20 +351,21 @@ public class HiveSyncToolTest { TestUtil.addMORPartitions(1, true, false, dateTime, commitTime2, deltaCommitTime2); // Lets do the sync tool = new 
HiveSyncTool(TestUtil.hiveSyncConfig, TestUtil.getHiveConf(), - TestUtil.fileSystem); + TestUtil.fileSystem); tool.syncHoodieTable(); hiveClientRT = new HoodieHiveClient(TestUtil.hiveSyncConfig, - TestUtil.getHiveConf(), TestUtil.fileSystem); + TestUtil.getHiveConf(), TestUtil.fileSystem); assertEquals("Hive Schema should match the evolved dataset schema + partition field", - hiveClientRT.getTableSchema().size(), SchemaTestUtil.getEvolvedSchema().getFields().size() + 1); + hiveClientRT.getTableSchema().size(), + SchemaTestUtil.getEvolvedSchema().getFields().size() + 1); // Sync should add the one partition assertEquals("The 2 partitions we wrote should be added to hive", 6, - hiveClientRT.scanTablePartitions().size()); + hiveClientRT.scanTablePartitions().size()); assertEquals("The last commit that was sycned should be 103", - deltaCommitTime2, - hiveClientRT.getLastCommitTimeSynced().get()); + deltaCommitTime2, + hiveClientRT.getLastCommitTimeSynced().get()); TestUtil.hiveSyncConfig.tableName = roTablename; } -} \ No newline at end of file +} diff --git a/hoodie-hive/src/test/java/com/uber/hoodie/hive/TestUtil.java b/hoodie-hive/src/test/java/com/uber/hoodie/hive/TestUtil.java index bb7d6e3cc..2707377af 100644 --- a/hoodie-hive/src/test/java/com/uber/hoodie/hive/TestUtil.java +++ b/hoodie-hive/src/test/java/com/uber/hoodie/hive/TestUtil.java @@ -16,6 +16,9 @@ package com.uber.hoodie.hive; +import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID; +import static org.junit.Assert.fail; + import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -41,6 +44,15 @@ import com.uber.hoodie.common.table.log.block.HoodieLogBlock; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.common.util.SchemaTestUtil; import com.uber.hoodie.hive.util.HiveTestService; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import 
java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.UUID; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.commons.io.FileUtils; @@ -60,19 +72,6 @@ import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.junit.runners.model.InitializationError; -import java.io.File; -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; -import java.util.UUID; - -import static com.uber.hoodie.common.model.HoodieTestUtils.DEFAULT_TASK_PARTITIONID; -import static org.junit.Assert.fail; - @SuppressWarnings("SameParameterValue") public class TestUtil { @@ -161,7 +160,8 @@ public class TestUtil { boolean result = fileSystem.mkdirs(path); checkResult(result); DateTime dateTime = DateTime.now(); - HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime); + HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, + commitTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); createCommitFile(commitMetadata, commitTime); } @@ -177,16 +177,19 @@ public class TestUtil { boolean result = fileSystem.mkdirs(path); checkResult(result); DateTime dateTime = DateTime.now(); - HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, commitTime); + HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, true, dateTime, + commitTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); - createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE); + createdTablesSet.add(hiveSyncConfig.databaseName + "." 
+ hiveSyncConfig.tableName + + HiveSyncTool.SUFFIX_REALTIME_TABLE); HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata(); commitMetadata.getPartitionToWriteStats() .forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0)) .forEach(l -> compactionMetadata.addWriteStat(key, l))); createCompactionCommitFile(compactionMetadata, commitTime); // Write a delta commit - HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), true); + HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), + true); createDeltaCommitFile(deltaMetadata, deltaCommitTime); } @@ -206,18 +209,20 @@ public class TestUtil { HoodieCommitMetadata commitMetadata = createPartitions(numberOfPartitions, isParquetSchemaSimple, startFrom, commitTime); createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName); - createdTablesSet.add(hiveSyncConfig.databaseName + "." + hiveSyncConfig.tableName + HiveSyncTool.SUFFIX_REALTIME_TABLE); + createdTablesSet.add(hiveSyncConfig.databaseName + "." 
+ hiveSyncConfig.tableName + + HiveSyncTool.SUFFIX_REALTIME_TABLE); HoodieCompactionMetadata compactionMetadata = new HoodieCompactionMetadata(); commitMetadata.getPartitionToWriteStats() .forEach((key, value) -> value.stream().map(k -> new CompactionWriteStat(k, key, 0, 0, 0)) .forEach(l -> compactionMetadata.addWriteStat(key, l))); createCompactionCommitFile(compactionMetadata, commitTime); - HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), isLogSchemaSimple); + HoodieCommitMetadata deltaMetadata = createLogFiles(commitMetadata.getPartitionToWriteStats(), + isLogSchemaSimple); createDeltaCommitFile(deltaMetadata, deltaCommitTime); } private static HoodieCommitMetadata createLogFiles( - Map> partitionWriteStats, boolean isLogSchemaSimple) + Map> partitionWriteStats, boolean isLogSchemaSimple) throws InterruptedException, IOException, URISyntaxException { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); for (Entry> wEntry : partitionWriteStats.entrySet()) { @@ -246,7 +251,8 @@ public class TestUtil { Path partPath = new Path(hiveSyncConfig.basePath + "/" + partitionPath); fileSystem.makeQualified(partPath); fileSystem.mkdirs(partPath); - List writeStats = createTestData(partPath, isParquetSchemaSimple, commitTime); + List writeStats = createTestData(partPath, isParquetSchemaSimple, + commitTime); startFrom = startFrom.minusDays(1); writeStats.forEach(s -> commitMetadata.addWriteStat(partitionPath, s)); } diff --git a/hoodie-hive/src/test/java/com/uber/hoodie/hive/util/HiveTestService.java b/hoodie-hive/src/test/java/com/uber/hoodie/hive/util/HiveTestService.java index e9faa4536..26ed1b0f9 100644 --- a/hoodie-hive/src/test/java/com/uber/hoodie/hive/util/HiveTestService.java +++ b/hoodie-hive/src/test/java/com/uber/hoodie/hive/util/HiveTestService.java @@ -20,6 +20,13 @@ package com.uber.hoodie.hive.util; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import 
com.google.common.io.Files; +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.SocketException; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -46,277 +53,274 @@ import org.apache.thrift.transport.TTransportFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.SocketException; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - public class HiveTestService { - private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class); + private static final Logger LOG = LoggerFactory.getLogger(HiveTestService.class); - private static final int CONNECTION_TIMEOUT = 30000; + private static final int CONNECTION_TIMEOUT = 30000; - /** - * Configuration settings - */ - private Configuration hadoopConf; - private String workDir; - private String bindIP = "127.0.0.1"; - private int metastorePort = 9083; - private int serverPort = 9999; - private boolean clean = true; + /** + * Configuration settings + */ + private Configuration hadoopConf; + private String workDir; + private String bindIP = "127.0.0.1"; + private int metastorePort = 9083; + private int serverPort = 9999; + private boolean clean = true; - private Map sysProps = Maps.newHashMap(); - private ExecutorService executorService; - private TServer tServer; - private HiveServer2 hiveServer; + private Map sysProps = Maps.newHashMap(); + private ExecutorService executorService; + private TServer tServer; + private HiveServer2 hiveServer; - public HiveTestService(Configuration configuration) { - this.workDir = Files.createTempDir().getAbsolutePath(); + public HiveTestService(Configuration configuration) { + 
this.workDir = Files.createTempDir().getAbsolutePath(); + } + + public Configuration getHadoopConf() { + return hadoopConf; + } + + public HiveServer2 start() throws IOException { + Preconditions + .checkState(workDir != null, "The work dir must be set before starting cluster."); + + if (hadoopConf == null) { + hadoopConf = new Configuration(); } - public Configuration getHadoopConf() { - return hadoopConf; + String localHiveLocation = getHiveLocation(workDir); + if (clean) { + LOG.info( + "Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh."); + File file = new File(localHiveLocation); + FileUtils.deleteDirectory(file); } - public HiveServer2 start() throws IOException { - Preconditions - .checkState(workDir != null, "The work dir must be set before starting cluster."); + HiveConf serverConf = configureHive(hadoopConf, localHiveLocation); - if (hadoopConf == null) { - hadoopConf = new Configuration(); - } + executorService = Executors.newSingleThreadExecutor(); + tServer = startMetaStore(bindIP, metastorePort, serverConf); - String localHiveLocation = getHiveLocation(workDir); - if (clean) { - LOG.info( - "Cleaning Hive cluster data at: " + localHiveLocation + " and starting fresh."); - File file = new File(localHiveLocation); - FileUtils.deleteDirectory(file); - } + hiveServer = startHiveServer(serverConf); - HiveConf serverConf = configureHive(hadoopConf, localHiveLocation); - - executorService = Executors.newSingleThreadExecutor(); - tServer = startMetaStore(bindIP, metastorePort, serverConf); - - hiveServer = startHiveServer(serverConf); - - String serverHostname; - if (bindIP.equals("0.0.0.0")) { - serverHostname = "localhost"; - } else { - serverHostname = bindIP; - } - if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) { - throw new IOException("Waiting for startup of standalone server"); - } - - LOG.info("Hive Minicluster service started."); - return hiveServer; + String serverHostname; + if 
(bindIP.equals("0.0.0.0")) { + serverHostname = "localhost"; + } else { + serverHostname = bindIP; + } + if (!waitForServerUp(serverConf, serverHostname, metastorePort, CONNECTION_TIMEOUT)) { + throw new IOException("Waiting for startup of standalone server"); } - public void stop() throws IOException { - resetSystemProperties(); - if (tServer != null) { - tServer.stop(); - } - if (hiveServer != null) { - hiveServer.stop(); - } - LOG.info("Hive Minicluster service shut down."); - tServer = null; - hiveServer = null; - hadoopConf = null; + LOG.info("Hive Minicluster service started."); + return hiveServer; + } + + public void stop() throws IOException { + resetSystemProperties(); + if (tServer != null) { + tServer.stop(); + } + if (hiveServer != null) { + hiveServer.stop(); + } + LOG.info("Hive Minicluster service shut down."); + tServer = null; + hiveServer = null; + hadoopConf = null; + } + + private HiveConf configureHive(Configuration conf, String localHiveLocation) + throws IOException { + conf.set("hive.metastore.local", "false"); + conf.set(HiveConf.ConfVars.METASTOREURIS.varname, + "thrift://" + bindIP + ":" + metastorePort); + conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP); + conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort); + // The following line to turn of SASL has no effect since HiveAuthFactory calls + // 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657, + // in Hive 0.14. + // As a workaround, the property is set in hive-site.xml in this module. 
+ //conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL"); + File localHiveDir = new File(localHiveLocation); + localHiveDir.mkdirs(); + File metastoreDbDir = new File(localHiveDir, "metastore_db"); + conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, + "jdbc:derby:" + metastoreDbDir.getPath() + ";create=true"); + File derbyLogFile = new File(localHiveDir, "derby.log"); + derbyLogFile.createNewFile(); + setSystemProperty("derby.stream.error.file", derbyLogFile.getPath()); + conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + Files.createTempDir().getAbsolutePath()); + + return new HiveConf(conf, this.getClass()); + } + + private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) { + long start = System.currentTimeMillis(); + while (true) { + try { + new HiveMetaStoreClient(serverConf); + return true; + } catch (MetaException e) { + // ignore as this is expected + LOG.info("server " + hostname + ":" + port + " not up " + e); + } + + if (System.currentTimeMillis() > start + timeout) { + break; + } + try { + Thread.sleep(250); + } catch (InterruptedException e) { + // ignore + } + } + return false; + } + + private void setSystemProperty(String name, String value) { + if (!sysProps.containsKey(name)) { + String currentValue = System.getProperty(name); + sysProps.put(name, currentValue); + } + if (value != null) { + System.setProperty(name, value); + } else { + System.getProperties().remove(name); + } + } + + private void resetSystemProperties() { + for (Map.Entry entry : sysProps.entrySet()) { + if (entry.getValue() != null) { + System.setProperty(entry.getKey(), entry.getValue()); + } else { + System.getProperties().remove(entry.getKey()); + } + } + sysProps.clear(); + } + + private static String getHiveLocation(String baseLocation) { + return baseLocation + Path.SEPARATOR + "hive"; + } + + private HiveServer2 startHiveServer(HiveConf serverConf) { + HiveServer2 hiveServer = new HiveServer2(); + 
hiveServer.init(serverConf); + hiveServer.start(); + return hiveServer; + } + + // XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore, + // with changes to support binding to a specified IP address (not only 0.0.0.0) + + + private static final class ChainedTTransportFactory extends TTransportFactory { + + private final TTransportFactory parentTransFactory; + private final TTransportFactory childTransFactory; + + private ChainedTTransportFactory(TTransportFactory parentTransFactory, + TTransportFactory childTransFactory) { + this.parentTransFactory = parentTransFactory; + this.childTransFactory = childTransFactory; } - private HiveConf configureHive(Configuration conf, String localHiveLocation) - throws IOException { - conf.set("hive.metastore.local", "false"); - conf.set(HiveConf.ConfVars.METASTOREURIS.varname, - "thrift://" + bindIP + ":" + metastorePort); - conf.set(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST.varname, bindIP); - conf.setInt(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_PORT.varname, serverPort); - // The following line to turn of SASL has no effect since HiveAuthFactory calls - // 'new HiveConf()'. This is fixed by https://issues.apache.org/jira/browse/HIVE-6657, - // in Hive 0.14. - // As a workaround, the property is set in hive-site.xml in this module. 
- //conf.set(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION.varname, "NOSASL"); - File localHiveDir = new File(localHiveLocation); - localHiveDir.mkdirs(); - File metastoreDbDir = new File(localHiveDir, "metastore_db"); - conf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, - "jdbc:derby:" + metastoreDbDir.getPath() + ";create=true"); - File derbyLogFile = new File(localHiveDir, "derby.log"); - derbyLogFile.createNewFile(); - setSystemProperty("derby.stream.error.file", derbyLogFile.getPath()); - conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, - Files.createTempDir().getAbsolutePath()); + @Override + public TTransport getTransport(TTransport trans) { + return childTransFactory.getTransport(parentTransFactory.getTransport(trans)); + } + } - return new HiveConf(conf, this.getClass()); + + private static final class TServerSocketKeepAlive extends TServerSocket { + + public TServerSocketKeepAlive(int port) throws TTransportException { + super(port, 0); } - private boolean waitForServerUp(HiveConf serverConf, String hostname, int port, int timeout) { - long start = System.currentTimeMillis(); - while (true) { - try { - new HiveMetaStoreClient(serverConf); - return true; - } catch (MetaException e) { - // ignore as this is expected - LOG.info("server " + hostname + ":" + port + " not up " + e); - } - - if (System.currentTimeMillis() > start + timeout) { - break; - } - try { - Thread.sleep(250); - } catch (InterruptedException e) { - // ignore - } - } - return false; + public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException { + super(address, 0); } - private void setSystemProperty(String name, String value) { - if (!sysProps.containsKey(name)) { - String currentValue = System.getProperty(name); - sysProps.put(name, currentValue); - } - if (value != null) { - System.setProperty(name, value); - } else { - System.getProperties().remove(name); - } + @Override + protected TSocket acceptImpl() throws TTransportException { + TSocket ts = 
super.acceptImpl(); + try { + ts.getSocket().setKeepAlive(true); + } catch (SocketException e) { + throw new TTransportException(e); + } + return ts; } + } - private void resetSystemProperties() { - for (Map.Entry entry : sysProps.entrySet()) { - if (entry.getValue() != null) { - System.setProperty(entry.getKey(), entry.getValue()); - } else { - System.getProperties().remove(entry.getKey()); - } - } - sysProps.clear(); - } - - private static String getHiveLocation(String baseLocation) { - return baseLocation + Path.SEPARATOR + "hive"; - } - - private HiveServer2 startHiveServer(HiveConf serverConf) { - HiveServer2 hiveServer = new HiveServer2(); - hiveServer.init(serverConf); - hiveServer.start(); - return hiveServer; - } - - // XXX: From org.apache.hadoop.hive.metastore.HiveMetaStore, - // with changes to support binding to a specified IP address (not only 0.0.0.0) - - - private static final class ChainedTTransportFactory extends TTransportFactory { - private final TTransportFactory parentTransFactory; - private final TTransportFactory childTransFactory; - - private ChainedTTransportFactory(TTransportFactory parentTransFactory, - TTransportFactory childTransFactory) { - this.parentTransFactory = parentTransFactory; - this.childTransFactory = childTransFactory; - } - - @Override public TTransport getTransport(TTransport trans) { - return childTransFactory.getTransport(parentTransFactory.getTransport(trans)); - } - } - - - private static final class TServerSocketKeepAlive extends TServerSocket { - public TServerSocketKeepAlive(int port) throws TTransportException { - super(port, 0); - } - - public TServerSocketKeepAlive(InetSocketAddress address) throws TTransportException { - super(address, 0); - } - - @Override protected TSocket acceptImpl() throws TTransportException { - TSocket ts = super.acceptImpl(); - try { - ts.getSocket().setKeepAlive(true); - } catch (SocketException e) { - throw new TTransportException(e); - } - return ts; - } - } - - public TServer 
startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException { - try { - // Server will create new threads up to max as necessary. After an idle - // period, it will destory threads to keep the number of threads in the - // pool to min. - int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS); - int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS); - boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE); - boolean useFramedTransport = - conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT); - - // don't support SASL yet - //boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL); - - TServerTransport serverTransport; - if (forceBindIP != null) { - InetSocketAddress address = new InetSocketAddress(forceBindIP, port); - serverTransport = - tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address); - - } else { - serverTransport = - tcpKeepAlive ? new TServerSocketKeepAlive(port) : new TServerSocket(port); - } - - TProcessor processor; - TTransportFactory transFactory; - - IHMSHandler handler = (IHMSHandler) HiveMetaStore - .newRetryingHMSHandler("new db based metaserver", conf, true); - - if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) { - transFactory = useFramedTransport ? - new ChainedTTransportFactory(new TFramedTransport.Factory(), - new TUGIContainingTransport.Factory()) : - new TUGIContainingTransport.Factory(); - - processor = new TUGIBasedProcessor(handler); - LOG.info("Starting DB backed MetaStore Server with SetUGI enabled"); - } else { - transFactory = - useFramedTransport ? 
new TFramedTransport.Factory() : new TTransportFactory(); - processor = new TSetIpAddressProcessor(handler); - LOG.info("Starting DB backed MetaStore Server"); - } - - TThreadPoolServer.Args args = - new TThreadPoolServer.Args(serverTransport).processor(processor) - .transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory()) - .minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads); - - final TServer tServer = new TThreadPoolServer(args); - executorService.submit(new Runnable() { - @Override public void run() { - tServer.serve(); - } - }); - return tServer; - } catch (Throwable x) { - throw new IOException(x); + public TServer startMetaStore(String forceBindIP, int port, HiveConf conf) throws IOException { + try { + // Server will create new threads up to max as necessary. After an idle + // period, it will destory threads to keep the number of threads in the + // pool to min. + int minWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMINTHREADS); + int maxWorkerThreads = conf.getIntVar(HiveConf.ConfVars.METASTORESERVERMAXTHREADS); + boolean tcpKeepAlive = conf.getBoolVar(HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE); + boolean useFramedTransport = + conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_FRAMED_TRANSPORT); + + // don't support SASL yet + //boolean useSasl = conf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL); + + TServerTransport serverTransport; + if (forceBindIP != null) { + InetSocketAddress address = new InetSocketAddress(forceBindIP, port); + serverTransport = + tcpKeepAlive ? new TServerSocketKeepAlive(address) : new TServerSocket(address); + + } else { + serverTransport = + tcpKeepAlive ? 
new TServerSocketKeepAlive(port) : new TServerSocket(port); + } + + TProcessor processor; + TTransportFactory transFactory; + + IHMSHandler handler = (IHMSHandler) HiveMetaStore + .newRetryingHMSHandler("new db based metaserver", conf, true); + + if (conf.getBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI)) { + transFactory = useFramedTransport ? + new ChainedTTransportFactory(new TFramedTransport.Factory(), + new TUGIContainingTransport.Factory()) : + new TUGIContainingTransport.Factory(); + + processor = new TUGIBasedProcessor(handler); + LOG.info("Starting DB backed MetaStore Server with SetUGI enabled"); + } else { + transFactory = + useFramedTransport ? new TFramedTransport.Factory() : new TTransportFactory(); + processor = new TSetIpAddressProcessor(handler); + LOG.info("Starting DB backed MetaStore Server"); + } + + TThreadPoolServer.Args args = + new TThreadPoolServer.Args(serverTransport).processor(processor) + .transportFactory(transFactory).protocolFactory(new TBinaryProtocol.Factory()) + .minWorkerThreads(minWorkerThreads).maxWorkerThreads(maxWorkerThreads); + + final TServer tServer = new TThreadPoolServer(args); + executorService.submit(new Runnable() { + @Override + public void run() { + tServer.serve(); } + }); + return tServer; + } catch (Throwable x) { + throw new IOException(x); } + } } diff --git a/hoodie-hive/src/test/resources/log4j-surefire.properties b/hoodie-hive/src/test/resources/log4j-surefire.properties index cc6a57052..8027f04d8 100644 --- a/hoodie-hive/src/test/resources/log4j-surefire.properties +++ b/hoodie-hive/src/test/resources/log4j-surefire.properties @@ -13,12 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # - log4j.rootLogger=WARN, A1 log4j.category.com.uber=INFO log4j.category.org.apache.parquet.hadoop=WARN log4j.category.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. 
diff --git a/hoodie-spark/pom.xml b/hoodie-spark/pom.xml index 114dd516f..5f3763220 100644 --- a/hoodie-spark/pom.xml +++ b/hoodie-spark/pom.xml @@ -17,216 +17,218 @@ ~ --> - - - hoodie - com.uber.hoodie - 0.4.1-SNAPSHOT - - 4.0.0 - + + + hoodie com.uber.hoodie - hoodie-spark - jar + 0.4.1-SNAPSHOT + + 4.0.0 - - 1.2.17 - 4.10 - + com.uber.hoodie + hoodie-spark + jar - - - scala-tools.org - Scala-tools Maven2 Repository - http://scala-tools.org/repo-releases - - + + 1.2.17 + 4.10 + - - - - - net.alchim31.maven - scala-maven-plugin - 3.3.1 - - - org.apache.maven.plugins - maven-compiler-plugin - 2.0.2 - - - + + + scala-tools.org + Scala-tools Maven2 Repository + http://scala-tools.org/repo-releases + + - - - org.apache.maven.plugins - maven-dependency-plugin - - - copy-dependencies - prepare-package - - copy-dependencies - - - ${project.build.directory}/lib - true - true - true - - - - - - net.alchim31.maven - scala-maven-plugin - - - scala-compile-first - process-resources - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - compile - - compile - - - - - - org.apache.rat - apache-rat-plugin - - - + + + + + net.alchim31.maven + scala-maven-plugin + 3.3.1 + + + org.apache.maven.plugins + maven-compiler-plugin + 2.0.2 + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + prepare-package + + copy-dependencies + + + ${project.build.directory}/lib + true + true + true + + + + + + net.alchim31.maven + scala-maven-plugin + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + compile + + compile + + + + + + org.apache.rat + apache-rat-plugin + + + - - - org.scala-lang - scala-library - ${scala.version} - - - org.scalatest - scalatest_2.11 - 3.0.1 - test - - - org.apache.spark - 
spark-core_2.11 - - - org.apache.spark - spark-sql_2.11 - - - com.databricks - spark-avro_2.11 - 3.2.0 - - - com.fasterxml.jackson.core - jackson-annotations - - - org.codehaus.jackson - jackson-mapper-asl - + + + org.scala-lang + scala-library + ${scala.version} + + + org.scalatest + scalatest_2.11 + 3.0.1 + test + + + org.apache.spark + spark-core_2.11 + + + org.apache.spark + spark-sql_2.11 + + + com.databricks + spark-avro_2.11 + 3.2.0 + + + com.fasterxml.jackson.core + jackson-annotations + + + org.codehaus.jackson + jackson-mapper-asl + - - org.apache.hadoop - hadoop-client - - - javax.servlet - * - - - provided - + + org.apache.hadoop + hadoop-client + + + javax.servlet + * + + + provided + - - org.apache.hadoop - hadoop-common - provided - + + org.apache.hadoop + hadoop-common + provided + - - log4j - log4j - ${log4j.version} - - - org.apache.avro - avro - + + log4j + log4j + ${log4j.version} + + + org.apache.avro + avro + - - org.apache.commons - commons-lang3 - + + org.apache.commons + commons-lang3 + - - org.apache.commons - commons-configuration2 - + + org.apache.commons + commons-configuration2 + - - com.uber.hoodie - hoodie-client - ${project.version} - - - com.uber.hoodie - hoodie-common - ${project.version} - - - com.uber.hoodie - hoodie-hadoop-mr - ${project.version} - - - junit - junit-dep - ${junit.version} - test - + + com.uber.hoodie + hoodie-client + ${project.version} + + + com.uber.hoodie + hoodie-common + ${project.version} + + + com.uber.hoodie + hoodie-hadoop-mr + ${project.version} + + + junit + junit-dep + ${junit.version} + test + - - com.uber.hoodie - hoodie-client - ${project.version} - test-jar - test - - - com.uber.hoodie - hoodie-common - ${project.version} - test-jar - test - - + + com.uber.hoodie + hoodie-client + ${project.version} + test-jar + test + + + com.uber.hoodie + hoodie-common + ${project.version} + test-jar + test + + diff --git a/hoodie-spark/src/main/java/com/uber/hoodie/BaseAvroPayload.java 
b/hoodie-spark/src/main/java/com/uber/hoodie/BaseAvroPayload.java index 45022f28a..26e9cd31e 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/BaseAvroPayload.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/BaseAvroPayload.java @@ -18,8 +18,8 @@ package com.uber.hoodie; -import org.apache.avro.generic.GenericRecord; import java.io.Serializable; +import org.apache.avro.generic.GenericRecord; /** * Base class for all AVRO record based payloads, that can be ordered based on a field @@ -27,23 +27,23 @@ import java.io.Serializable; public abstract class BaseAvroPayload implements Serializable { - /** - * Avro data extracted from the source - */ - protected final GenericRecord record; + /** + * Avro data extracted from the source + */ + protected final GenericRecord record; - /** - * For purposes of preCombining - */ - protected final Comparable orderingVal; + /** + * For purposes of preCombining + */ + protected final Comparable orderingVal; - /** - * - * @param record - * @param orderingVal - */ - public BaseAvroPayload(GenericRecord record, Comparable orderingVal) { - this.record = record; - this.orderingVal = orderingVal; - } + /** + * + * @param record + * @param orderingVal + */ + public BaseAvroPayload(GenericRecord record, Comparable orderingVal) { + this.record = record; + this.orderingVal = orderingVal; + } } diff --git a/hoodie-spark/src/main/java/com/uber/hoodie/DataSourceUtils.java b/hoodie-spark/src/main/java/com/uber/hoodie/DataSourceUtils.java index 1af5c199e..b16202cff 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/DataSourceUtils.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/DataSourceUtils.java @@ -27,126 +27,130 @@ import com.uber.hoodie.config.HoodieWriteConfig; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.exception.HoodieNotSupportedException; import com.uber.hoodie.index.HoodieIndex; - +import java.io.IOException; +import java.util.List; +import java.util.Map; import 
org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.lang3.reflect.ConstructorUtils; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.IOException; -import java.util.List; -import java.util.Map; - /** * Utilities used throughout the data source */ public class DataSourceUtils { - /** - * Obtain value of the provided field as string, denoted by dot notation. e.g: a.b.c - */ - public static String getNestedFieldValAsString(GenericRecord record, String fieldName) { - String[] parts = fieldName.split("\\."); - GenericRecord valueNode = record; - for (int i = 0; i < parts.length; i++) { - String part = parts[i]; - Object val = valueNode.get(part); - if (val == null) { - break; - } + /** + * Obtain value of the provided field as string, denoted by dot notation. e.g: a.b.c + */ + public static String getNestedFieldValAsString(GenericRecord record, String fieldName) { + String[] parts = fieldName.split("\\."); + GenericRecord valueNode = record; + for (int i = 0; i < parts.length; i++) { + String part = parts[i]; + Object val = valueNode.get(part); + if (val == null) { + break; + } - // return, if last part of name - if (i == parts.length - 1) { - return val.toString(); - } else { - // VC: Need a test here - if (!(val instanceof GenericRecord)) { - throw new HoodieException("Cannot find a record at part value :" + part); - } - valueNode = (GenericRecord) val; - } + // return, if last part of name + if (i == parts.length - 1) { + return val.toString(); + } else { + // VC: Need a test here + if (!(val instanceof GenericRecord)) { + throw new HoodieException("Cannot find a record at part value :" + part); } - throw new HoodieException(fieldName + " field not found in record"); + valueNode = (GenericRecord) val; + } } + throw new HoodieException(fieldName + " field not found in record"); + } - /** - * Create a key generator class via 
reflection, passing in any configs needed - */ - public static KeyGenerator createKeyGenerator(String keyGeneratorClass, PropertiesConfiguration cfg) throws IOException { - try { - return (KeyGenerator) ConstructorUtils.invokeConstructor(Class.forName(keyGeneratorClass), (Object) cfg); - } catch (Throwable e) { - throw new IOException("Could not load key generator class " + keyGeneratorClass, e); - } + /** + * Create a key generator class via reflection, passing in any configs needed + */ + public static KeyGenerator createKeyGenerator(String keyGeneratorClass, + PropertiesConfiguration cfg) throws IOException { + try { + return (KeyGenerator) ConstructorUtils + .invokeConstructor(Class.forName(keyGeneratorClass), (Object) cfg); + } catch (Throwable e) { + throw new IOException("Could not load key generator class " + keyGeneratorClass, e); } + } - /** - * Create a payload class via reflection, passing in an ordering/precombine value. - */ - public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record, Comparable orderingVal) throws IOException { - try { - return (HoodieRecordPayload) ConstructorUtils.invokeConstructor(Class.forName(payloadClass), (Object) record, (Object) orderingVal); - } catch (Throwable e) { - throw new IOException("Could not create payload for class: " + payloadClass, e); - } + /** + * Create a payload class via reflection, passing in an ordering/precombine value. 
+ */ + public static HoodieRecordPayload createPayload(String payloadClass, GenericRecord record, + Comparable orderingVal) throws IOException { + try { + return (HoodieRecordPayload) ConstructorUtils + .invokeConstructor(Class.forName(payloadClass), (Object) record, (Object) orderingVal); + } catch (Throwable e) { + throw new IOException("Could not create payload for class: " + payloadClass, e); } + } - public static void checkRequiredProperties(PropertiesConfiguration configuration, List checkPropNames) { - checkPropNames.stream().forEach(prop -> { - if (!configuration.containsKey(prop)) { - throw new HoodieNotSupportedException("Required property " + prop + " is missing"); - } - }); + public static void checkRequiredProperties(PropertiesConfiguration configuration, + List checkPropNames) { + checkPropNames.stream().forEach(prop -> { + if (!configuration.containsKey(prop)) { + throw new HoodieNotSupportedException("Required property " + prop + " is missing"); + } + }); + } + + public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, + String schemaStr, + String basePath, + String tblName, + Map parameters) throws Exception { + HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() + .combineInput(true, true) + .withPath(basePath) + .withAutoCommit(false) + .withSchema(schemaStr) + .forTable(tblName) + .withIndexConfig( + HoodieIndexConfig.newBuilder() + .withIndexType(HoodieIndex.IndexType.BLOOM) + .build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY())) + .build()) + // override above with Hoodie configs specified as options. 
+ .withProps(parameters) + .build(); + + return new HoodieWriteClient<>(jssc, writeConfig); + } + + + public static JavaRDD doWriteOperation(HoodieWriteClient client, + JavaRDD hoodieRecords, + String commitTime, + String operation) { + if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) { + return client.bulkInsert(hoodieRecords, commitTime); + } else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) { + return client.insert(hoodieRecords, commitTime); + } else { + //default is upsert + return client.upsert(hoodieRecords, commitTime); } + } - public static HoodieWriteClient createHoodieClient(JavaSparkContext jssc, - String schemaStr, - String basePath, - String tblName, - Map parameters) throws Exception { - HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder() - .combineInput(true, true) - .withPath(basePath) - .withAutoCommit(false) - .withSchema(schemaStr) - .forTable(tblName) - .withIndexConfig( - HoodieIndexConfig.newBuilder() - .withIndexType(HoodieIndex.IndexType.BLOOM) - .build()) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY())).build()) - // override above with Hoodie configs specified as options. 
- .withProps(parameters) - .build(); - - return new HoodieWriteClient<>(jssc, writeConfig); - } - - - public static JavaRDD doWriteOperation(HoodieWriteClient client, - JavaRDD hoodieRecords, - String commitTime, - String operation) { - if (operation.equals(DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL())) { - return client.bulkInsert(hoodieRecords, commitTime); - } else if (operation.equals(DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())) { - return client.insert(hoodieRecords, commitTime); - } else { - //default is upsert - return client.upsert(hoodieRecords, commitTime); - } - } - - public static HoodieRecord createHoodieRecord(GenericRecord gr, - Comparable orderingVal, - HoodieKey hKey, - String payloadClass) throws IOException { - HoodieRecordPayload payload = DataSourceUtils.createPayload( - payloadClass, - gr, - orderingVal); - return new HoodieRecord<>(hKey, payload); - } + public static HoodieRecord createHoodieRecord(GenericRecord gr, + Comparable orderingVal, + HoodieKey hKey, + String payloadClass) throws IOException { + HoodieRecordPayload payload = DataSourceUtils.createPayload( + payloadClass, + gr, + orderingVal); + return new HoodieRecord<>(hKey, payload); + } } diff --git a/hoodie-spark/src/main/java/com/uber/hoodie/HoodieDataSourceHelpers.java b/hoodie-spark/src/main/java/com/uber/hoodie/HoodieDataSourceHelpers.java index d43a81dd6..fb8db4a94 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/HoodieDataSourceHelpers.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/HoodieDataSourceHelpers.java @@ -19,68 +19,62 @@ package com.uber.hoodie; import com.google.common.collect.Sets; - import com.uber.hoodie.common.model.HoodieTableType; import com.uber.hoodie.common.table.HoodieTableMetaClient; import com.uber.hoodie.common.table.HoodieTimeline; import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.table.HoodieTable; - -import 
org.apache.hadoop.fs.FileSystem; - import java.util.List; import java.util.stream.Collectors; +import org.apache.hadoop.fs.FileSystem; /** - * List of helpers to aid, construction of instanttime for read and write operations using datasource + * List of helpers to aid, construction of instanttime for read and write operations using + * datasource */ public class HoodieDataSourceHelpers { - /** - * Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently - * fed to an incremental view read, to perform incremental processing. - */ - public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) { - return listCommitsSince(fs, basePath, commitTimestamp).size() > 0; - } + /** + * Checks if the Hoodie dataset has new data since given timestamp. This can be subsequently fed + * to an incremental view read, to perform incremental processing. + */ + public static boolean hasNewCommits(FileSystem fs, String basePath, String commitTimestamp) { + return listCommitsSince(fs, basePath, commitTimestamp).size() > 0; + } - /** - * Get a list of instant times that have occurred, from the given instant timestamp. - * - * @param instantTimestamp - */ - public static List listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) { - HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); - return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants() - .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); - } + /** + * Get a list of instant times that have occurred, from the given instant timestamp. 
+ */ + public static List listCommitsSince(FileSystem fs, String basePath, + String instantTimestamp) { + HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); + return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants() + .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); + } - /** - * Returns the last successful write operation's instant time - */ - public static String latestCommit(FileSystem fs, String basePath) { - HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); - return timeline.lastInstant().get().getTimestamp(); - } + /** + * Returns the last successful write operation's instant time + */ + public static String latestCommit(FileSystem fs, String basePath) { + HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); + return timeline.lastInstant().get().getTimestamp(); + } - /** - * Obtain all the commits, compactions that have occurred on the timeline, whose - * instant times could be fed into the datasource options. - * - * @param fs - * @param basePath - */ - public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) { - HoodieTable table = HoodieTable - .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); - if (table.getMetaClient().getTableType().equals(HoodieTableType.MERGE_ON_READ)) { - return table.getActiveTimeline().getTimelineOfActions( - Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION, - HoodieActiveTimeline.DELTA_COMMIT_ACTION) - ); - } else { - return table.getCompletedCompactionCommitTimeline(); - } + /** + * Obtain all the commits, compactions that have occurred on the timeline, whose instant times + * could be fed into the datasource options. 
+ */ + public static HoodieTimeline allCompletedCommitsCompactions(FileSystem fs, String basePath) { + HoodieTable table = HoodieTable + .getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null); + if (table.getMetaClient().getTableType().equals(HoodieTableType.MERGE_ON_READ)) { + return table.getActiveTimeline().getTimelineOfActions( + Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION, + HoodieActiveTimeline.DELTA_COMMIT_ACTION) + ); + } else { + return table.getCompletedCompactionCommitTimeline(); } + } } diff --git a/hoodie-spark/src/main/java/com/uber/hoodie/KeyGenerator.java b/hoodie-spark/src/main/java/com/uber/hoodie/KeyGenerator.java index 58bd3e909..f23148ad3 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/KeyGenerator.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/KeyGenerator.java @@ -19,29 +19,24 @@ package com.uber.hoodie; import com.uber.hoodie.common.model.HoodieKey; - +import java.io.Serializable; import org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; -import java.io.Serializable; - /** * Abstract class to extend for plugging in extraction of {@link com.uber.hoodie.common.model.HoodieKey} * from an Avro record */ public abstract class KeyGenerator implements Serializable { - protected transient PropertiesConfiguration config; + protected transient PropertiesConfiguration config; - protected KeyGenerator(PropertiesConfiguration config) { - this.config = config; - } + protected KeyGenerator(PropertiesConfiguration config) { + this.config = config; + } - /** - * Generate a Hoodie Key out of provided generic record. - * - * @param record - * @return - */ - public abstract HoodieKey getKey(GenericRecord record); + /** + * Generate a Hoodie Key out of provided generic record. 
+ */ + public abstract HoodieKey getKey(GenericRecord record); } diff --git a/hoodie-spark/src/main/java/com/uber/hoodie/OverwriteWithLatestAvroPayload.java b/hoodie-spark/src/main/java/com/uber/hoodie/OverwriteWithLatestAvroPayload.java index e7877aafc..74424ac36 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/OverwriteWithLatestAvroPayload.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/OverwriteWithLatestAvroPayload.java @@ -20,49 +20,49 @@ package com.uber.hoodie; import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.common.util.HoodieAvroUtils; - +import java.io.IOException; +import java.util.Optional; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; -import java.io.IOException; -import java.util.Optional; - /** * Default payload used for delta streamer. * - * 1. preCombine - Picks the latest delta record for a key, based on an ordering field - * 2. combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record + * 1. preCombine - Picks the latest delta record for a key, based on an ordering field 2. 
+ * combineAndGetUpdateValue/getInsertValue - Simply overwrites storage with latest delta record */ -public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements HoodieRecordPayload { +public class OverwriteWithLatestAvroPayload extends BaseAvroPayload implements + HoodieRecordPayload { - /** - * - * @param record - * @param orderingVal - */ - public OverwriteWithLatestAvroPayload(GenericRecord record, Comparable orderingVal) { - super(record, orderingVal); - } + /** + * + * @param record + * @param orderingVal + */ + public OverwriteWithLatestAvroPayload(GenericRecord record, Comparable orderingVal) { + super(record, orderingVal); + } - @Override - public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload another) { - // pick the payload with greatest ordering value - if (another.orderingVal.compareTo(orderingVal) > 0) { - return another; - } else { - return this; - } + @Override + public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload another) { + // pick the payload with greatest ordering value + if (another.orderingVal.compareTo(orderingVal) > 0) { + return another; + } else { + return this; } + } - @Override - public Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException { - // combining strategy here trivially ignores currentValue on disk and writes this record - return getInsertValue(schema); - } + @Override + public Optional combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) + throws IOException { + // combining strategy here trivially ignores currentValue on disk and writes this record + return getInsertValue(schema); + } - @Override - public Optional getInsertValue(Schema schema) throws IOException { - return Optional.of(HoodieAvroUtils.rewriteRecord(record, schema)); - } + @Override + public Optional getInsertValue(Schema schema) throws IOException { + return Optional.of(HoodieAvroUtils.rewriteRecord(record, schema)); + } } diff 
--git a/hoodie-spark/src/main/java/com/uber/hoodie/SimpleKeyGenerator.java b/hoodie-spark/src/main/java/com/uber/hoodie/SimpleKeyGenerator.java index c5733856a..6c15ce434 100644 --- a/hoodie-spark/src/main/java/com/uber/hoodie/SimpleKeyGenerator.java +++ b/hoodie-spark/src/main/java/com/uber/hoodie/SimpleKeyGenerator.java @@ -20,32 +20,33 @@ package com.uber.hoodie; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.exception.HoodieException; - import org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; /** - * Simple key generator, which takes names of fields to be used for recordKey and partitionPath - * as configs. + * Simple key generator, which takes names of fields to be used for recordKey and partitionPath as + * configs. */ public class SimpleKeyGenerator extends KeyGenerator { - protected final String recordKeyField; + protected final String recordKeyField; - protected final String partitionPathField; + protected final String partitionPathField; - public SimpleKeyGenerator(PropertiesConfiguration config) { - super(config); - this.recordKeyField = config.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()); - this.partitionPathField = config.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()); - } - - @Override - public HoodieKey getKey(GenericRecord record) { - if (recordKeyField == null || partitionPathField == null) { - throw new HoodieException("Unable to find field names for record key or partition path in cfg"); - } - return new HoodieKey(DataSourceUtils.getNestedFieldValAsString(record, recordKeyField), - DataSourceUtils.getNestedFieldValAsString(record, partitionPathField)); + public SimpleKeyGenerator(PropertiesConfiguration config) { + super(config); + this.recordKeyField = config.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()); + this.partitionPathField = config + .getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()); + } + + @Override 
+ public HoodieKey getKey(GenericRecord record) { + if (recordKeyField == null || partitionPathField == null) { + throw new HoodieException( + "Unable to find field names for record key or partition path in cfg"); } + return new HoodieKey(DataSourceUtils.getNestedFieldValAsString(record, recordKeyField), + DataSourceUtils.getNestedFieldValAsString(record, partitionPathField)); + } } diff --git a/hoodie-spark/src/main/scala/com/uber/hoodie/AvroConversionUtils.scala b/hoodie-spark/src/main/scala/com/uber/hoodie/AvroConversionUtils.scala index 82cf7cc14..684024887 100644 --- a/hoodie-spark/src/main/scala/com/uber/hoodie/AvroConversionUtils.scala +++ b/hoodie-spark/src/main/scala/com/uber/hoodie/AvroConversionUtils.scala @@ -115,12 +115,12 @@ object AvroConversionUtils { def convertStructTypeToAvroSchema(structType: StructType, structName: String, - recordNamespace: String) : Schema = { + recordNamespace: String): Schema = { val builder = SchemaBuilder.record(structName).namespace(recordNamespace) SchemaConverters.convertStructToAvro(structType, builder, recordNamespace) } - def convertAvroSchemaToStructType(avroSchema: Schema) : StructType = { + def convertAvroSchemaToStructType(avroSchema: Schema): StructType = { SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]; } } diff --git a/hoodie-spark/src/main/scala/com/uber/hoodie/package.scala b/hoodie-spark/src/main/scala/com/uber/hoodie/package.scala index 63265f3f5..390f07b81 100644 --- a/hoodie-spark/src/main/scala/com/uber/hoodie/package.scala +++ b/hoodie-spark/src/main/scala/com/uber/hoodie/package.scala @@ -20,6 +20,7 @@ package com.uber.hoodie import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter} package object hoodie { + /** * Adds a method, `hoodie`, to DataFrameWriter */ @@ -33,4 +34,5 @@ package object hoodie { implicit class AvroDataFrameReader(reader: DataFrameReader) { def avro: String => DataFrame = reader.format("com.uber.hoodie").load } + } diff --git 
a/hoodie-spark/src/test/java/DataSourceTestUtils.java b/hoodie-spark/src/test/java/DataSourceTestUtils.java index 47f069ee1..a39c42ac3 100644 --- a/hoodie-spark/src/test/java/DataSourceTestUtils.java +++ b/hoodie-spark/src/test/java/DataSourceTestUtils.java @@ -18,9 +18,6 @@ import com.uber.hoodie.common.TestRawTripPayload; import com.uber.hoodie.common.model.HoodieRecord; - -import org.apache.spark.api.java.JavaRDD; - import java.io.IOException; import java.util.List; import java.util.Optional; @@ -31,20 +28,21 @@ import java.util.stream.Collectors; */ public class DataSourceTestUtils { - public static Optional convertToString(HoodieRecord record) { - try { - String str = ((TestRawTripPayload) record.getData()).getJsonData(); - str = "{" + str.substring(str.indexOf("\"timestamp\":")); - return Optional.of(str.replaceAll("}", ", \"partition\": \"" + record.getPartitionPath() + "\"}")); - } catch (IOException e) { - return Optional.empty(); - } + public static Optional convertToString(HoodieRecord record) { + try { + String str = ((TestRawTripPayload) record.getData()).getJsonData(); + str = "{" + str.substring(str.indexOf("\"timestamp\":")); + return Optional + .of(str.replaceAll("}", ", \"partition\": \"" + record.getPartitionPath() + "\"}")); + } catch (IOException e) { + return Optional.empty(); } + } - public static List convertToStringList(List records) { - return records.stream().map(hr -> convertToString(hr)) - .filter(os -> os.isPresent()) - .map(os -> os.get()) - .collect(Collectors.toList()); - } + public static List convertToStringList(List records) { + return records.stream().map(hr -> convertToString(hr)) + .filter(os -> os.isPresent()) + .map(os -> os.get()) + .collect(Collectors.toList()); + } } diff --git a/hoodie-spark/src/test/java/HoodieJavaApp.java b/hoodie-spark/src/test/java/HoodieJavaApp.java index c61d8cca4..ff2a03416 100644 --- a/hoodie-spark/src/test/java/HoodieJavaApp.java +++ b/hoodie-spark/src/test/java/HoodieJavaApp.java @@ -25,7 +25,7 
@@ import com.uber.hoodie.HoodieDataSourceHelpers; import com.uber.hoodie.common.HoodieTestDataGenerator; import com.uber.hoodie.common.model.HoodieTableType; import com.uber.hoodie.config.HoodieWriteConfig; - +import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -35,113 +35,123 @@ import org.apache.spark.sql.Row; import org.apache.spark.sql.SaveMode; import org.apache.spark.sql.SparkSession; -import java.util.List; /** * Sample program that writes & reads hoodie datasets via the Spark datasource */ public class HoodieJavaApp { - @Parameter(names={"--table-path", "-p"}, description = "path for Hoodie sample table") - private String tablePath = "file:///tmp/hoodie/sample-table"; + @Parameter(names = {"--table-path", "-p"}, description = "path for Hoodie sample table") + private String tablePath = "file:///tmp/hoodie/sample-table"; - @Parameter(names={"--table-name", "-n"}, description = "table name for Hoodie sample table") - private String tableName = "hoodie_test"; + @Parameter(names = {"--table-name", "-n"}, description = "table name for Hoodie sample table") + private String tableName = "hoodie_test"; - @Parameter(names={"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ") - private String tableType = HoodieTableType.COPY_ON_WRITE.name(); + @Parameter(names = {"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ") + private String tableType = HoodieTableType.COPY_ON_WRITE.name(); - @Parameter(names = {"--help", "-h"}, help = true) - public Boolean help = false; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; - private static Logger logger = LogManager.getLogger(HoodieJavaApp.class); + private static Logger logger = LogManager.getLogger(HoodieJavaApp.class); - public static void main(String[] args) throws Exception { - HoodieJavaApp cli = new HoodieJavaApp(); - JCommander cmd = new JCommander(cli, 
args); + public static void main(String[] args) throws Exception { + HoodieJavaApp cli = new HoodieJavaApp(); + JCommander cmd = new JCommander(cli, args); - if (cli.help) { - cmd.usage(); - System.exit(1); - } - cli.run(); + if (cli.help) { + cmd.usage(); + System.exit(1); } + cli.run(); + } - public void run() throws Exception { + public void run() throws Exception { - // Spark session setup.. - SparkSession spark = SparkSession.builder() - .appName("Hoodie Spark APP") - .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - .master("local[1]") - .getOrCreate(); - JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext()); - FileSystem fs = FileSystem.get(jssc.hadoopConfiguration()); + // Spark session setup.. + SparkSession spark = SparkSession.builder() + .appName("Hoodie Spark APP") + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .master("local[1]") + .getOrCreate(); + JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext()); + FileSystem fs = FileSystem.get(jssc.hadoopConfiguration()); - // Generator of some records to be loaded in. - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + // Generator of some records to be loaded in. + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); - /** - * Commit with only inserts - */ - // Generate some input.. - List records1 = DataSourceTestUtils.convertToStringList(dataGen.generateInserts("001"/* ignore */, 100)); - Dataset inputDF1 = spark.read().json(jssc.parallelize(records1, 2)); + /** + * Commit with only inserts + */ + // Generate some input.. 
+ List records1 = DataSourceTestUtils + .convertToStringList(dataGen.generateInserts("001"/* ignore */, 100)); + Dataset inputDF1 = spark.read().json(jssc.parallelize(records1, 2)); - // Save as hoodie dataset (copy on write) - inputDF1.write() - .format("com.uber.hoodie") // specify the hoodie source - .option("hoodie.insert.shuffle.parallelism", "2") // any hoodie client config can be passed like this - .option("hoodie.upsert.shuffle.parallelism", "2") // full list in HoodieWriteConfig & its package - .option(DataSourceWriteOptions.OPERATION_OPT_KEY(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) // insert - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key") // This is the record key - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition") // this is the partition to place it into - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp") // use to combine duplicate records in input/with disk val - .option(HoodieWriteConfig.TABLE_NAME, tableName) // Used by hive sync and queries - .mode(SaveMode.Overwrite) // This will remove any existing data at path below, and create a new dataset if needed - .save(tablePath); // ultimately where the dataset will be placed - String commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, tablePath); - logger.info("First commit at instant time :" + commitInstantTime1); + // Save as hoodie dataset (copy on write) + inputDF1.write() + .format("com.uber.hoodie") // specify the hoodie source + .option("hoodie.insert.shuffle.parallelism", + "2") // any hoodie client config can be passed like this + .option("hoodie.upsert.shuffle.parallelism", + "2") // full list in HoodieWriteConfig & its package + .option(DataSourceWriteOptions.OPERATION_OPT_KEY(), + DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL()) // insert + .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), + "_row_key") // This is the record key + 
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), + "partition") // this is the partition to place it into + .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), + "timestamp") // use to combine duplicate records in input/with disk val + .option(HoodieWriteConfig.TABLE_NAME, tableName) // Used by hive sync and queries + .mode( + SaveMode.Overwrite) // This will remove any existing data at path below, and create a new dataset if needed + .save(tablePath); // ultimately where the dataset will be placed + String commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(fs, tablePath); + logger.info("First commit at instant time :" + commitInstantTime1); - /** - * Commit that updates records - */ - List records2 = DataSourceTestUtils.convertToStringList(dataGen.generateUpdates("002"/* ignore */, 100)); - Dataset inputDF2 = spark.read().json(jssc.parallelize(records2, 2)); - inputDF2.write() - .format("com.uber.hoodie") - .option("hoodie.insert.shuffle.parallelism", "2") - .option("hoodie.upsert.shuffle.parallelism", "2") - .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key") - .option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition") - .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp") - .option(HoodieWriteConfig.TABLE_NAME, tableName) - .mode(SaveMode.Append) - .save(tablePath); - String commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath); - logger.info("Second commit at instant time :" + commitInstantTime1); + /** + * Commit that updates records + */ + List records2 = DataSourceTestUtils + .convertToStringList(dataGen.generateUpdates("002"/* ignore */, 100)); + Dataset inputDF2 = spark.read().json(jssc.parallelize(records2, 2)); + inputDF2.write() + .format("com.uber.hoodie") + .option("hoodie.insert.shuffle.parallelism", "2") + .option("hoodie.upsert.shuffle.parallelism", "2") + .option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key") + 
.option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "partition") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY(), "timestamp") + .option(HoodieWriteConfig.TABLE_NAME, tableName) + .mode(SaveMode.Append) + .save(tablePath); + String commitInstantTime2 = HoodieDataSourceHelpers.latestCommit(fs, tablePath); + logger.info("Second commit at instant time :" + commitInstantTime1); - /** - * Read & do some queries - */ - Dataset hoodieROViewDF = spark.read() - .format("com.uber.hoodie") - // pass any path glob, can include hoodie & non-hoodie datasets - .load(tablePath + "/*/*/*/*"); - hoodieROViewDF.registerTempTable("hoodie_ro"); - spark.sql("describe hoodie_ro").show(); - // all trips whose fare was greater than 2. - spark.sql("select fare, begin_lon, begin_lat, timestamp from hoodie_ro where fare > 2.0").show(); + /** + * Read & do some queries + */ + Dataset hoodieROViewDF = spark.read() + .format("com.uber.hoodie") + // pass any path glob, can include hoodie & non-hoodie datasets + .load(tablePath + "/*/*/*/*"); + hoodieROViewDF.registerTempTable("hoodie_ro"); + spark.sql("describe hoodie_ro").show(); + // all trips whose fare was greater than 2. + spark.sql("select fare, begin_lon, begin_lat, timestamp from hoodie_ro where fare > 2.0") + .show(); + /** + * Consume incrementally, only changes in commit 2 above. + */ + Dataset hoodieIncViewDF = spark.read().format("com.uber.hoodie") + .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY(), + DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL()) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(), + commitInstantTime1) // Only changes in write 2 above + .load(tablePath); // For incremental view, pass in the root/base path of dataset - /** - * Consume incrementally, only changes in commit 2 above. 
- */ - Dataset hoodieIncViewDF = spark.read().format("com.uber.hoodie") - .option(DataSourceReadOptions.VIEW_TYPE_OPT_KEY(), DataSourceReadOptions.VIEW_TYPE_INCREMENTAL_OPT_VAL()) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(), commitInstantTime1) // Only changes in write 2 above - .load(tablePath); // For incremental view, pass in the root/base path of dataset - - logger.info("You will only see records from : " + commitInstantTime2); - hoodieIncViewDF.groupBy(hoodieIncViewDF.col("_hoodie_commit_time")).count().show(); - } + logger.info("You will only see records from : " + commitInstantTime2); + hoodieIncViewDF.groupBy(hoodieIncViewDF.col("_hoodie_commit_time")).count().show(); + } } diff --git a/hoodie-spark/src/test/resources/log4j-surefire.properties b/hoodie-spark/src/test/resources/log4j-surefire.properties index 490c6411d..daf8d28c1 100644 --- a/hoodie-spark/src/test/resources/log4j-surefire.properties +++ b/hoodie-spark/src/test/resources/log4j-surefire.properties @@ -20,7 +20,6 @@ log4j.category.com.uber.hoodie.io=WARN log4j.category.com.uber.hoodie.common=WARN log4j.category.com.uber.hoodie.table.log=WARN log4j.category.org.apache.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. 
diff --git a/hoodie-spark/src/test/scala/DataSourceDefaultsTest.scala b/hoodie-spark/src/test/scala/DataSourceDefaultsTest.scala index 2996c46fd..a2f82af41 100644 --- a/hoodie-spark/src/test/scala/DataSourceDefaultsTest.scala +++ b/hoodie-spark/src/test/scala/DataSourceDefaultsTest.scala @@ -16,9 +16,9 @@ * */ -import com.uber.hoodie.{DataSourceWriteOptions, OverwriteWithLatestAvroPayload, SimpleKeyGenerator} import com.uber.hoodie.common.util.SchemaTestUtil import com.uber.hoodie.exception.HoodieException +import com.uber.hoodie.{DataSourceWriteOptions, OverwriteWithLatestAvroPayload, SimpleKeyGenerator} import org.apache.avro.generic.GenericRecord import org.apache.commons.configuration.PropertiesConfiguration import org.junit.Assert._ @@ -31,7 +31,7 @@ import org.scalatest.junit.AssertionsForJUnit class DataSourceDefaultsTest extends AssertionsForJUnit { val schema = SchemaTestUtil.getComplexEvolvedSchema - var baseRecord : GenericRecord = null + var baseRecord: GenericRecord = null @Before def initialize(): Unit = { baseRecord = SchemaTestUtil @@ -39,12 +39,13 @@ class DataSourceDefaultsTest extends AssertionsForJUnit { } - private def getKeyConfig(recordKeyFieldName: String, paritionPathField: String): PropertiesConfiguration = { + private def getKeyConfig(recordKeyFieldName: String, paritionPathField: String): PropertiesConfiguration = { val props = new PropertiesConfiguration() props.addProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, recordKeyFieldName) props.addProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, paritionPathField) props } + @Test def testSimpleKeyGenerator() = { // top level, valid fields val hk1 = new SimpleKeyGenerator(getKeyConfig("field1", "name")).getKey(baseRecord) diff --git a/hoodie-spark/src/test/scala/DataSourceTest.scala b/hoodie-spark/src/test/scala/DataSourceTest.scala index 764206f24..b9fed1cfe 100644 --- a/hoodie-spark/src/test/scala/DataSourceTest.scala +++ b/hoodie-spark/src/test/scala/DataSourceTest.scala 
@@ -20,11 +20,11 @@ import com.uber.hoodie.common.HoodieTestDataGenerator import com.uber.hoodie.common.util.FSUtils import com.uber.hoodie.config.HoodieWriteConfig import com.uber.hoodie.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.FileSystem import org.apache.spark.sql._ import org.junit.Assert._ -import org.junit.{Before, Test} import org.junit.rules.TemporaryFolder +import org.junit.{Before, Test} import org.scalatest.junit.AssertionsForJUnit import scala.collection.JavaConversions._ @@ -44,8 +44,8 @@ class DataSourceTest extends AssertionsForJUnit { DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "timestamp", HoodieWriteConfig.TABLE_NAME -> "hoodie_test" ) - var basePath : String = null - var fs : FileSystem = null + var basePath: String = null + var fs: FileSystem = null @Before def initialize() { spark = SparkSession.builder diff --git a/hoodie-utilities/pom.xml b/hoodie-utilities/pom.xml index 901e6bf5f..a6df1595f 100644 --- a/hoodie-utilities/pom.xml +++ b/hoodie-utilities/pom.xml @@ -15,282 +15,284 @@ ~ limitations under the License. 
--> - - - hoodie - com.uber.hoodie - 0.4.1-SNAPSHOT - - 4.0.0 + + + hoodie + com.uber.hoodie + 0.4.1-SNAPSHOT + + 4.0.0 - hoodie-utilities - jar + hoodie-utilities + jar - - - - org.jacoco - jacoco-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - 1.8 - 1.8 - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.4.1 - - - src/assembly/src.xml - - - - com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer - - + + + + org.jacoco + jacoco-maven-plugin + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.4.1 + + + src/assembly/src.xml + + + + com.uber.hoodie.utilities.deltastreamer.HoodieDeltaStreamer + + - - - - make-assembly - - package - - single - - - - - + + + + make-assembly + + package + + single + + + + + - - - src/main/resources - - - src/test/resources - - - + + + src/main/resources + + + src/test/resources + + + - - - org.apache.spark - spark-sql_2.11 - + + + org.apache.spark + spark-sql_2.11 + - - com.uber.hoodie - hoodie-common - ${project.version} - + + com.uber.hoodie + hoodie-common + ${project.version} + - - com.uber.hoodie - hoodie-common - ${project.version} - test-jar - test - + + com.uber.hoodie + hoodie-common + ${project.version} + test-jar + test + - - com.uber.hoodie - hoodie-spark - ${project.version} - + + com.uber.hoodie + hoodie-spark + ${project.version} + - - org.apache.hadoop - hadoop-hdfs - tests - - - - org.mortbay.jetty - * - - - javax.servlet.jsp - * - - - javax.servlet - * - - - - - org.apache.hadoop - hadoop-common - tests - - - org.mortbay.jetty - * - - - javax.servlet.jsp - * - - - javax.servlet - * - - - + + org.apache.hadoop + hadoop-hdfs + tests + + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + + + org.apache.hadoop + hadoop-common + tests + + + org.mortbay.jetty + * + + + javax.servlet.jsp + * + + + javax.servlet + * + + + - - com.uber.hoodie - hoodie-hive - ${project.version} - - 
- javax.servlet - servlet-api - - - + + com.uber.hoodie + hoodie-hive + ${project.version} + + + javax.servlet + servlet-api + + + - - com.uber.hoodie - hoodie-client - ${project.version} - + + com.uber.hoodie + hoodie-client + ${project.version} + - - com.uber.hoodie - hoodie-client - ${project.version} - test-jar - test - + + com.uber.hoodie + hoodie-client + ${project.version} + test-jar + test + - - org.apache.hive - hive-jdbc - ${hive.version}-cdh${cdh.version} - standalone - - - org.slf4j - slf4j-api - - - javax.servlet - servlet-api - - - + + org.apache.hive + hive-jdbc + ${hive.version}-cdh${cdh.version} + standalone + + + org.slf4j + slf4j-api + + + javax.servlet + servlet-api + + + - - commons-dbcp - commons-dbcp - - - org.apache.httpcomponents - httpcore - + + commons-dbcp + commons-dbcp + + + org.apache.httpcomponents + httpcore + - - log4j - log4j - - - org.slf4j - slf4j-api - + + log4j + log4j + + + org.slf4j + slf4j-api + - - org.apache.hadoop - hadoop-mapreduce-client-common - - - javax.servlet - servlet-api - - - + + org.apache.hadoop + hadoop-mapreduce-client-common + + + javax.servlet + servlet-api + + + - - org.apache.hadoop - hadoop-client - - - javax.servlet - servlet-api - - - + + org.apache.hadoop + hadoop-client + + + javax.servlet + servlet-api + + + - - org.apache.spark - spark-core_2.11 - - - javax.servlet - servlet-api - - - + + org.apache.spark + spark-core_2.11 + + + javax.servlet + servlet-api + + + - - org.apache.spark - spark-streaming_2.11 - ${spark.version} - provided - + + org.apache.spark + spark-streaming_2.11 + ${spark.version} + provided + - - org.apache.spark - spark-streaming-kafka-0-8_2.11 - ${spark.version} - + + org.apache.spark + spark-streaming-kafka-0-8_2.11 + ${spark.version} + - - - org.antlr - stringtemplate - 4.0.2 - + + + org.antlr + stringtemplate + 4.0.2 + - - com.beust - jcommander - + + com.beust + jcommander + - - org.mockito - mockito-all - 1.10.19 - test - - - org.apache.avro - avro-mapred - 
1.7.6-cdh5.7.2 - + + org.mockito + mockito-all + 1.10.19 + test + + + org.apache.avro + avro-mapred + 1.7.6-cdh5.7.2 + - - org.apache.parquet - parquet-avro - + + org.apache.parquet + parquet-avro + - - org.apache.parquet - parquet-hadoop - + + org.apache.parquet + parquet-hadoop + - - com.twitter - bijection-avro_2.11 - 0.9.2 - + + com.twitter + bijection-avro_2.11 + 0.9.2 + - + diff --git a/hoodie-utilities/src/assembly/src.xml b/hoodie-utilities/src/assembly/src.xml index 77b5f87e3..aa2fbcd21 100644 --- a/hoodie-utilities/src/assembly/src.xml +++ b/hoodie-utilities/src/assembly/src.xml @@ -15,8 +15,8 @@ --> + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd"> bin jar @@ -40,9 +40,9 @@ - - - + + + diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HDFSParquetImporter.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HDFSParquetImporter.java index a8338b727..27f264974 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HDFSParquetImporter.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HDFSParquetImporter.java @@ -60,253 +60,255 @@ import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.VoidFunction; import scala.Tuple2; -public class HDFSParquetImporter implements Serializable{ +public class HDFSParquetImporter implements Serializable { - private static volatile Logger logger = LogManager.getLogger(HDFSParquetImporter.class); - private final Config cfg; - private final transient FileSystem fs; - public static final SimpleDateFormat PARTITION_FORMATTER = new SimpleDateFormat("yyyy/MM/dd"); + private static volatile Logger logger = LogManager.getLogger(HDFSParquetImporter.class); + private final Config cfg; + private final transient FileSystem fs; + public static final SimpleDateFormat PARTITION_FORMATTER = new 
SimpleDateFormat("yyyy/MM/dd"); - public HDFSParquetImporter( - Config cfg) throws IOException { - this.cfg = cfg; - fs = FSUtils.getFs(); + public HDFSParquetImporter( + Config cfg) throws IOException { + this.cfg = cfg; + fs = FSUtils.getFs(); + } + + public static class FormatValidator implements IValueValidator { + + List validFormats = Arrays.asList("parquet"); + + @Override + public void validate(String name, String value) throws ParameterException { + if (value == null || !validFormats.contains(value)) { + throw new ParameterException(String + .format("Invalid format type: value:%s: supported formats:%s", value, + validFormats)); + } + } + } + + public static class SourceTypeValidator implements IValueValidator { + + List validSourceTypes = Arrays.asList("hdfs"); + + @Override + public void validate(String name, String value) throws ParameterException { + if (value == null || !validSourceTypes.contains(value)) { + throw new ParameterException(String + .format("Invalid source type: value:%s: supported source types:%s", value, + validSourceTypes)); + } + } + } + + public static class Config implements Serializable { + + @Parameter(names = {"--src-path", + "-sp"}, description = "Base path for the input dataset", required = true) + public String srcPath = null; + @Parameter(names = {"--src-type", + "-st"}, description = "Source type for the input dataset", required = true, + validateValueWith = SourceTypeValidator.class) + public String srcType = null; + @Parameter(names = {"--target-path", + "-tp"}, description = "Base path for the target hoodie dataset", required = true) + public String targetPath = null; + @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true) + public String tableName = null; + @Parameter(names = {"--table-type", "-tt"}, description = "Table type", required = true) + public String tableType = null; + @Parameter(names = {"--row-key-field", + "-rk"}, description = "Row key field name", required = true) + 
public String rowKey = null; + @Parameter(names = {"--partition-key-field", + "-pk"}, description = "Partition key field name", required = true) + public String partitionKey = null; + @Parameter(names = {"--parallelism", + "-pl"}, description = "Parallelism for hoodie insert", required = true) + public int parallelism = 1; + @Parameter(names = {"--schema-file", + "-sf"}, description = "path for Avro schema file", required = true) + public String schemaFile = null; + @Parameter(names = {"--format", + "-f"}, description = "Format for the input data.", required = false, + validateValueWith = FormatValidator.class) + public String format = null; + @Parameter(names = {"--spark-master", + "-ms"}, description = "Spark master", required = false) + public String sparkMaster = null; + @Parameter(names = {"--spark-memory", + "-sm"}, description = "spark memory to use", required = true) + public String sparkMemory = null; + @Parameter(names = {"--retry", + "-rt"}, description = "number of retries", required = false) + public int retry = 0; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + } + + public static void main(String args[]) throws Exception { + final HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); + JCommander cmd = new JCommander(cfg, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg); + dataImporter.dataImport(dataImporter.getSparkContext(), cfg.retry); + } + + private JavaSparkContext getSparkContext() { + SparkConf sparkConf = new SparkConf().setAppName("hoodie-data-importer-" + cfg.tableName); + sparkConf.setMaster(cfg.sparkMaster); + + if (cfg.sparkMaster.startsWith("yarn")) { + sparkConf.set("spark.eventLog.overwrite", "true"); + sparkConf.set("spark.eventLog.enabled", "true"); } - public static class FormatValidator implements IValueValidator { - List validFormats = Arrays.asList("parquet"); + 
sparkConf.set("spark.driver.maxResultSize", "2g"); + sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + sparkConf.set("spark.executor.memory", cfg.sparkMemory); + // Configure hadoop conf + sparkConf.set("spark.hadoop.mapred.output.compress", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", + "org.apache.hadoop.io.compress.GzipCodec"); + sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); + + sparkConf = HoodieWriteClient.registerClasses(sparkConf); + return new JavaSparkContext(sparkConf); + } + + private String getSchema() throws Exception { + // Read schema file. + Path p = new Path(cfg.schemaFile); + if (!fs.exists(p)) { + throw new Exception( + String.format("Could not find - %s - schema file.", cfg.schemaFile)); + } + long len = fs.getFileStatus(p).getLen(); + ByteBuffer buf = ByteBuffer.allocate((int) len); + FSDataInputStream inputStream = null; + try { + inputStream = fs.open(p); + inputStream.readFully(0, buf.array(), 0, buf.array().length); + } finally { + if (inputStream != null) { + inputStream.close(); + } + } + return new String(buf.array()); + } + + public int dataImport(JavaSparkContext jsc, int retry) throws Exception { + int ret = -1; + try { + // Verify that targetPath is not present. + if (fs.exists(new Path(cfg.targetPath))) { + throw new HoodieIOException( + String.format("Make sure %s is not present.", cfg.targetPath)); + } + do { + ret = dataImport(jsc); + } while (ret != 0 && retry-- > 0); + } catch (Throwable t) { + logger.error(t); + } + return ret; + } + + @VisibleForTesting + protected int dataImport(JavaSparkContext jsc) throws IOException { + try { + if (fs.exists(new Path(cfg.targetPath))) { + // cleanup target directory. + fs.delete(new Path(cfg.targetPath), true); + } + + //Get schema. + String schemaStr = getSchema(); + + // Initialize target hoodie table. 
+ Properties properties = new Properties(); + properties.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, cfg.tableName); + properties.put(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, cfg.tableType); + HoodieTableMetaClient.initializePathAsHoodieDataset(fs, cfg.targetPath, properties); + + HoodieWriteClient client = createHoodieClient(jsc, cfg.targetPath, schemaStr, + cfg.parallelism); + + Job job = Job.getInstance(jsc.hadoopConfiguration()); + // To parallelize reading file status. + job.getConfiguration().set(FileInputFormat.LIST_STATUS_NUM_THREADS, "1024"); + AvroReadSupport.setAvroReadSchema(jsc.hadoopConfiguration(), + (new Schema.Parser().parse(schemaStr))); + ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class)); + + JavaRDD> hoodieRecords = jsc + .newAPIHadoopFile(cfg.srcPath, ParquetInputFormat.class, Void.class, + GenericRecord.class, job.getConfiguration()) + // To reduce large number of tasks. + .coalesce(16 * cfg.parallelism) + .map(new Function, HoodieRecord>() { + @Override + public HoodieRecord call(Tuple2 entry) + throws Exception { + GenericRecord genericRecord = entry._2(); + Object partitionField = genericRecord.get(cfg.partitionKey); + if (partitionField == null) { + throw new HoodieIOException( + "partition key is missing. :" + cfg.partitionKey); + } + Object rowField = genericRecord.get(cfg.rowKey); + if (rowField == null) { + throw new HoodieIOException( + "row field is missing. :" + cfg.rowKey); + } + long ts = (long) ((Double) partitionField * 1000l); + String partitionPath = PARTITION_FORMATTER.format(new Date(ts)); + return new HoodieRecord( + new HoodieKey((String) rowField, partitionPath), + new HoodieJsonPayload(genericRecord.toString())); + } + } + ); + // Get commit time. 
+ String commitTime = client.startCommit(); + + JavaRDD writeResponse = client.bulkInsert(hoodieRecords, commitTime); + Accumulator errors = jsc.accumulator(0); + writeResponse.foreach(new VoidFunction() { @Override - public void validate(String name, String value) throws ParameterException { - if (value == null || !validFormats.contains(value)) { - throw new ParameterException(String - .format("Invalid format type: value:%s: supported formats:%s", value, - validFormats)); - } + public void call(WriteStatus writeStatus) throws Exception { + if (writeStatus.hasErrors()) { + errors.add(1); + logger.error(String.format("Error processing records :writeStatus:%s", + writeStatus.getStat().toString())); + } } + }); + if (errors.value() == 0) { + logger.info(String + .format("Dataset imported into hoodie dataset with %s commit time.", + commitTime)); + return 0; + } + logger.error(String.format("Import failed with %d errors.", errors.value())); + } catch (Throwable t) { + logger.error("Error occurred.", t); } + return -1; + } - public static class SourceTypeValidator implements IValueValidator { - List validSourceTypes = Arrays.asList("hdfs"); - - @Override - public void validate(String name, String value) throws ParameterException { - if (value == null || !validSourceTypes.contains(value)) { - throw new ParameterException(String - .format("Invalid source type: value:%s: supported source types:%s", value, - validSourceTypes)); - } - } - } - - public static class Config implements Serializable { - - @Parameter(names = {"--src-path", - "-sp"}, description = "Base path for the input dataset", required = true) - public String srcPath = null; - @Parameter(names = {"--src-type", - "-st"}, description = "Source type for the input dataset", required = true, - validateValueWith = SourceTypeValidator.class) - public String srcType = null; - @Parameter(names = {"--target-path", - "-tp"}, description = "Base path for the target hoodie dataset", required = true) - public String 
targetPath = null; - @Parameter(names = {"--table-name", "-tn"}, description = "Table name", required = true) - public String tableName = null; - @Parameter(names = {"--table-type", "-tt"}, description = "Table type", required = true) - public String tableType = null; - @Parameter(names = {"--row-key-field", - "-rk"}, description = "Row key field name", required = true) - public String rowKey = null; - @Parameter(names = {"--partition-key-field", - "-pk"}, description = "Partition key field name", required = true) - public String partitionKey = null; - @Parameter(names = {"--parallelism", - "-pl"}, description = "Parallelism for hoodie insert", required = true) - public int parallelism = 1; - @Parameter(names = {"--schema-file", - "-sf"}, description = "path for Avro schema file", required = true) - public String schemaFile = null; - @Parameter(names = {"--format", - "-f"}, description = "Format for the input data.", required = false, - validateValueWith = FormatValidator.class) - public String format = null; - @Parameter(names = {"--spark-master", - "-ms"}, description = "Spark master", required = false) - public String sparkMaster = null; - @Parameter(names = {"--spark-memory", - "-sm"}, description = "spark memory to use", required = true) - public String sparkMemory = null; - @Parameter(names = {"--retry", - "-rt"}, description = "number of retries", required = false) - public int retry = 0; - @Parameter(names = {"--help", "-h"}, help = true) - public Boolean help = false; - } - - public static void main(String args[]) throws Exception { - final HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); - JCommander cmd = new JCommander(cfg, args); - if (cfg.help || args.length == 0) { - cmd.usage(); - System.exit(1); - } - HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg); - dataImporter.dataImport(dataImporter.getSparkContext(), cfg.retry); - } - - private JavaSparkContext getSparkContext() { - SparkConf sparkConf = new 
SparkConf().setAppName("hoodie-data-importer-" + cfg.tableName); - sparkConf.setMaster(cfg.sparkMaster); - - if (cfg.sparkMaster.startsWith("yarn")) { - sparkConf.set("spark.eventLog.overwrite", "true"); - sparkConf.set("spark.eventLog.enabled", "true"); - } - - sparkConf.set("spark.driver.maxResultSize", "2g"); - sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - sparkConf.set("spark.executor.memory", cfg.sparkMemory); - - // Configure hadoop conf - sparkConf.set("spark.hadoop.mapred.output.compress", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", - "org.apache.hadoop.io.compress.GzipCodec"); - sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); - - sparkConf = HoodieWriteClient.registerClasses(sparkConf); - return new JavaSparkContext(sparkConf); - } - - private String getSchema() throws Exception { - // Read schema file. - Path p = new Path(cfg.schemaFile); - if (!fs.exists(p)) { - throw new Exception( - String.format("Could not find - %s - schema file.", cfg.schemaFile)); - } - long len = fs.getFileStatus(p).getLen(); - ByteBuffer buf = ByteBuffer.allocate((int) len); - FSDataInputStream inputStream = null; - try { - inputStream = fs.open(p); - inputStream.readFully(0, buf.array(), 0, buf.array().length); - } - finally { - if (inputStream != null) - inputStream.close(); - } - return new String(buf.array()); - } - - public int dataImport(JavaSparkContext jsc, int retry) throws Exception { - int ret = -1; - try { - // Verify that targetPath is not present. 
- if (fs.exists(new Path(cfg.targetPath))) { - throw new HoodieIOException( - String.format("Make sure %s is not present.", cfg.targetPath)); - } - do { - ret = dataImport(jsc); - } while (ret != 0 && retry-- > 0); - } catch (Throwable t) { - logger.error(t); - } - return ret; - } - - @VisibleForTesting - protected int dataImport(JavaSparkContext jsc) throws IOException { - try { - if (fs.exists(new Path(cfg.targetPath))) { - // cleanup target directory. - fs.delete(new Path(cfg.targetPath), true); - } - - //Get schema. - String schemaStr = getSchema(); - - // Initialize target hoodie table. - Properties properties = new Properties(); - properties.put(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, cfg.tableName); - properties.put(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, cfg.tableType); - HoodieTableMetaClient.initializePathAsHoodieDataset(fs, cfg.targetPath, properties); - - HoodieWriteClient client = createHoodieClient(jsc, cfg.targetPath, schemaStr, - cfg.parallelism); - - Job job = Job.getInstance(jsc.hadoopConfiguration()); - // To parallelize reading file status. - job.getConfiguration().set(FileInputFormat.LIST_STATUS_NUM_THREADS, "1024"); - AvroReadSupport.setAvroReadSchema(jsc.hadoopConfiguration(), - (new Schema.Parser().parse(schemaStr))); - ParquetInputFormat.setReadSupportClass(job, (AvroReadSupport.class)); - - JavaRDD> hoodieRecords = jsc - .newAPIHadoopFile(cfg.srcPath, ParquetInputFormat.class, Void.class, - GenericRecord.class, job.getConfiguration()) - // To reduce large number of tasks. - .coalesce(16 * cfg.parallelism) - .map(new Function, HoodieRecord>() { - @Override - public HoodieRecord call(Tuple2 entry) - throws Exception { - GenericRecord genericRecord = entry._2(); - Object partitionField = genericRecord.get(cfg.partitionKey); - if (partitionField == null) { - throw new HoodieIOException( - "partition key is missing. 
:" + cfg.partitionKey); - } - Object rowField = genericRecord.get(cfg.rowKey); - if (rowField == null) { - throw new HoodieIOException( - "row field is missing. :" + cfg.rowKey); - } - long ts = (long) ((Double) partitionField * 1000l); - String partitionPath = PARTITION_FORMATTER.format(new Date(ts)); - return new HoodieRecord( - new HoodieKey((String) rowField, partitionPath), - new HoodieJsonPayload(genericRecord.toString())); - } - } - ); - // Get commit time. - String commitTime = client.startCommit(); - - JavaRDD writeResponse = client.bulkInsert(hoodieRecords, commitTime); - Accumulator errors = jsc.accumulator(0); - writeResponse.foreach(new VoidFunction() { - @Override - public void call(WriteStatus writeStatus) throws Exception { - if (writeStatus.hasErrors()) { - errors.add(1); - logger.error(String.format("Error processing records :writeStatus:%s", - writeStatus.getStat().toString())); - } - } - }); - if (errors.value() == 0) { - logger.info(String - .format("Dataset imported into hoodie dataset with %s commit time.", - commitTime)); - return 0; - } - logger.error(String.format("Import failed with %d errors.", errors.value())); - } catch (Throwable t) { - logger.error("Error occurred.", t); - } - return -1; - } - - private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, - String schemaStr, int parallelism) throws Exception { - HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withParallelism(parallelism, parallelism).withSchema(schemaStr) - .combineInput(true, true).withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .build(); - return new HoodieWriteClient(jsc, config); - } + private static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, + String schemaStr, int parallelism) throws Exception { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withParallelism(parallelism, 
parallelism).withSchema(schemaStr) + .combineInput(true, true).withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .build(); + return new HoodieWriteClient(jsc, config); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HiveIncrementalPuller.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HiveIncrementalPuller.java index 7c6230d5b..61aec29fb 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HiveIncrementalPuller.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HiveIncrementalPuller.java @@ -25,19 +25,6 @@ import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.exception.HoodieException; import com.uber.hoodie.utilities.exception.HoodieIncrementalPullException; import com.uber.hoodie.utilities.exception.HoodieIncrementalPullSQLException; - -import org.apache.commons.dbcp.BasicDataSource; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.stringtemplate.v4.ST; - -import javax.sql.DataSource; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -50,301 +37,343 @@ import java.util.List; import java.util.Optional; import java.util.Scanner; import java.util.stream.Collectors; +import javax.sql.DataSource; +import org.apache.commons.dbcp.BasicDataSource; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; 
+import org.stringtemplate.v4.ST; /** - * Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as another hive temporary table. + * Utility to pull data after a given commit, based on the supplied HiveQL and save the delta as + * another hive temporary table. * * Current Limitations: * - * - Only the source table can be incrementally pulled (usually the largest table) - * - The incrementally pulled table can't be referenced more than once. + * - Only the source table can be incrementally pulled (usually the largest table) - The + * incrementally pulled table can't be referenced more than once. */ public class HiveIncrementalPuller { - private static Logger log = LogManager.getLogger(HiveIncrementalPuller.class); - private static String driverName = "org.apache.hive.jdbc.HiveDriver"; + private static Logger log = LogManager.getLogger(HiveIncrementalPuller.class); + private static String driverName = "org.apache.hive.jdbc.HiveDriver"; - public static class Config implements Serializable { - @Parameter(names = {"--hiveUrl"}) public String hiveJDBCUrl = - "jdbc:hive2://localhost:10014/;transportMode=http;httpPath=hs2"; - @Parameter(names = {"--hiveUser"}) public String hiveUsername = "hive"; - @Parameter(names = {"--hivePass"}) public String hivePassword = ""; - @Parameter(names = {"--queue"}) public String yarnQueueName = "hadoop-queue"; - @Parameter(names = {"--tmp"}) public String hoodieTmpDir = "/app/hoodie/intermediate"; - @Parameter(names = {"--extractSQLFile"}, required = true) public String incrementalSQLFile; - @Parameter(names = {"--sourceDb"}, required = true) public String sourceDb; - @Parameter(names = {"--sourceTable"}, required = true) public String sourceTable; - @Parameter(names = {"--targetDb"}) public String targetDb; - @Parameter(names = {"--targetTable"}, required = true) public String targetTable; - @Parameter(names = {"--tmpdb"}) public String tmpDb = "tmp"; - @Parameter(names = {"--fromCommitTime"}) public 
String fromCommitTime; - @Parameter(names = {"--maxCommits"}) public int maxCommits = 3; - @Parameter(names = {"--help", "-h"}, help = true) public Boolean help = false; - @Parameter(names = {"--storageFormat"}) public String tempTableStorageFormat = "AVRO"; + public static class Config implements Serializable { + + @Parameter(names = {"--hiveUrl"}) + public String hiveJDBCUrl = + "jdbc:hive2://localhost:10014/;transportMode=http;httpPath=hs2"; + @Parameter(names = {"--hiveUser"}) + public String hiveUsername = "hive"; + @Parameter(names = {"--hivePass"}) + public String hivePassword = ""; + @Parameter(names = {"--queue"}) + public String yarnQueueName = "hadoop-queue"; + @Parameter(names = {"--tmp"}) + public String hoodieTmpDir = "/app/hoodie/intermediate"; + @Parameter(names = {"--extractSQLFile"}, required = true) + public String incrementalSQLFile; + @Parameter(names = {"--sourceDb"}, required = true) + public String sourceDb; + @Parameter(names = {"--sourceTable"}, required = true) + public String sourceTable; + @Parameter(names = {"--targetDb"}) + public String targetDb; + @Parameter(names = {"--targetTable"}, required = true) + public String targetTable; + @Parameter(names = {"--tmpdb"}) + public String tmpDb = "tmp"; + @Parameter(names = {"--fromCommitTime"}) + public String fromCommitTime; + @Parameter(names = {"--maxCommits"}) + public int maxCommits = 3; + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + @Parameter(names = {"--storageFormat"}) + public String tempTableStorageFormat = "AVRO"; + } + + static { + try { + Class.forName(driverName); + } catch (ClassNotFoundException e) { + throw new IllegalStateException("Could not find " + driverName + " in classpath. ", e); } + } - static { - try { - Class.forName(driverName); - } catch (ClassNotFoundException e) { - throw new IllegalStateException("Could not find " + driverName + " in classpath. 
", e); + private Connection connection; + protected final Config config; + private final ST incrementalPullSQLtemplate; + + public HiveIncrementalPuller(Config config) throws IOException { + this.config = config; + validateConfig(config); + String templateContent = IOUtils + .toString(this.getClass().getResourceAsStream("IncrementalPull.sqltemplate")); + incrementalPullSQLtemplate = new ST(templateContent); + } + + private void validateConfig(Config config) { + if (config.maxCommits == -1) { + config.maxCommits = Integer.MAX_VALUE; + } + } + + public void saveDelta() throws IOException { + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(conf); + Statement stmt = null; + try { + if (config.fromCommitTime == null) { + config.fromCommitTime = inferCommitTime(fs); + log.info("FromCommitTime inferred as " + config.fromCommitTime); + } + + log.info("FromCommitTime - " + config.fromCommitTime); + String sourceTableLocation = getTableLocation(config.sourceDb, config.sourceTable); + String lastCommitTime = getLastCommitTimePulled(fs, sourceTableLocation); + if (lastCommitTime == null) { + log.info("Nothing to pull. However we will continue to create a empty table"); + lastCommitTime = config.fromCommitTime; + } + + Connection conn = getConnection(); + stmt = conn.createStatement(); + // drop the temp table if exists + String tempDbTable = config.tmpDb + "." 
+ config.targetTable + "__" + config.sourceTable; + String tempDbTablePath = + config.hoodieTmpDir + "/" + config.targetTable + "__" + config.sourceTable + "/" + + lastCommitTime; + executeStatement("drop table " + tempDbTable, stmt); + deleteHDFSPath(fs, tempDbTablePath); + if (!ensureTempPathExists(fs, lastCommitTime)) { + throw new IllegalStateException( + "Could not create target path at " + new Path(config.hoodieTmpDir, + config.targetTable + "/" + lastCommitTime)); + } + + initHiveBeelineProperties(stmt); + executeIncrementalSQL(tempDbTable, tempDbTablePath, stmt); + log.info("Finished HoodieReader execution"); + } catch (SQLException e) { + log.error("Exception when executing SQL", e); + throw new IOException("Could not scan " + config.sourceTable + " incrementally", e); + } finally { + try { + if (stmt != null) { + stmt.close(); } + } catch (SQLException e) { + log.error("Could not close the resultset opened ", e); + } + } + } + + private void executeIncrementalSQL(String tempDbTable, String tempDbTablePath, Statement stmt) + throws FileNotFoundException, SQLException { + incrementalPullSQLtemplate.add("tempDbTable", tempDbTable); + incrementalPullSQLtemplate.add("tempDbTablePath", tempDbTablePath); + + String storedAsClause = getStoredAsClause(); + + incrementalPullSQLtemplate.add("storedAsClause", storedAsClause); + String incrementalSQL = + new Scanner(new File(config.incrementalSQLFile)).useDelimiter("\\Z").next(); + if (!incrementalSQL.contains(config.sourceDb + "." + config.sourceTable)) { + log.info("Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable + + ", which means its pulling from a different table. Fencing this from happening."); + throw new HoodieIncrementalPullSQLException( + "Incremental SQL does not have " + config.sourceDb + "." 
+ config.sourceTable); + } + if (!incrementalSQL.contains("`_hoodie_commit_time` > '%targetBasePath'")) { + log.info("Incremental SQL : " + incrementalSQL + + " does not contain `_hoodie_commit_time` > '%targetBasePath'. Please add this clause for incremental to work properly."); + throw new HoodieIncrementalPullSQLException( + "Incremental SQL does not have clause `_hoodie_commit_time` > '%targetBasePath', which means its not pulling incrementally"); } - private Connection connection; - protected final Config config; - private final ST incrementalPullSQLtemplate; + incrementalPullSQLtemplate + .add("incrementalSQL", String.format(incrementalSQL, config.fromCommitTime)); + String sql = incrementalPullSQLtemplate.render(); + // Check if the SQL is pulling from the right database + executeStatement(sql, stmt); + } - public HiveIncrementalPuller(Config config) throws IOException { - this.config = config; - validateConfig(config); - String templateContent = IOUtils.toString(this.getClass().getResourceAsStream("IncrementalPull.sqltemplate")); - incrementalPullSQLtemplate = new ST(templateContent); + private String getStoredAsClause() { + if (config.tempTableStorageFormat.equalsIgnoreCase("json")) { + // Special case for json + // default json serde does not support having same key even if its under multiple depths + return "ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' STORED AS TEXTFILE"; } + return "STORED AS " + config.tempTableStorageFormat; + } - private void validateConfig(Config config) { - if(config.maxCommits == -1) { - config.maxCommits = Integer.MAX_VALUE; + private void initHiveBeelineProperties(Statement stmt) throws SQLException { + log.info("Setting up Hive JDBC Session with properties"); + // set the queue + executeStatement("set mapred.job.queue.name=" + config.yarnQueueName, stmt); + // Set the inputformat to HoodieCombineHiveInputFormat + executeStatement( + "set hive.input.format=com.uber.hoodie.hadoop.hive.HoodieCombineHiveInputFormat", 
stmt); + // Allow queries without partition predicate + executeStatement("set hive.strict.checks.large.query=false", stmt); + // Dont gather stats for the table created + executeStatement("set hive.stats.autogather=false", stmt); + // Set the hoodie modie + executeStatement("set hoodie." + config.sourceTable + ".consume.mode=INCREMENTAL", stmt); + // Set the from commit time + executeStatement("set hoodie." + config.sourceTable + ".consume.start.timestamp=" + + config.fromCommitTime, stmt); + // Set number of commits to pull + executeStatement("set hoodie." + config.sourceTable + ".consume.max.commits=" + String + .valueOf(config.maxCommits), stmt); + } + + private boolean deleteHDFSPath(FileSystem fs, String path) throws IOException { + log.info("Deleting path " + path); + return fs.delete(new Path(path), true); + } + + private void executeStatement(String sql, Statement stmt) throws SQLException { + log.info("Executing: " + sql); + stmt.execute(sql); + } + + private String inferCommitTime(FileSystem fs) throws SQLException, IOException { + log.info("FromCommitTime not specified. Trying to infer it from Hoodie dataset " + + config.targetDb + "." + config.targetTable); + String targetDataLocation = getTableLocation(config.targetDb, config.targetTable); + return scanForCommitTime(fs, targetDataLocation); + } + + private String getTableLocation(String db, String table) throws SQLException { + ResultSet resultSet = null; + Statement stmt = null; + try { + Connection conn = getConnection(); + stmt = conn.createStatement(); + resultSet = stmt.executeQuery("describe formatted `" + db + "." + table + "`"); + while (resultSet.next()) { + if (resultSet.getString(1).trim().equals("Location:")) { + log.info("Inferred table location for " + db + "." 
+ table + " as " + resultSet + .getString(2)); + return resultSet.getString(2); } - } - - public void saveDelta() throws IOException { - Configuration conf = new Configuration(); - FileSystem fs = FileSystem.get(conf); - Statement stmt = null; - try { - if (config.fromCommitTime == null) { - config.fromCommitTime = inferCommitTime(fs); - log.info("FromCommitTime inferred as " + config.fromCommitTime); - } - - log.info("FromCommitTime - " + config.fromCommitTime); - String sourceTableLocation = getTableLocation(config.sourceDb, config.sourceTable); - String lastCommitTime = getLastCommitTimePulled(fs, sourceTableLocation); - if (lastCommitTime == null) { - log.info("Nothing to pull. However we will continue to create a empty table"); - lastCommitTime = config.fromCommitTime; - } - - Connection conn = getConnection(); - stmt = conn.createStatement(); - // drop the temp table if exists - String tempDbTable = config.tmpDb + "." + config.targetTable + "__" + config.sourceTable; - String tempDbTablePath = config.hoodieTmpDir + "/" + config.targetTable + "__" + config.sourceTable + "/" + lastCommitTime; - executeStatement("drop table " + tempDbTable, stmt); - deleteHDFSPath(fs, tempDbTablePath); - if (!ensureTempPathExists(fs, lastCommitTime)) { - throw new IllegalStateException( - "Could not create target path at " + new Path(config.hoodieTmpDir, - config.targetTable + "/" + lastCommitTime)); - } - - initHiveBeelineProperties(stmt); - executeIncrementalSQL(tempDbTable, tempDbTablePath, stmt); - log.info("Finished HoodieReader execution"); - } catch (SQLException e) { - log.error("Exception when executing SQL", e); - throw new IOException("Could not scan " + config.sourceTable + " incrementally", e); - } finally { - try { - if (stmt != null) - stmt.close(); - } catch (SQLException e) { - log.error("Could not close the resultset opened ", e); - } + } + } catch (SQLException e) { + throw new HoodieIncrementalPullException( + "Failed to get data location for table " + db + 
"." + table, e); + } finally { + try { + if (stmt != null) { + stmt.close(); } - } - - private void executeIncrementalSQL(String tempDbTable, String tempDbTablePath, Statement stmt) - throws FileNotFoundException, SQLException { - incrementalPullSQLtemplate.add("tempDbTable", tempDbTable); - incrementalPullSQLtemplate.add("tempDbTablePath", tempDbTablePath); - - String storedAsClause = getStoredAsClause(); - - incrementalPullSQLtemplate.add("storedAsClause", storedAsClause); - String incrementalSQL = - new Scanner(new File(config.incrementalSQLFile)).useDelimiter("\\Z").next(); - if (!incrementalSQL.contains(config.sourceDb + "." + config.sourceTable)) { - log.info("Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable - + ", which means its pulling from a different table. Fencing this from happening."); - throw new HoodieIncrementalPullSQLException( - "Incremental SQL does not have " + config.sourceDb + "." + config.sourceTable); - } - if (!incrementalSQL.contains("`_hoodie_commit_time` > '%targetBasePath'")) { - log.info("Incremental SQL : " + incrementalSQL - + " does not contain `_hoodie_commit_time` > '%targetBasePath'. 
Please add this clause for incremental to work properly."); - throw new HoodieIncrementalPullSQLException( - "Incremental SQL does not have clause `_hoodie_commit_time` > '%targetBasePath', which means its not pulling incrementally"); + if (resultSet != null) { + resultSet.close(); } + } catch (SQLException e) { + log.error("Could not close the resultset opened ", e); + } + } + return null; + } - incrementalPullSQLtemplate - .add("incrementalSQL", String.format(incrementalSQL, config.fromCommitTime)); - String sql = incrementalPullSQLtemplate.render(); - // Check if the SQL is pulling from the right database - executeStatement(sql, stmt); + private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException { + if (targetDataPath == null) { + throw new IllegalArgumentException( + "Please specify either --fromCommitTime or --targetDataPath"); + } + if (!fs.exists(new Path(targetDataPath)) || !fs.exists(new Path(targetDataPath + "/.hoodie"))) { + return "0"; + } + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, targetDataPath); + + Optional + lastCommit = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants().lastInstant(); + if (lastCommit.isPresent()) { + return lastCommit.get().getTimestamp(); + } + return "0"; + } + + private boolean ensureTempPathExists(FileSystem fs, String lastCommitTime) + throws IOException { + Path targetBaseDirPath = new Path(config.hoodieTmpDir, + config.targetTable + "__" + config.sourceTable); + if (!fs.exists(targetBaseDirPath)) { + log.info("Creating " + targetBaseDirPath + " with permission drwxrwxrwx"); + boolean result = FileSystem.mkdirs(fs, targetBaseDirPath, + new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); + if (!result) { + throw new HoodieException( + "Could not create " + targetBaseDirPath + " with the required permissions"); + } } - private String getStoredAsClause() { - if(config.tempTableStorageFormat.equalsIgnoreCase("json")) { - // 
Special case for json - // default json serde does not support having same key even if its under multiple depths - return "ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe' STORED AS TEXTFILE"; - } - return "STORED AS " + config.tempTableStorageFormat; + Path targetPath = new Path(targetBaseDirPath, lastCommitTime); + if (fs.exists(targetPath)) { + boolean result = fs.delete(targetPath, true); + if (!result) { + throw new HoodieException( + "Could not delete existing " + targetPath); + } } + log.info("Creating " + targetPath + " with permission drwxrwxrwx"); + return FileSystem.mkdirs(fs, targetBaseDirPath, + new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); + } - private void initHiveBeelineProperties(Statement stmt) throws SQLException { - log.info("Setting up Hive JDBC Session with properties"); - // set the queue - executeStatement("set mapred.job.queue.name=" + config.yarnQueueName, stmt); - // Set the inputformat to HoodieCombineHiveInputFormat - executeStatement("set hive.input.format=com.uber.hoodie.hadoop.hive.HoodieCombineHiveInputFormat", stmt); - // Allow queries without partition predicate - executeStatement("set hive.strict.checks.large.query=false", stmt); - // Dont gather stats for the table created - executeStatement("set hive.stats.autogather=false", stmt); - // Set the hoodie modie - executeStatement("set hoodie." + config.sourceTable + ".consume.mode=INCREMENTAL", stmt); - // Set the from commit time - executeStatement("set hoodie." + config.sourceTable + ".consume.start.timestamp=" - + config.fromCommitTime, stmt); - // Set number of commits to pull - executeStatement("set hoodie." 
+ config.sourceTable + ".consume.max.commits=" + String - .valueOf(config.maxCommits), stmt); + private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) + throws IOException { + HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, sourceTableLocation); + List commitsToSync = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants() + .findInstantsAfter(config.fromCommitTime, config.maxCommits).getInstants() + .map(HoodieInstant::getTimestamp) + .collect(Collectors.toList()); + if (commitsToSync.isEmpty()) { + log.warn("Nothing to sync. All commits in " + config.sourceTable + " are " + metadata + .getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants() + .getInstants() + .collect(Collectors.toList()) + " and from commit time is " + + config.fromCommitTime); + return null; } + log.info("Syncing commits " + commitsToSync); + return commitsToSync.get(commitsToSync.size() - 1); + } - private boolean deleteHDFSPath(FileSystem fs, String path) throws IOException { - log.info("Deleting path " + path); - return fs.delete(new Path(path), true); + private Connection getConnection() throws SQLException { + if (connection == null) { + DataSource ds = getDatasource(); + log.info("Getting Hive Connection from Datasource " + ds); + this.connection = ds.getConnection(); } + return connection; + } - private void executeStatement(String sql, Statement stmt) throws SQLException { - log.info("Executing: " + sql); - stmt.execute(sql); - } - - private String inferCommitTime(FileSystem fs) throws SQLException, IOException { - log.info("FromCommitTime not specified. Trying to infer it from Hoodie dataset " - + config.targetDb + "." 
+ config.targetTable); - String targetDataLocation = getTableLocation(config.targetDb, config.targetTable); - return scanForCommitTime(fs, targetDataLocation); - } - - private String getTableLocation(String db, String table) throws SQLException { - ResultSet resultSet = null; - Statement stmt = null; - try { - Connection conn = getConnection(); - stmt = conn.createStatement(); - resultSet = stmt.executeQuery("describe formatted `" + db + "." + table + "`"); - while (resultSet.next()) { - if (resultSet.getString(1).trim().equals("Location:")) { - log.info("Inferred table location for " + db + "." + table + " as " + resultSet - .getString(2)); - return resultSet.getString(2); - } - } - } catch (SQLException e) { - throw new HoodieIncrementalPullException( - "Failed to get data location for table " + db + "." + table, e); - } finally { - try { - if (stmt != null) - stmt.close(); - if (resultSet != null) - resultSet.close(); - } catch (SQLException e) { - log.error("Could not close the resultset opened ", e); - } - } - return null; - } - - private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException { - if(targetDataPath == null) { - throw new IllegalArgumentException("Please specify either --fromCommitTime or --targetDataPath"); - } - if(!fs.exists(new Path(targetDataPath)) || !fs.exists(new Path(targetDataPath + "/.hoodie"))) { - return "0"; - } - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, targetDataPath); - - Optional - lastCommit = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants().lastInstant(); - if(lastCommit.isPresent()) { - return lastCommit.get().getTimestamp(); - } - return "0"; - } - - private boolean ensureTempPathExists(FileSystem fs, String lastCommitTime) - throws IOException { - Path targetBaseDirPath = new Path(config.hoodieTmpDir, config.targetTable + "__" + config.sourceTable); - if(!fs.exists(targetBaseDirPath)) { - log.info("Creating " + targetBaseDirPath + " 
with permission drwxrwxrwx"); - boolean result = FileSystem.mkdirs(fs, targetBaseDirPath, - new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); - if (!result) { - throw new HoodieException( - "Could not create " + targetBaseDirPath + " with the required permissions"); - } - } - - Path targetPath = new Path(targetBaseDirPath, lastCommitTime); - if(fs.exists(targetPath)) { - boolean result = fs.delete(targetPath, true); - if (!result) { - throw new HoodieException( - "Could not delete existing " + targetPath); - } - } - log.info("Creating " + targetPath + " with permission drwxrwxrwx"); - return FileSystem.mkdirs(fs, targetBaseDirPath, - new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); - } - - private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException { - HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs, sourceTableLocation); - List commitsToSync = metadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants() - .findInstantsAfter(config.fromCommitTime, config.maxCommits).getInstants().map(HoodieInstant::getTimestamp) - .collect(Collectors.toList()); - if (commitsToSync.isEmpty()) { - log.warn("Nothing to sync. 
All commits in " + config.sourceTable + " are " + metadata - .getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants().getInstants() - .collect(Collectors.toList()) + " and from commit time is " - + config.fromCommitTime); - return null; - } - log.info("Syncing commits " + commitsToSync); - return commitsToSync.get(commitsToSync.size() - 1); - } - - private Connection getConnection() throws SQLException { - if (connection == null) { - DataSource ds = getDatasource(); - log.info("Getting Hive Connection from Datasource " + ds); - this.connection = ds.getConnection(); - } - return connection; - } - - private DataSource getDatasource() { - BasicDataSource ds = new BasicDataSource(); - ds.setDriverClassName(driverName); - ds.setUrl(config.hiveJDBCUrl); - ds.setUsername(config.hiveUsername); - ds.setPassword(config.hivePassword); - return ds; - } - - public static void main(String[] args) throws IOException { - final Config cfg = new Config(); - JCommander cmd = new JCommander(cfg, args); - if (cfg.help || args.length == 0) { - cmd.usage(); - System.exit(1); - } - new HiveIncrementalPuller(cfg).saveDelta(); + private DataSource getDatasource() { + BasicDataSource ds = new BasicDataSource(); + ds.setDriverClassName(driverName); + ds.setUrl(config.hiveJDBCUrl); + ds.setUsername(config.hiveUsername); + ds.setPassword(config.hivePassword); + return ds; + } + + public static void main(String[] args) throws IOException { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); } + new HiveIncrementalPuller(cfg).saveDelta(); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HoodieSnapshotCopier.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HoodieSnapshotCopier.java index 9bb7869bd..0a74e2036 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HoodieSnapshotCopier.java +++ 
b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/HoodieSnapshotCopier.java @@ -20,7 +20,6 @@ package com.uber.hoodie.utilities; import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; - import com.uber.hoodie.common.model.HoodieDataFile; import com.uber.hoodie.common.model.HoodiePartitionMetadata; import com.uber.hoodie.common.table.HoodieTableConfig; @@ -30,7 +29,12 @@ import com.uber.hoodie.common.table.TableFileSystemView; import com.uber.hoodie.common.table.timeline.HoodieInstant; import com.uber.hoodie.common.table.view.HoodieTableFileSystemView; import com.uber.hoodie.common.util.FSUtils; - +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Stream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -39,140 +43,154 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; - import scala.Tuple2; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.stream.Stream; - /** - * Hoodie snapshot copy job which copies latest files from all partitions to another place, for snapshot backup. + * Hoodie snapshot copy job which copies latest files from all partitions to another place, for + * snapshot backup. 
*/ public class HoodieSnapshotCopier implements Serializable { - private static Logger logger = LogManager.getLogger(HoodieSnapshotCopier.class); - static class Config implements Serializable { - @Parameter(names = {"--base-path", "-bp"}, description = "Hoodie table base path", required = true) - String basePath = null; + private static Logger logger = LogManager.getLogger(HoodieSnapshotCopier.class); - @Parameter(names = {"--output-path", "-op"}, description = "The snapshot output path", required = true) - String outputPath = null; + static class Config implements Serializable { - @Parameter(names = {"--date-partitioned", "-dp"}, description = "Can we assume date partitioning?") - boolean shouldAssumeDatePartitioning = false; + @Parameter(names = {"--base-path", + "-bp"}, description = "Hoodie table base path", required = true) + String basePath = null; + + @Parameter(names = {"--output-path", + "-op"}, description = "The snapshot output path", required = true) + String outputPath = null; + + @Parameter(names = {"--date-partitioned", + "-dp"}, description = "Can we assume date partitioning?") + boolean shouldAssumeDatePartitioning = false; + } + + public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir, + final boolean shouldAssumeDatePartitioning) throws IOException { + FileSystem fs = FSUtils.getFs(); + final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs, baseDir); + final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView( + tableMetadata, + tableMetadata.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants()); + // Get the latest commit + Optional latestCommit = tableMetadata.getActiveTimeline() + .getCommitsAndCompactionsTimeline().filterCompletedInstants().lastInstant(); + if (!latestCommit.isPresent()) { + logger.warn("No commits present. 
Nothing to snapshot"); + return; } + final String latestCommitTimestamp = latestCommit.get().getTimestamp(); + logger.info(String + .format("Starting to snapshot latest version files which are also no-late-than %s.", + latestCommitTimestamp)); - public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir, final boolean shouldAssumeDatePartitioning) throws IOException { - FileSystem fs = FSUtils.getFs(); - final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs, baseDir); - final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata, - tableMetadata.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants()); - // Get the latest commit - Optional latestCommit = tableMetadata.getActiveTimeline() - .getCommitsAndCompactionsTimeline().filterCompletedInstants().lastInstant(); - if(!latestCommit.isPresent()) { - logger.warn("No commits present. Nothing to snapshot"); - return; - } - final String latestCommitTimestamp = latestCommit.get().getTimestamp(); - logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.", latestCommitTimestamp)); + List partitions = FSUtils + .getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning); + if (partitions.size() > 0) { + logger.info(String.format("The job needs to copy %d partitions.", partitions.size())); - List partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning); - if (partitions.size() > 0) { - logger.info(String.format("The job needs to copy %d partitions.", partitions.size())); + // Make sure the output directory is empty + Path outputPath = new Path(outputDir); + if (fs.exists(outputPath)) { + logger.warn( + String.format("The output path %targetBasePath already exists, deleting", outputPath)); + fs.delete(new Path(outputDir), true); + } - // Make sure the output directory is empty - Path outputPath = new Path(outputDir); - if (fs.exists(outputPath)) 
{ - logger.warn(String.format("The output path %targetBasePath already exists, deleting", outputPath)); - fs.delete(new Path(outputDir), true); + jsc.parallelize(partitions, partitions.size()) + .flatMap(partition -> { + // Only take latest version files <= latestCommit. + FileSystem fs1 = FSUtils.getFs(); + List> filePaths = new ArrayList<>(); + Stream dataFiles = fsView + .getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp); + dataFiles.forEach( + hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); + + // also need to copy over partition metadata + Path partitionMetaFile = new Path(new Path(baseDir, partition), + HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); + if (fs1.exists(partitionMetaFile)) { + filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString())); } - jsc.parallelize(partitions, partitions.size()) - .flatMap(partition -> { - // Only take latest version files <= latestCommit. - FileSystem fs1 = FSUtils.getFs(); - List> filePaths = new ArrayList<>(); - Stream dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp); - dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); + return filePaths.iterator(); + }).foreach(tuple -> { + String partition = tuple._1(); + Path sourceFilePath = new Path(tuple._2()); + Path toPartitionPath = new Path(outputDir, partition); + FileSystem fs1 = FSUtils.getFs(); - // also need to copy over partition metadata - Path partitionMetaFile = new Path(new Path(baseDir, partition), HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); - if (fs1.exists(partitionMetaFile)) { - filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString())); - } + if (!fs1.exists(toPartitionPath)) { + fs1.mkdirs(toPartitionPath); + } + FileUtil.copy(fs1, sourceFilePath, fs1, + new Path(toPartitionPath, sourceFilePath.getName()), false, fs1.getConf()); + }); - return filePaths.iterator(); - }).foreach(tuple -> { - 
String partition = tuple._1(); - Path sourceFilePath = new Path(tuple._2()); - Path toPartitionPath = new Path(outputDir, partition); - FileSystem fs1 = FSUtils.getFs(); - - if (!fs1.exists(toPartitionPath)) { - fs1.mkdirs(toPartitionPath); - } - FileUtil.copy(fs1, sourceFilePath, fs1, - new Path(toPartitionPath, sourceFilePath.getName()), false, fs1.getConf()); - }); - - // Also copy the .commit files - logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp)); - FileStatus[] commitFilesToCopy = fs.listStatus( - new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> { - if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) { - return true; - } else { - String commitTime = - FSUtils.getCommitFromCommitFile(commitFilePath.getName()); - return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp, HoodieTimeline.LESSER_OR_EQUAL); - } - }); - for (FileStatus commitStatus : commitFilesToCopy) { - Path targetFilePath = new Path( - outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitStatus - .getPath().getName()); - if (! 
fs.exists(targetFilePath.getParent())) { - fs.mkdirs(targetFilePath.getParent()); - } - if (fs.exists(targetFilePath)) { - logger.error(String.format("The target output commit file (%targetBasePath) already exists.", targetFilePath)); - } - FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf()); + // Also copy the .commit files + logger.info( + String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp)); + FileStatus[] commitFilesToCopy = fs.listStatus( + new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> { + if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) { + return true; + } else { + String commitTime = + FSUtils.getCommitFromCommitFile(commitFilePath.getName()); + return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp, + HoodieTimeline.LESSER_OR_EQUAL); } - } else { - logger.info("The job has 0 partition to copy."); + }); + for (FileStatus commitStatus : commitFilesToCopy) { + Path targetFilePath = new Path( + outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitStatus + .getPath().getName()); + if (!fs.exists(targetFilePath.getParent())) { + fs.mkdirs(targetFilePath.getParent()); } - - // Create the _SUCCESS tag - Path successTagPath = new Path(outputDir + "/_SUCCESS"); - if (!fs.exists(successTagPath)) { - logger.info("Creating _SUCCESS under targetBasePath: " + outputDir); - fs.createNewFile(successTagPath); + if (fs.exists(targetFilePath)) { + logger.error(String + .format("The target output commit file (%targetBasePath) already exists.", + targetFilePath)); } + FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf()); + } + } else { + logger.info("The job has 0 partition to copy."); } - public static void main(String[] args) throws IOException { - // Take input configs - final Config cfg = new Config(); - new JCommander(cfg, args); - logger.info(String.format("Snapshot hoodie 
table from %targetBasePath to %targetBasePath", cfg.basePath, cfg.outputPath)); - - // Create a spark job to do the snapshot copy - SparkConf sparkConf = new SparkConf().setAppName("Hoodie-snapshot-copier"); - sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - JavaSparkContext jsc = new JavaSparkContext(sparkConf); - logger.info("Initializing spark job."); - - // Copy - HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); - copier.snapshot(jsc, cfg.basePath, cfg.outputPath, cfg.shouldAssumeDatePartitioning); - - // Stop the job - jsc.stop(); + // Create the _SUCCESS tag + Path successTagPath = new Path(outputDir + "/_SUCCESS"); + if (!fs.exists(successTagPath)) { + logger.info("Creating _SUCCESS under targetBasePath: " + outputDir); + fs.createNewFile(successTagPath); } + } + + public static void main(String[] args) throws IOException { + // Take input configs + final Config cfg = new Config(); + new JCommander(cfg, args); + logger.info(String + .format("Snapshot hoodie table from %targetBasePath to %targetBasePath", cfg.basePath, + cfg.outputPath)); + + // Create a spark job to do the snapshot copy + SparkConf sparkConf = new SparkConf().setAppName("Hoodie-snapshot-copier"); + sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + logger.info("Initializing spark job."); + + // Copy + HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); + copier.snapshot(jsc, cfg.basePath, cfg.outputPath, cfg.shouldAssumeDatePartitioning); + + // Stop the job + jsc.stop(); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/UtilHelpers.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/UtilHelpers.java index 69ad2e7e7..502e36e83 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/UtilHelpers.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/UtilHelpers.java @@ -18,16 +18,12 @@ package 
com.uber.hoodie.utilities; -import com.uber.hoodie.common.model.HoodieRecordPayload; import com.uber.hoodie.exception.HoodieIOException; -import com.uber.hoodie.exception.HoodieNotSupportedException; -import com.uber.hoodie.KeyGenerator; +import com.uber.hoodie.utilities.exception.HoodieDeltaStreamerException; import com.uber.hoodie.utilities.schema.SchemaProvider; import com.uber.hoodie.utilities.sources.Source; -import com.uber.hoodie.utilities.exception.HoodieDeltaStreamerException; import com.uber.hoodie.utilities.sources.SourceDataFormat; - -import org.apache.avro.generic.GenericRecord; +import java.io.IOException; import org.apache.commons.configuration.ConfigurationException; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.lang3.reflect.ConstructorUtils; @@ -36,50 +32,49 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaSparkContext; -import java.io.IOException; -import java.util.List; - /** * Bunch of helper methods */ public class UtilHelpers { - public static Source createSource(String sourceClass, PropertiesConfiguration cfg, JavaSparkContext jssc, SourceDataFormat dataFormat, SchemaProvider schemaProvider) throws IOException { - try { - return (Source) ConstructorUtils.invokeConstructor(Class.forName(sourceClass), (Object) cfg, (Object) jssc, (Object) dataFormat, (Object) schemaProvider); - } catch (Throwable e) { - throw new IOException("Could not load source class " + sourceClass, e); - } + public static Source createSource(String sourceClass, PropertiesConfiguration cfg, + JavaSparkContext jssc, SourceDataFormat dataFormat, SchemaProvider schemaProvider) + throws IOException { + try { + return (Source) ConstructorUtils + .invokeConstructor(Class.forName(sourceClass), (Object) cfg, (Object) jssc, + (Object) dataFormat, (Object) schemaProvider); + } catch (Throwable e) { + throw new IOException("Could not load source class " + sourceClass, e); } + 
} - public static SchemaProvider createSchemaProvider(String schemaProviderClass, PropertiesConfiguration cfg) throws IOException { - try { - return (SchemaProvider) ConstructorUtils.invokeConstructor(Class.forName(schemaProviderClass), (Object) cfg); - } catch (Throwable e) { - throw new IOException("Could not load schema provider class " + schemaProviderClass, e); - } + public static SchemaProvider createSchemaProvider(String schemaProviderClass, + PropertiesConfiguration cfg) throws IOException { + try { + return (SchemaProvider) ConstructorUtils + .invokeConstructor(Class.forName(schemaProviderClass), (Object) cfg); + } catch (Throwable e) { + throw new IOException("Could not load schema provider class " + schemaProviderClass, e); } + } - /** - * - * TODO: Support hierarchical config files (see CONFIGURATION-609 for sample) - * - * @param fs - * @param cfgPath - * @return - */ - public static PropertiesConfiguration readConfig(FileSystem fs, Path cfgPath) { - try { - FSDataInputStream in = fs.open(cfgPath); - PropertiesConfiguration config = new PropertiesConfiguration(); - config.load(in); - in.close(); - return config; - } catch (IOException e) { - throw new HoodieIOException("Unable to read config file at :" + cfgPath, e); - } catch (ConfigurationException e) { - throw new HoodieDeltaStreamerException("Invalid configs found in config file at :" + cfgPath, e); - } + /** + * TODO: Support hierarchical config files (see CONFIGURATION-609 for sample) + */ + public static PropertiesConfiguration readConfig(FileSystem fs, Path cfgPath) { + try { + FSDataInputStream in = fs.open(cfgPath); + PropertiesConfiguration config = new PropertiesConfiguration(); + config.load(in); + in.close(); + return config; + } catch (IOException e) { + throw new HoodieIOException("Unable to read config file at :" + cfgPath, e); + } catch (ConfigurationException e) { + throw new HoodieDeltaStreamerException("Invalid configs found in config file at :" + cfgPath, + e); } + } } diff --git 
a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/deltastreamer/HoodieDeltaStreamer.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/deltastreamer/HoodieDeltaStreamer.java index 02fc0d7ce..ad8ccd2ed 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -47,6 +47,13 @@ import com.uber.hoodie.utilities.schema.SchemaProvider; import com.uber.hoodie.utilities.sources.DFSSource; import com.uber.hoodie.utilities.sources.Source; import com.uber.hoodie.utilities.sources.SourceDataFormat; +import java.io.IOException; +import java.io.Serializable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Optional; +import java.util.Properties; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; @@ -60,299 +67,326 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import scala.collection.JavaConversions; -import java.io.IOException; -import java.io.Serializable; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Optional; -import java.util.Properties; - /** - * An Utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply it to the target dataset. - * Does not maintain any state, queries at runtime to see how far behind the target dataset is from - * the source dataset. This can be overriden to force sync from a timestamp. + * An Utility which can incrementally take the output from {@link HiveIncrementalPuller} and apply + * it to the target dataset. Does not maintain any state, queries at runtime to see how far behind + * the target dataset is from the source dataset. This can be overriden to force sync from a + * timestamp. 
*/ public class HoodieDeltaStreamer implements Serializable { - private static volatile Logger log = LogManager.getLogger(HoodieDeltaStreamer.class); + private static volatile Logger log = LogManager.getLogger(HoodieDeltaStreamer.class); - private static String CHECKPOINT_KEY = "deltastreamer.checkpoint.key"; + private static String CHECKPOINT_KEY = "deltastreamer.checkpoint.key"; - private final Config cfg; + private final Config cfg; - /** - * Source to pull deltas from - */ - private transient Source source; + /** + * Source to pull deltas from + */ + private transient Source source; - /** - * Schema provider that supplies the command for reading the input and writing out the - * target table. - */ - private transient SchemaProvider schemaProvider; + /** + * Schema provider that supplies the command for reading the input and writing out the target + * table. + */ + private transient SchemaProvider schemaProvider; - /** - * Extract the key for the target dataset - */ - private KeyGenerator keyGenerator; + /** + * Extract the key for the target dataset + */ + private KeyGenerator keyGenerator; - /** - * Filesystem used - */ - private transient FileSystem fs; + /** + * Filesystem used + */ + private transient FileSystem fs; - /** - * Timeline with completed commits - */ - private transient Optional commitTimelineOpt; + /** + * Timeline with completed commits + */ + private transient Optional commitTimelineOpt; - /** - * Spark context - */ - private transient JavaSparkContext jssc; + /** + * Spark context + */ + private transient JavaSparkContext jssc; - public HoodieDeltaStreamer(Config cfg) throws IOException { - this.cfg = cfg; - this.fs = FSUtils.getFs(); + public HoodieDeltaStreamer(Config cfg) throws IOException { + this.cfg = cfg; + this.fs = FSUtils.getFs(); + + if (fs.exists(new Path(cfg.targetBasePath))) { + HoodieTableMetaClient meta = new HoodieTableMetaClient(fs, cfg.targetBasePath); + this.commitTimelineOpt = Optional + 
.of(meta.getActiveTimeline().getCommitsAndCompactionsTimeline() + .filterCompletedInstants()); + } else { + this.commitTimelineOpt = Optional.empty(); + } + + //TODO(vc) Should these be passed from outside? + initSchemaProvider(); + initKeyGenerator(); + this.jssc = getSparkContext(); + + initSource(); + } + + private void initSource() throws IOException { + // Create the source & schema providers + PropertiesConfiguration sourceCfg = UtilHelpers.readConfig(fs, new Path(cfg.sourceConfigProps)); + log.info("Creating source " + cfg.sourceClassName + " with configs : " + sourceCfg.toString()); + this.source = UtilHelpers + .createSource(cfg.sourceClassName, sourceCfg, jssc, cfg.sourceFormat, schemaProvider); + } + + private void initSchemaProvider() throws IOException { + PropertiesConfiguration schemaCfg = UtilHelpers + .readConfig(fs, new Path(cfg.schemaProviderConfigProps)); + log.info( + "Creating schema provider " + cfg.schemaProviderClassName + " with configs : " + schemaCfg + .toString()); + this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, schemaCfg); + } + + private void initKeyGenerator() throws IOException { + PropertiesConfiguration keygenCfg = UtilHelpers.readConfig(fs, new Path(cfg.keyGeneratorProps)); + log.info("Creating key generator " + cfg.keyGeneratorClass + " with configs : " + keygenCfg + .toString()); + this.keyGenerator = DataSourceUtils.createKeyGenerator(cfg.keyGeneratorClass, keygenCfg); + } - if (fs.exists(new Path(cfg.targetBasePath))) { - HoodieTableMetaClient meta = new HoodieTableMetaClient(fs, cfg.targetBasePath); - this.commitTimelineOpt = Optional.of(meta.getActiveTimeline().getCommitsAndCompactionsTimeline().filterCompletedInstants()); + private JavaSparkContext getSparkContext() { + SparkConf sparkConf = new SparkConf() + .setAppName("hoodie-delta-streamer-" + cfg.targetTableName); + //sparkConf.setMaster(cfg.sparkMaster); + sparkConf.setMaster("local[2]"); + sparkConf.set("spark.serializer", 
"org.apache.spark.serializer.KryoSerializer"); + sparkConf.set("spark.driver.maxResultSize", "2g"); + + // Configure hadoop conf + sparkConf.set("spark.hadoop.mapred.output.compress", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); + sparkConf.set("spark.hadoop.mapred.output.compression.codec", + "org.apache.hadoop.io.compress.GzipCodec"); + sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); + + sparkConf = HoodieWriteClient.registerClasses(sparkConf); + // register the schemas, so that shuffle does not serialize the full schemas + List schemas = Arrays + .asList(schemaProvider.getSourceSchema(), schemaProvider.getTargetSchema()); + sparkConf.registerAvroSchemas(JavaConversions.asScalaBuffer(schemas).toList()); + return new JavaSparkContext(sparkConf); + } + + private void sync() throws Exception { + // Retrieve the previous round checkpoints, if any + Optional resumeCheckpointStr = Optional.empty(); + if (commitTimelineOpt.isPresent()) { + Optional lastCommit = commitTimelineOpt.get().lastInstant(); + if (lastCommit.isPresent()) { + HoodieCommitMetadata commitMetadata = + HoodieCommitMetadata + .fromBytes(commitTimelineOpt.get().getInstantDetails(lastCommit.get()).get()); + if (commitMetadata.getMetadata(CHECKPOINT_KEY) != null) { + resumeCheckpointStr = Optional.of(commitMetadata.getMetadata(CHECKPOINT_KEY)); } else { - this.commitTimelineOpt = Optional.empty(); + throw new HoodieDeltaStreamerException( + "Unable to find previous checkpoint. Please double check if this table " + + "was indeed built via delta streamer "); } + } + } else { + Properties properties = new Properties(); + properties.put(HoodieWriteConfig.TABLE_NAME, cfg.targetTableName); + HoodieTableMetaClient + .initializePathAsHoodieDataset(FSUtils.getFs(), cfg.targetBasePath, properties); + } + log.info("Checkpoint to resume from : " + resumeCheckpointStr); - //TODO(vc) Should these be passed from outside? 
- initSchemaProvider(); - initKeyGenerator(); - this.jssc = getSparkContext(); + // Pull the data from the source & prepare the write + Pair>, String> dataAndCheckpoint = source + .fetchNewData(resumeCheckpointStr, cfg.maxInputBytes); - initSource(); + if (!dataAndCheckpoint.getKey().isPresent()) { + log.info("No new data, nothing to commit.. "); + return; } - private void initSource() throws IOException { - // Create the source & schema providers - PropertiesConfiguration sourceCfg = UtilHelpers.readConfig(fs, new Path(cfg.sourceConfigProps)); - log.info("Creating source " + cfg.sourceClassName + " with configs : " + sourceCfg.toString()); - this.source = UtilHelpers.createSource(cfg.sourceClassName, sourceCfg, jssc, cfg.sourceFormat, schemaProvider); + JavaRDD avroRDD = dataAndCheckpoint.getKey().get(); + JavaRDD records = avroRDD + .map(gr -> { + HoodieRecordPayload payload = DataSourceUtils.createPayload( + cfg.payloadClassName, + gr, + (Comparable) gr.get(cfg.sourceOrderingField)); + return new HoodieRecord<>(keyGenerator.getKey(gr), payload); + }); + + // Perform the write + HoodieWriteConfig hoodieCfg = getHoodieClientConfig(cfg.hoodieClientProps); + HoodieWriteClient client = new HoodieWriteClient<>(jssc, hoodieCfg); + String commitTime = client.startCommit(); + log.info("Starting commit : " + commitTime); + + JavaRDD writeStatusRDD; + if (cfg.operation == Operation.INSERT) { + writeStatusRDD = client.insert(records, commitTime); + } else if (cfg.operation == Operation.UPSERT) { + writeStatusRDD = client.upsert(records, commitTime); + } else { + throw new HoodieDeltaStreamerException("Unknown operation :" + cfg.operation); } - private void initSchemaProvider() throws IOException { - PropertiesConfiguration schemaCfg = UtilHelpers.readConfig(fs, new Path(cfg.schemaProviderConfigProps)); - log.info("Creating schema provider " + cfg.schemaProviderClassName + " with configs : " + schemaCfg.toString()); - this.schemaProvider = 
UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, schemaCfg); + // Simply commit for now. TODO(vc): Support better error handlers later on + HashMap checkpointCommitMetadata = new HashMap<>(); + checkpointCommitMetadata.put(CHECKPOINT_KEY, dataAndCheckpoint.getValue()); + + boolean success = client + .commit(commitTime, writeStatusRDD, Optional.of(checkpointCommitMetadata)); + if (success) { + log.info("Commit " + commitTime + " successful!"); + // TODO(vc): Kick off hive sync from here. + + } else { + log.info("Commit " + commitTime + " failed!"); } + client.close(); + } - private void initKeyGenerator() throws IOException { - PropertiesConfiguration keygenCfg = UtilHelpers.readConfig(fs, new Path(cfg.keyGeneratorProps)); - log.info("Creating key generator " + cfg.keyGeneratorClass + " with configs : " + keygenCfg.toString()); - this.keyGenerator = DataSourceUtils.createKeyGenerator(cfg.keyGeneratorClass, keygenCfg); + private HoodieWriteConfig getHoodieClientConfig(String hoodieClientCfgPath) throws Exception { + return HoodieWriteConfig.newBuilder() + .combineInput(true, true) + .withPath(cfg.targetBasePath) + .withAutoCommit(false) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withPayloadClass(OverwriteWithLatestAvroPayload.class.getName()).build()) + .withSchema(schemaProvider.getTargetSchema().toString()) + .forTable(cfg.targetTableName) + .withIndexConfig( + HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) + .fromInputStream(fs.open(new Path(hoodieClientCfgPath))) + .build(); + } + + private enum Operation { + UPSERT, + INSERT + } + + private class OperationConvertor implements IStringConverter { + + @Override + public Operation convert(String value) throws ParameterException { + return Operation.valueOf(value); } + } + private class SourceFormatConvertor implements IStringConverter { - private JavaSparkContext getSparkContext() { - SparkConf sparkConf = new 
SparkConf().setAppName("hoodie-delta-streamer-" + cfg.targetTableName); - //sparkConf.setMaster(cfg.sparkMaster); - sparkConf.setMaster("local[2]"); - sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); - sparkConf.set("spark.driver.maxResultSize", "2g"); - - // Configure hadoop conf - sparkConf.set("spark.hadoop.mapred.output.compress", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", "true"); - sparkConf.set("spark.hadoop.mapred.output.compression.codec", - "org.apache.hadoop.io.compress.GzipCodec"); - sparkConf.set("spark.hadoop.mapred.output.compression.type", "BLOCK"); - - sparkConf = HoodieWriteClient.registerClasses(sparkConf); - // register the schemas, so that shuffle does not serialize the full schemas - List schemas = Arrays.asList(schemaProvider.getSourceSchema(), schemaProvider.getTargetSchema()); - sparkConf.registerAvroSchemas(JavaConversions.asScalaBuffer(schemas).toList()); - return new JavaSparkContext(sparkConf); + @Override + public SourceDataFormat convert(String value) throws ParameterException { + return SourceDataFormat.valueOf(value); } + } - private void sync() throws Exception { - // Retrieve the previous round checkpoints, if any - Optional resumeCheckpointStr = Optional.empty(); - if (commitTimelineOpt.isPresent()) { - Optional lastCommit = commitTimelineOpt.get().lastInstant(); - if (lastCommit.isPresent()) { - HoodieCommitMetadata commitMetadata = - HoodieCommitMetadata.fromBytes(commitTimelineOpt.get().getInstantDetails(lastCommit.get()).get()); - if (commitMetadata.getMetadata(CHECKPOINT_KEY) != null) { - resumeCheckpointStr = Optional.of(commitMetadata.getMetadata(CHECKPOINT_KEY)); - } else { - throw new HoodieDeltaStreamerException("Unable to find previous checkpoint. 
Please double check if this table " + - "was indeed built via delta streamer "); - } - } - } else { - Properties properties = new Properties(); - properties.put(HoodieWriteConfig.TABLE_NAME, cfg.targetTableName); - HoodieTableMetaClient.initializePathAsHoodieDataset(FSUtils.getFs(), cfg.targetBasePath, properties); - } - log.info("Checkpoint to resume from : " + resumeCheckpointStr); + public static class Config implements Serializable { + + /** + * TARGET CONFIGS + **/ + @Parameter(names = { + "--target-base-path"}, description = "base path for the target hoodie dataset", required = true) + public String targetBasePath; + + // TODO: How to obtain hive configs to register? + @Parameter(names = { + "--target-table"}, description = "name of the target table in Hive", required = true) + public String targetTableName; + + @Parameter(names = {"--hoodie-client-config"}, description = + "path to properties file on localfs or dfs, with hoodie client config. Sane defaults" + + "are used, but recommend use to provide basic things like metrics endpoints, hive configs etc") + public String hoodieClientProps = null; + + /** + * SOURCE CONFIGS + **/ + @Parameter(names = {"--source-class"}, description = + "subclass of com.uber.hoodie.utilities.sources.Source to use to read data. " + + "built-in options: com.uber.hoodie.utilities.common.{DFSSource (default), KafkaSource, HiveIncrPullSource}") + public String sourceClassName = DFSSource.class.getName(); + + @Parameter(names = {"--source-config"}, description = + "path to properties file on localfs or dfs, with source configs. " + + "For list of acceptable properties, refer the source class", required = true) + public String sourceConfigProps = null; + + @Parameter(names = {"--source-format"}, description = + "Format of data in source, JSON (default), Avro. 
All source data is " + + "converted to Avro using the provided schema in any case", converter = SourceFormatConvertor.class) + public SourceDataFormat sourceFormat = SourceDataFormat.JSON; + + @Parameter(names = {"--source-ordering-field"}, description = + "Field within source record to decide how to break ties between " + + " records with same key in input data. Default: 'ts' holding unix timestamp of record") + public String sourceOrderingField = "ts"; + + @Parameter(names = {"--key-generator-class"}, description = + "Subclass of com.uber.hoodie.utilities.common.KeyExtractor to generate" + + "a HoodieKey from the given avro record. Built in: SimpleKeyGenerator (Uses provided field names as recordkey & partitionpath. " + + + "Nested fields specified via dot notation, e.g: a.b.c)") + public String keyGeneratorClass = SimpleKeyGenerator.class.getName(); + + @Parameter(names = {"--key-generator-config"}, description = + "Path to properties file on localfs or dfs, with KeyGenerator configs. " + + "For list of acceptable properites, refer the KeyGenerator class", required = true) + public String keyGeneratorProps = null; + + @Parameter(names = {"--payload-class"}, description = + "subclass of HoodieRecordPayload, that works off a GenericRecord. " + + "Default: SourceWrapperPayload. Implement your own, if you want to do something other than overwriting existing value") + public String payloadClassName = OverwriteWithLatestAvroPayload.class.getName(); + + @Parameter(names = {"--schemaprovider-class"}, description = + "subclass of com.uber.hoodie.utilities.schema.SchemaProvider " + + "to attach schemas to input & target table data, built in options: FilebasedSchemaProvider") + public String schemaProviderClassName = FilebasedSchemaProvider.class.getName(); + + @Parameter(names = {"--schemaprovider-config"}, description = + "path to properties file on localfs or dfs, with schema configs. 
" + + "For list of acceptable properties, refer the schema provider class", required = true) + public String schemaProviderConfigProps = null; - // Pull the data from the source & prepare the write - Pair>, String> dataAndCheckpoint = source.fetchNewData(resumeCheckpointStr, cfg.maxInputBytes); + /** + * Other configs + **/ + @Parameter(names = { + "--max-input-bytes"}, description = "Maximum number of bytes to read from source. Default: 1TB") + public long maxInputBytes = 1L * 1024 * 1024 * 1024 * 1024; - if (!dataAndCheckpoint.getKey().isPresent()) { - log.info("No new data, nothing to commit.. "); - return; - } - - JavaRDD avroRDD = dataAndCheckpoint.getKey().get(); - JavaRDD records = avroRDD - .map(gr -> { - HoodieRecordPayload payload = DataSourceUtils.createPayload( - cfg.payloadClassName, - gr, - (Comparable) gr.get(cfg.sourceOrderingField)); - return new HoodieRecord<>(keyGenerator.getKey(gr), payload); - }); + @Parameter(names = {"--op"}, description = + "Takes one of these values : UPSERT (default), INSERT (use when input " + + "is purely new data/inserts to gain speed)", converter = OperationConvertor.class) + public Operation operation = Operation.UPSERT; - // Perform the write - HoodieWriteConfig hoodieCfg = getHoodieClientConfig(cfg.hoodieClientProps); - HoodieWriteClient client = new HoodieWriteClient<>(jssc, hoodieCfg); - String commitTime = client.startCommit(); - log.info("Starting commit : " + commitTime); + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + } - JavaRDD writeStatusRDD; - if (cfg.operation == Operation.INSERT) { - writeStatusRDD = client.insert(records, commitTime); - } else if (cfg.operation == Operation.UPSERT) { - writeStatusRDD = client.upsert(records, commitTime); - } else { - throw new HoodieDeltaStreamerException("Unknown operation :" + cfg.operation); - } - - // Simply commit for now. 
TODO(vc): Support better error handlers later on - HashMap checkpointCommitMetadata = new HashMap<>(); - checkpointCommitMetadata.put(CHECKPOINT_KEY, dataAndCheckpoint.getValue()); - - boolean success = client.commit(commitTime, writeStatusRDD, Optional.of(checkpointCommitMetadata)); - if (success) { - log.info("Commit " + commitTime + " successful!"); - // TODO(vc): Kick off hive sync from here. - - } else { - log.info("Commit " + commitTime + " failed!"); - } - client.close(); - } - - private HoodieWriteConfig getHoodieClientConfig(String hoodieClientCfgPath) throws Exception { - return HoodieWriteConfig.newBuilder() - .combineInput(true, true) - .withPath(cfg.targetBasePath) - .withAutoCommit(false) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withPayloadClass(OverwriteWithLatestAvroPayload.class.getName()).build()) - .withSchema(schemaProvider.getTargetSchema().toString()) - .forTable(cfg.targetTableName) - .withIndexConfig( - HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .fromInputStream(fs.open(new Path(hoodieClientCfgPath))) - .build(); - } - - private enum Operation { - UPSERT, - INSERT - } - - private class OperationConvertor implements IStringConverter { - @Override - public Operation convert(String value) throws ParameterException { - return Operation.valueOf(value); - } - } - - private class SourceFormatConvertor implements IStringConverter { - @Override - public SourceDataFormat convert(String value) throws ParameterException { - return SourceDataFormat.valueOf(value); - } - } - - public static class Config implements Serializable { - - /** TARGET CONFIGS **/ - @Parameter(names = {"--target-base-path"}, description = "base path for the target hoodie dataset", required = true) - public String targetBasePath; - - // TODO: How to obtain hive configs to register? 
- @Parameter(names = {"--target-table"}, description = "name of the target table in Hive", required = true) - public String targetTableName; - - @Parameter(names = {"--hoodie-client-config"}, description = "path to properties file on localfs or dfs, with hoodie client config. Sane defaults" + - "are used, but recommend use to provide basic things like metrics endpoints, hive configs etc") - public String hoodieClientProps = null; - - /** SOURCE CONFIGS **/ - @Parameter(names = {"--source-class"}, description = "subclass of com.uber.hoodie.utilities.sources.Source to use to read data. " + - "built-in options: com.uber.hoodie.utilities.common.{DFSSource (default), KafkaSource, HiveIncrPullSource}") - public String sourceClassName = DFSSource.class.getName(); - - @Parameter(names = {"--source-config"}, description = "path to properties file on localfs or dfs, with source configs. " + - "For list of acceptable properties, refer the source class", required = true) - public String sourceConfigProps = null; - - @Parameter(names = {"--source-format"}, description = "Format of data in source, JSON (default), Avro. All source data is " + - "converted to Avro using the provided schema in any case", converter = SourceFormatConvertor.class) - public SourceDataFormat sourceFormat = SourceDataFormat.JSON; - - @Parameter(names = {"--source-ordering-field"}, description = "Field within source record to decide how to break ties between " + - " records with same key in input data. Default: 'ts' holding unix timestamp of record") - public String sourceOrderingField = "ts"; - - @Parameter(names = {"--key-generator-class"}, description = "Subclass of com.uber.hoodie.utilities.common.KeyExtractor to generate" + - "a HoodieKey from the given avro record. Built in: SimpleKeyGenerator (Uses provided field names as recordkey & partitionpath. 
" + - "Nested fields specified via dot notation, e.g: a.b.c)") - public String keyGeneratorClass = SimpleKeyGenerator.class.getName(); - - @Parameter(names = {"--key-generator-config"}, description = "Path to properties file on localfs or dfs, with KeyGenerator configs. " + - "For list of acceptable properites, refer the KeyGenerator class", required = true) - public String keyGeneratorProps = null; - - @Parameter(names = {"--payload-class"}, description = "subclass of HoodieRecordPayload, that works off a GenericRecord. " + - "Default: SourceWrapperPayload. Implement your own, if you want to do something other than overwriting existing value") - public String payloadClassName = OverwriteWithLatestAvroPayload.class.getName(); - - @Parameter(names = {"--schemaprovider-class"}, description = "subclass of com.uber.hoodie.utilities.schema.SchemaProvider " + - "to attach schemas to input & target table data, built in options: FilebasedSchemaProvider") - public String schemaProviderClassName = FilebasedSchemaProvider.class.getName(); - - @Parameter(names = {"--schemaprovider-config"}, description = "path to properties file on localfs or dfs, with schema configs. " + - "For list of acceptable properties, refer the schema provider class", required = true) - public String schemaProviderConfigProps = null; - - - /** Other configs **/ - @Parameter(names = {"--max-input-bytes"}, description = "Maximum number of bytes to read from source. 
Default: 1TB") - public long maxInputBytes = 1L * 1024 * 1024 * 1024 * 1024; - - @Parameter(names = {"--op"}, description = "Takes one of these values : UPSERT (default), INSERT (use when input " + - "is purely new data/inserts to gain speed)", converter = OperationConvertor.class) - public Operation operation = Operation.UPSERT; - - - @Parameter(names = {"--help", "-h"}, help = true) - public Boolean help = false; - } - - public static void main(String[] args) throws Exception { - final Config cfg = new Config(); - JCommander cmd = new JCommander(cfg, args); - if (cfg.help || args.length == 0) { - cmd.usage(); - System.exit(1); - } - new HoodieDeltaStreamer(cfg).sync(); + public static void main(String[] args) throws Exception { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, args); + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); } + new HoodieDeltaStreamer(cfg).sync(); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieDeltaStreamerException.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieDeltaStreamerException.java index c99197b8a..40031aa09 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieDeltaStreamerException.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieDeltaStreamerException.java @@ -21,11 +21,12 @@ package com.uber.hoodie.utilities.exception; import com.uber.hoodie.exception.HoodieException; public class HoodieDeltaStreamerException extends HoodieException { - public HoodieDeltaStreamerException(String msg, Throwable e) { - super(msg, e); - } - public HoodieDeltaStreamerException(String msg) { - super(msg); - } + public HoodieDeltaStreamerException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieDeltaStreamerException(String msg) { + super(msg); + } } diff --git 
a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullException.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullException.java index a939d8cc6..79092bacf 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullException.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullException.java @@ -19,15 +19,15 @@ package com.uber.hoodie.utilities.exception; import com.uber.hoodie.exception.HoodieException; - import java.sql.SQLException; public class HoodieIncrementalPullException extends HoodieException { - public HoodieIncrementalPullException(String msg, SQLException e) { - super(msg, e); - } - public HoodieIncrementalPullException(String msg) { - super(msg); - } + public HoodieIncrementalPullException(String msg, SQLException e) { + super(msg, e); + } + + public HoodieIncrementalPullException(String msg) { + super(msg); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullSQLException.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullSQLException.java index 3089631b7..008d4d0d8 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullSQLException.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/exception/HoodieIncrementalPullSQLException.java @@ -21,11 +21,12 @@ package com.uber.hoodie.utilities.exception; import java.sql.SQLException; public class HoodieIncrementalPullSQLException extends HoodieIncrementalPullException { - public HoodieIncrementalPullSQLException(String msg, SQLException e) { - super(msg, e); - } - public HoodieIncrementalPullSQLException(String msg) { - super(msg); - } + public HoodieIncrementalPullSQLException(String msg, SQLException e) { + super(msg, e); + } + + public HoodieIncrementalPullSQLException(String msg) { 
+ super(msg); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/keygen/TimestampBasedKeyGenerator.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/keygen/TimestampBasedKeyGenerator.java index 5c67bbd8b..d9da949b5 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/keygen/TimestampBasedKeyGenerator.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/keygen/TimestampBasedKeyGenerator.java @@ -23,83 +23,86 @@ import com.uber.hoodie.SimpleKeyGenerator; import com.uber.hoodie.common.model.HoodieKey; import com.uber.hoodie.exception.HoodieNotSupportedException; import com.uber.hoodie.utilities.exception.HoodieDeltaStreamerException; - -import org.apache.avro.generic.GenericRecord; -import org.apache.commons.configuration.PropertiesConfiguration; - import java.io.Serializable; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; import java.util.TimeZone; +import org.apache.avro.generic.GenericRecord; +import org.apache.commons.configuration.PropertiesConfiguration; /** * Key generator, that relies on timestamps for partitioning field. Still picks record key by name. 
- * */ public class TimestampBasedKeyGenerator extends SimpleKeyGenerator { - enum TimestampType implements Serializable { - UNIX_TIMESTAMP, - DATE_STRING, - MIXED + enum TimestampType implements Serializable { + UNIX_TIMESTAMP, + DATE_STRING, + MIXED + } + + private final TimestampType timestampType; + + private SimpleDateFormat inputDateFormat; + + private final String outputDateFormat; + + + /** + * Supported configs + */ + static class Config { + + // One value from TimestampType above + private static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type"; + private static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat"; + private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.dateformat"; + } + + public TimestampBasedKeyGenerator(PropertiesConfiguration config) { + super(config); + DataSourceUtils.checkRequiredProperties(config, + Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); + this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); + this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); + + if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) { + DataSourceUtils + .checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); + this.inputDateFormat = new SimpleDateFormat( + config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); + this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); } + } - private final TimestampType timestampType; + @Override + public HoodieKey getKey(GenericRecord record) { + Object partitionVal = record.get(partitionPathField); + SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat); + partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT")); - private SimpleDateFormat 
inputDateFormat; + try { + long unixTime; + if (partitionVal instanceof Double) { + unixTime = ((Double) partitionVal).longValue(); + } else if (partitionVal instanceof Float) { + unixTime = ((Float) partitionVal).longValue(); + } else if (partitionVal instanceof Long) { + unixTime = (Long) partitionVal; + } else if (partitionVal instanceof String) { + unixTime = inputDateFormat.parse(partitionVal.toString()).getTime() / 1000; + } else { + throw new HoodieNotSupportedException( + "Unexpected type for partition field: " + partitionVal.getClass().getName()); + } - private final String outputDateFormat; - - - /** - * Supported configs - */ - static class Config { - // One value from TimestampType above - private static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type"; - private static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat"; - private static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.dateformat"; - } - - public TimestampBasedKeyGenerator(PropertiesConfiguration config) { - super(config); - DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); - this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); - this.outputDateFormat = config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); - - if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) { - DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); - this.inputDateFormat = new SimpleDateFormat(config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); - this.inputDateFormat.setTimeZone(TimeZone.getTimeZone("GMT")); - } - } - - @Override - public HoodieKey getKey(GenericRecord record) { - Object partitionVal = record.get(partitionPathField); - 
SimpleDateFormat partitionPathFormat = new SimpleDateFormat(outputDateFormat); - partitionPathFormat.setTimeZone(TimeZone.getTimeZone("GMT")); - - try { - long unixTime; - if (partitionVal instanceof Double) { - unixTime = ((Double) partitionVal).longValue(); - } else if (partitionVal instanceof Float) { - unixTime = ((Float) partitionVal).longValue(); - } else if (partitionVal instanceof Long) { - unixTime = (Long) partitionVal; - } else if (partitionVal instanceof String) { - unixTime = inputDateFormat.parse(partitionVal.toString()).getTime() / 1000; - } else { - throw new HoodieNotSupportedException("Unexpected type for partition field: "+ partitionVal.getClass().getName()); - } - - return new HoodieKey(record.get(recordKeyField).toString(), - partitionPathFormat.format(new Date(unixTime * 1000))); - } catch (ParseException pe) { - throw new HoodieDeltaStreamerException("Unable to parse input partition field :" + partitionVal, pe); - } + return new HoodieKey(record.get(recordKeyField).toString(), + partitionPathFormat.format(new Date(unixTime * 1000))); + } catch (ParseException pe) { + throw new HoodieDeltaStreamerException( + "Unable to parse input partition field :" + partitionVal, pe); } + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/FilebasedSchemaProvider.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/FilebasedSchemaProvider.java index 6a77632c3..f6ea67f01 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/FilebasedSchemaProvider.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/FilebasedSchemaProvider.java @@ -21,54 +21,56 @@ package com.uber.hoodie.utilities.schema; import com.uber.hoodie.DataSourceUtils; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; - +import java.io.IOException; +import java.util.Arrays; import org.apache.avro.Schema; import 
org.apache.commons.configuration.PropertiesConfiguration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import java.io.IOException; -import java.util.Arrays; - /** * A simple schema provider, that reads off files on DFS */ public class FilebasedSchemaProvider extends SchemaProvider { - /** - * Configs supported - */ - static class Config { - private static final String SOURCE_SCHEMA_FILE_PROP = "hoodie.deltastreamer.filebased.schemaprovider.source.schema.file"; - private static final String TARGET_SCHEMA_FILE_PROP = "hoodie.deltastreamer.filebased.schemaprovider.target.schema.file"; + /** + * Configs supported + */ + static class Config { + + private static final String SOURCE_SCHEMA_FILE_PROP = "hoodie.deltastreamer.filebased.schemaprovider.source.schema.file"; + private static final String TARGET_SCHEMA_FILE_PROP = "hoodie.deltastreamer.filebased.schemaprovider.target.schema.file"; + } + + private final FileSystem fs; + + private final Schema sourceSchema; + + private final Schema targetSchema; + + public FilebasedSchemaProvider(PropertiesConfiguration config) { + super(config); + this.fs = FSUtils.getFs(); + + DataSourceUtils.checkRequiredProperties(config, + Arrays.asList(Config.SOURCE_SCHEMA_FILE_PROP, Config.TARGET_SCHEMA_FILE_PROP)); + try { + this.sourceSchema = new Schema.Parser() + .parse(fs.open(new Path(config.getString(Config.SOURCE_SCHEMA_FILE_PROP)))); + this.targetSchema = new Schema.Parser() + .parse(fs.open(new Path(config.getString(Config.TARGET_SCHEMA_FILE_PROP)))); + } catch (IOException ioe) { + throw new HoodieIOException("Error reading schema", ioe); } + } - private final FileSystem fs; + @Override + public Schema getSourceSchema() { + return sourceSchema; + } - private final Schema sourceSchema; - - private final Schema targetSchema; - - public FilebasedSchemaProvider(PropertiesConfiguration config) { - super(config); - this.fs = FSUtils.getFs(); - - DataSourceUtils.checkRequiredProperties(config, 
Arrays.asList(Config.SOURCE_SCHEMA_FILE_PROP, Config.TARGET_SCHEMA_FILE_PROP)); - try { - this.sourceSchema = new Schema.Parser().parse(fs.open(new Path(config.getString(Config.SOURCE_SCHEMA_FILE_PROP)))); - this.targetSchema = new Schema.Parser().parse(fs.open(new Path(config.getString(Config.TARGET_SCHEMA_FILE_PROP)))); - } catch (IOException ioe) { - throw new HoodieIOException("Error reading schema", ioe); - } - } - - @Override - public Schema getSourceSchema() { - return sourceSchema; - } - - @Override - public Schema getTargetSchema() { - return targetSchema; - } + @Override + public Schema getTargetSchema() { + return targetSchema; + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/SchemaProvider.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/SchemaProvider.java index b3f385bf9..3a192581c 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/SchemaProvider.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/schema/SchemaProvider.java @@ -18,22 +18,22 @@ package com.uber.hoodie.utilities.schema; +import java.io.Serializable; import org.apache.avro.Schema; import org.apache.commons.configuration.PropertiesConfiguration; -import java.io.Serializable; /** * Class to provide schema for reading data and also writing into a Hoodie table */ public abstract class SchemaProvider implements Serializable { - protected PropertiesConfiguration config; + protected PropertiesConfiguration config; - protected SchemaProvider(PropertiesConfiguration config) { - this.config = config; - } + protected SchemaProvider(PropertiesConfiguration config) { + this.config = config; + } - public abstract Schema getSourceSchema(); + public abstract Schema getSourceSchema(); - public abstract Schema getTargetSchema(); + public abstract Schema getTargetSchema(); } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/AvroConvertor.java 
b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/AvroConvertor.java index a2c1db220..083ecb7d3 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/AvroConvertor.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/AvroConvertor.java @@ -21,73 +21,71 @@ package com.uber.hoodie.utilities.sources; import com.twitter.bijection.Injection; import com.twitter.bijection.avro.GenericAvroCodecs; import com.uber.hoodie.avro.MercifulJsonConverter; - +import java.io.IOException; +import java.io.Serializable; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; -import java.io.IOException; -import java.io.Serializable; - /** - * Convert a variety of {@link SourceDataFormat} into Avro GenericRecords. Has a bunch of lazy fields - * to circumvent issues around serializing these objects from driver to executors + * Convert a variety of {@link SourceDataFormat} into Avro GenericRecords. Has a bunch of lazy + * fields to circumvent issues around serializing these objects from driver to executors */ public class AvroConvertor implements Serializable { - /** - * To be lazily inited on executors - */ - private transient Schema schema; + /** + * To be lazily inited on executors + */ + private transient Schema schema; - private final String schemaStr; + private final String schemaStr; - /** - * To be lazily inited on executors - */ - private transient MercifulJsonConverter jsonConverter; + /** + * To be lazily inited on executors + */ + private transient MercifulJsonConverter jsonConverter; - /** - * To be lazily inited on executors - */ - private transient Injection recordInjection; + /** + * To be lazily inited on executors + */ + private transient Injection recordInjection; - public AvroConvertor(String schemaStr) { - this.schemaStr = schemaStr; + public AvroConvertor(String schemaStr) { + this.schemaStr = schemaStr; + } + + + private void initSchema() { + if (schema == null) { + Schema.Parser 
parser = new Schema.Parser(); + schema = parser.parse(schemaStr); } + } - - private void initSchema() { - if (schema == null) { - Schema.Parser parser = new Schema.Parser(); - schema = parser.parse(schemaStr); - } + private void initInjection() { + if (recordInjection == null) { + recordInjection = GenericAvroCodecs.toBinary(schema); } + } - private void initInjection() { - if (recordInjection == null) { - recordInjection = GenericAvroCodecs.toBinary(schema); - } - } - - private void initJsonConvertor() { - if (jsonConverter == null) { - jsonConverter = new MercifulJsonConverter(schema); - } + private void initJsonConvertor() { + if (jsonConverter == null) { + jsonConverter = new MercifulJsonConverter(schema); } + } - public GenericRecord fromJson(String json) throws IOException { - initSchema(); - initJsonConvertor(); - return jsonConverter.convert(json); - } + public GenericRecord fromJson(String json) throws IOException { + initSchema(); + initJsonConvertor(); + return jsonConverter.convert(json); + } - public GenericRecord fromAvroBinary(byte[] avroBinary) throws IOException { - initSchema(); - initInjection(); - return recordInjection.invert(avroBinary).get(); - } + public GenericRecord fromAvroBinary(byte[] avroBinary) throws IOException { + initSchema(); + initInjection(); + return recordInjection.invert(avroBinary).get(); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/DFSSource.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/DFSSource.java index 49c333782..128a449a4 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/DFSSource.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/DFSSource.java @@ -23,7 +23,12 @@ import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.exception.HoodieNotSupportedException; import com.uber.hoodie.utilities.schema.SchemaProvider; - +import 
java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapreduce.AvroKeyInputFormat; @@ -40,113 +45,121 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - /** * Source to read data from a given DFS directory structure, incrementally */ public class DFSSource extends Source { - /** - * Configs supported - */ - static class Config { - private final static String ROOT_INPUT_PATH_PROP = "hoodie.deltastreamer.source.dfs.root"; - } - - private final static List IGNORE_FILEPREFIX_LIST = Arrays.asList(".", "_"); - - private final transient FileSystem fs; - - public DFSSource(PropertiesConfiguration config, JavaSparkContext sparkContext, SourceDataFormat dataFormat, SchemaProvider schemaProvider) { - super(config, sparkContext, dataFormat, schemaProvider); - this.fs = FSUtils.getFs(); - DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.ROOT_INPUT_PATH_PROP)); + /** + * Configs supported + */ + static class Config { + + private final static String ROOT_INPUT_PATH_PROP = "hoodie.deltastreamer.source.dfs.root"; + } + + private final static List IGNORE_FILEPREFIX_LIST = Arrays.asList(".", "_"); + + private final transient FileSystem fs; + + public DFSSource(PropertiesConfiguration config, JavaSparkContext sparkContext, + SourceDataFormat dataFormat, SchemaProvider schemaProvider) { + super(config, sparkContext, dataFormat, schemaProvider); + this.fs = FSUtils.getFs(); + DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.ROOT_INPUT_PATH_PROP)); + } + + + public static 
JavaRDD fromAvroFiles(final AvroConvertor convertor, String pathStr, + JavaSparkContext sparkContext) { + JavaPairRDD avroRDD = sparkContext.newAPIHadoopFile(pathStr, + AvroKeyInputFormat.class, + AvroKey.class, + NullWritable.class, + sparkContext.hadoopConfiguration()); + return avroRDD.keys().map(r -> ((GenericRecord) r.datum())); + } + + public static JavaRDD fromJsonFiles(final AvroConvertor convertor, String pathStr, + JavaSparkContext sparkContext) { + return sparkContext.textFile(pathStr).map((String j) -> { + return convertor.fromJson(j); + }); + } + + public static JavaRDD fromFiles(SourceDataFormat dataFormat, + final AvroConvertor convertor, String pathStr, JavaSparkContext sparkContext) { + if (dataFormat == SourceDataFormat.AVRO) { + return DFSSource.fromAvroFiles(convertor, pathStr, sparkContext); + } else if (dataFormat == SourceDataFormat.JSON) { + return DFSSource.fromJsonFiles(convertor, pathStr, sparkContext); + } else { + throw new HoodieNotSupportedException("Unsupported data format :" + dataFormat); } + } - public static JavaRDD fromAvroFiles(final AvroConvertor convertor, String pathStr, JavaSparkContext sparkContext) { - JavaPairRDD avroRDD = sparkContext.newAPIHadoopFile(pathStr, - AvroKeyInputFormat.class, - AvroKey.class, - NullWritable.class, - sparkContext.hadoopConfiguration()); - return avroRDD.keys().map(r -> ((GenericRecord) r.datum())); - } + @Override + public Pair>, String> fetchNewData( + Optional lastCheckpointStr, long maxInputBytes) { - public static JavaRDD fromJsonFiles(final AvroConvertor convertor, String pathStr, JavaSparkContext sparkContext) { - return sparkContext.textFile(pathStr).map((String j) -> { - return convertor.fromJson(j); - }); - } - - public static JavaRDD fromFiles(SourceDataFormat dataFormat, final AvroConvertor convertor, String pathStr, JavaSparkContext sparkContext) { - if (dataFormat == SourceDataFormat.AVRO) { - return DFSSource.fromAvroFiles(convertor, pathStr, sparkContext); - } else if 
(dataFormat == SourceDataFormat.JSON) { - return DFSSource.fromJsonFiles(convertor, pathStr, sparkContext); - } else { - throw new HoodieNotSupportedException("Unsupported data format :" + dataFormat); + try { + // obtain all eligible files under root folder. + List eligibleFiles = new ArrayList<>(); + RemoteIterator fitr = fs + .listFiles(new Path(config.getString(Config.ROOT_INPUT_PATH_PROP)), true); + while (fitr.hasNext()) { + LocatedFileStatus fileStatus = fitr.next(); + if (fileStatus.isDirectory() || + IGNORE_FILEPREFIX_LIST.stream() + .filter(pfx -> fileStatus.getPath().getName().startsWith(pfx)).count() > 0) { + continue; } - } + eligibleFiles.add(fileStatus); + } + // sort them by modification time. + eligibleFiles.sort((FileStatus f1, FileStatus f2) -> Long.valueOf(f1.getModificationTime()) + .compareTo(Long.valueOf(f2.getModificationTime()))); - - @Override - public Pair>, String> fetchNewData(Optional lastCheckpointStr, long maxInputBytes) { - - try { - // obtain all eligible files under root folder. - List eligibleFiles = new ArrayList<>(); - RemoteIterator fitr = fs.listFiles(new Path(config.getString(Config.ROOT_INPUT_PATH_PROP)), true); - while (fitr.hasNext()) { - LocatedFileStatus fileStatus = fitr.next(); - if (fileStatus.isDirectory() || - IGNORE_FILEPREFIX_LIST.stream().filter(pfx -> fileStatus.getPath().getName().startsWith(pfx)).count() > 0) { - continue; - } - eligibleFiles.add(fileStatus); - } - // sort them by modification time. 
- eligibleFiles.sort((FileStatus f1, FileStatus f2) -> Long.valueOf(f1.getModificationTime()).compareTo(Long.valueOf(f2.getModificationTime()))); - - // Filter based on checkpoint & input size, if needed - long currentBytes = 0; - long maxModificationTime = Long.MIN_VALUE; - List filteredFiles = new ArrayList<>(); - for (FileStatus f : eligibleFiles) { - if (lastCheckpointStr.isPresent() && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get())) { - // skip processed files - continue; - } - - maxModificationTime = f.getModificationTime(); - currentBytes += f.getLen(); - filteredFiles.add(f); - if (currentBytes >= maxInputBytes) { - // we have enough data, we are done - break; - } - } - - // no data to read - if (filteredFiles.size() == 0) { - return new ImmutablePair<>(Optional.empty(), lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : String.valueOf(Long.MIN_VALUE)); - } - - // read the files out. - String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(",")); - String schemaStr = schemaProvider.getSourceSchema().toString(); - final AvroConvertor avroConvertor = new AvroConvertor(schemaStr); - - return new ImmutablePair<>(Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)), - String.valueOf(maxModificationTime)); - } catch (IOException ioe) { - throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); + // Filter based on checkpoint & input size, if needed + long currentBytes = 0; + long maxModificationTime = Long.MIN_VALUE; + List filteredFiles = new ArrayList<>(); + for (FileStatus f : eligibleFiles) { + if (lastCheckpointStr.isPresent() && f.getModificationTime() <= Long + .valueOf(lastCheckpointStr.get())) { + // skip processed files + continue; } + + maxModificationTime = f.getModificationTime(); + currentBytes += f.getLen(); + filteredFiles.add(f); + if (currentBytes >= maxInputBytes) { + // we have enough data, we are 
done + break; + } + } + + // no data to read + if (filteredFiles.size() == 0) { + return new ImmutablePair<>(Optional.empty(), + lastCheckpointStr.isPresent() ? lastCheckpointStr.get() + : String.valueOf(Long.MIN_VALUE)); + } + + // read the files out. + String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()) + .collect(Collectors.joining(",")); + String schemaStr = schemaProvider.getSourceSchema().toString(); + final AvroConvertor avroConvertor = new AvroConvertor(schemaStr); + + return new ImmutablePair<>( + Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)), + String.valueOf(maxModificationTime)); + } catch (IOException ioe) { + throw new HoodieIOException( + "Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); } + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/HiveIncrPullSource.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/HiveIncrPullSource.java index 4aceb88b3..08c919366 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/HiveIncrPullSource.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/HiveIncrPullSource.java @@ -22,7 +22,13 @@ import com.uber.hoodie.DataSourceUtils; import com.uber.hoodie.common.util.FSUtils; import com.uber.hoodie.exception.HoodieIOException; import com.uber.hoodie.utilities.schema.SchemaProvider; - +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; import org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -35,98 +41,98 @@ import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.IOException; -import 
java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - /** * Source to read deltas produced by {@link com.uber.hoodie.utilities.HiveIncrementalPuller}, commit * by commit and apply to the target table * * The general idea here is to have commits sync across the data pipeline. * - * [Source Tables(s)] ====> HiveIncrementalScanner ==> incrPullRootPath ==> targetTable - * {c1,c2,c3,...} {c1,c2,c3,...} {c1,c2,c3,...} + * [Source Tables(s)] ====> HiveIncrementalScanner ==> incrPullRootPath ==> targetTable + * {c1,c2,c3,...} {c1,c2,c3,...} {c1,c2,c3,...} * * This produces beautiful causality, that makes data issues in ETLs very easy to debug - * */ public class HiveIncrPullSource extends Source { - private static volatile Logger log = LogManager.getLogger(HiveIncrPullSource.class); + private static volatile Logger log = LogManager.getLogger(HiveIncrPullSource.class); - private final transient FileSystem fs; + private final transient FileSystem fs; - private final String incrPullRootPath; + private final String incrPullRootPath; - /** - * Configs supported - */ - static class Config { - private final static String ROOT_INPUT_PATH_PROP = "hoodie.deltastreamer.source.incrpull.root"; + /** + * Configs supported + */ + static class Config { + + private final static String ROOT_INPUT_PATH_PROP = "hoodie.deltastreamer.source.incrpull.root"; + } + + public HiveIncrPullSource(PropertiesConfiguration config, JavaSparkContext sparkContext, + SourceDataFormat dataFormat, SchemaProvider schemaProvider) { + super(config, sparkContext, dataFormat, schemaProvider); + this.fs = FSUtils.getFs(); + DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.ROOT_INPUT_PATH_PROP)); + this.incrPullRootPath = config.getString(Config.ROOT_INPUT_PATH_PROP); + } + + /** + * Finds the first commit from source, greater than the target's last commit, and reads it out. 
+ */ + private Optional findCommitToPull(Optional latestTargetCommit) + throws IOException { + + log.info("Looking for commits "); + + FileStatus[] commitTimePaths = fs.listStatus(new Path(incrPullRootPath)); + List commitTimes = new ArrayList<>(commitTimePaths.length); + for (FileStatus commitTimePath : commitTimePaths) { + String[] splits = commitTimePath.getPath().toString().split("/"); + commitTimes.add(splits[splits.length - 1]); + } + Collections.sort(commitTimes); + log.info("Retrieved commit times " + commitTimes); + + if (!latestTargetCommit.isPresent()) { + // start from the beginning + return Optional.of(commitTimes.get(0)); } - public HiveIncrPullSource(PropertiesConfiguration config, JavaSparkContext sparkContext, SourceDataFormat dataFormat, SchemaProvider schemaProvider) { - super(config, sparkContext, dataFormat, schemaProvider); - this.fs = FSUtils.getFs(); - DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.ROOT_INPUT_PATH_PROP)); - this.incrPullRootPath = config.getString(Config.ROOT_INPUT_PATH_PROP); + for (String commitTime : commitTimes) { + //TODO(vc): Add an option to delete consumed commits + if (commitTime.compareTo(latestTargetCommit.get()) > 0) { + return Optional.of(commitTime); + } } + return Optional.empty(); + } - /** - * Finds the first commit from source, greater than the target's last commit, and reads it out. - */ - private Optional findCommitToPull(Optional latestTargetCommit) throws IOException { + @Override + public Pair>, String> fetchNewData( + Optional lastCheckpointStr, long maxInputBytes) { + try { + // find the source commit to pull + Optional commitToPull = findCommitToPull(lastCheckpointStr); - log.info("Looking for commits "); + if (!commitToPull.isPresent()) { + return new ImmutablePair<>(Optional.empty(), + lastCheckpointStr.isPresent() ? 
lastCheckpointStr.get() : ""); + } - FileStatus[] commitTimePaths = fs.listStatus(new Path(incrPullRootPath)); - List commitTimes = new ArrayList<>(commitTimePaths.length); - for (FileStatus commitTimePath : commitTimePaths) { - String[] splits = commitTimePath.getPath().toString().split("/"); - commitTimes.add(splits[splits.length - 1]); - } - Collections.sort(commitTimes); - log.info("Retrieved commit times " + commitTimes); - - if (!latestTargetCommit.isPresent()) { - // start from the beginning - return Optional.of(commitTimes.get(0)); - } - - for (String commitTime : commitTimes) { - //TODO(vc): Add an option to delete consumed commits - if (commitTime.compareTo(latestTargetCommit.get()) > 0) { - return Optional.of(commitTime); - } - } - return Optional.empty(); - } - - @Override - public Pair>, String> fetchNewData(Optional lastCheckpointStr, long maxInputBytes) { - try { - // find the source commit to pull - Optional commitToPull = findCommitToPull(lastCheckpointStr); - - if (!commitToPull.isPresent()) { - return new ImmutablePair<>(Optional.empty(), lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : ""); - } - - // read the files out. - List commitDeltaFiles = Arrays.asList(fs.listStatus(new Path(incrPullRootPath, commitToPull.get()))); - String pathStr = commitDeltaFiles.stream().map(f -> f.getPath().toString()).collect(Collectors.joining(",")); - String schemaStr = schemaProvider.getSourceSchema().toString(); - final AvroConvertor avroConvertor = new AvroConvertor(schemaStr); - return new ImmutablePair<>(Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)), - String.valueOf(commitToPull.get())); - } catch (IOException ioe) { - throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); - } + // read the files out. 
+ List commitDeltaFiles = Arrays + .asList(fs.listStatus(new Path(incrPullRootPath, commitToPull.get()))); + String pathStr = commitDeltaFiles.stream().map(f -> f.getPath().toString()) + .collect(Collectors.joining(",")); + String schemaStr = schemaProvider.getSourceSchema().toString(); + final AvroConvertor avroConvertor = new AvroConvertor(schemaStr); + return new ImmutablePair<>( + Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)), + String.valueOf(commitToPull.get())); + } catch (IOException ioe) { + throw new HoodieIOException( + "Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); } + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/KafkaSource.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/KafkaSource.java index f1738a385..2f2941e5d 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/KafkaSource.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/KafkaSource.java @@ -22,20 +22,6 @@ import com.uber.hoodie.DataSourceUtils; import com.uber.hoodie.exception.HoodieNotSupportedException; import com.uber.hoodie.utilities.exception.HoodieDeltaStreamerException; import com.uber.hoodie.utilities.schema.SchemaProvider; - -import org.apache.avro.generic.GenericRecord; -import org.apache.commons.configuration.PropertiesConfiguration; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.streaming.kafka.KafkaCluster; -import org.apache.spark.streaming.kafka.KafkaUtils; -import org.apache.spark.streaming.kafka.OffsetRange; -import kafka.common.TopicAndPartition; - import java.nio.charset.Charset; import java.util.Arrays; import java.util.Comparator; @@ -48,8 +34,19 @@ import 
java.util.Spliterators; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; - +import kafka.common.TopicAndPartition; import kafka.serializer.DefaultDecoder; +import org.apache.avro.generic.GenericRecord; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.streaming.kafka.KafkaCluster; +import org.apache.spark.streaming.kafka.KafkaUtils; +import org.apache.spark.streaming.kafka.OffsetRange; import scala.Predef; import scala.Tuple2; import scala.collection.JavaConverters; @@ -65,183 +62,192 @@ import scala.util.Either; */ public class KafkaSource extends Source { - private static volatile Logger log = LogManager.getLogger(KafkaSource.class); + private static volatile Logger log = LogManager.getLogger(KafkaSource.class); - static class CheckpointUtils { + static class CheckpointUtils { - /** - * Reconstruct checkpoint from string. - * - * @param checkpointStr - * @return - */ - public static HashMap strToOffsets(String checkpointStr) { - HashMap offsetMap = new HashMap<>(); - String[] splits = checkpointStr.split(","); - String topic = splits[0]; - for (int i = 1; i < splits.length; i++) { - String[] subSplits = splits[i].split(":"); - offsetMap.put(new TopicAndPartition(topic, Integer.parseInt(subSplits[0])), - new KafkaCluster.LeaderOffset("", -1, Long.parseLong(subSplits[1]))); - } - return offsetMap; - } - - /** - * String representation of checkpoint - * - * Format: - * topic1,0:offset0,1:offset1,2:offset2, ..... - * - * @param offsetMap - * @return - */ - public static String offsetsToStr(HashMap offsetMap) { - StringBuilder sb = new StringBuilder(); - // atleast 1 partition will be present. 
- sb.append(offsetMap.entrySet().stream().findFirst().get().getKey().topic() + ","); - sb.append(offsetMap.entrySet().stream() - .map(e -> String.format("%s:%d",e.getKey().partition(), e.getValue().offset())) - .collect(Collectors.joining(","))); - return sb.toString(); - } - - public static OffsetRange[] computeOffsetRanges(HashMap fromOffsetMap, - HashMap toOffsetMap) { - Comparator byPartition = (OffsetRange o1, OffsetRange o2) -> { - return Integer.valueOf(o1.partition()).compareTo(Integer.valueOf(o2.partition())); - }; - List offsetRanges = toOffsetMap.entrySet().stream().map(e -> { - TopicAndPartition tp = e.getKey(); - long fromOffset = -1; - if (fromOffsetMap.containsKey(tp)){ - fromOffset = fromOffsetMap.get(tp).offset(); - } - return OffsetRange.create(tp, fromOffset, e.getValue().offset()); - }).sorted(byPartition).collect(Collectors.toList()); - - OffsetRange[] ranges = new OffsetRange[offsetRanges.size()]; - return offsetRanges.toArray(ranges); - } - - public static long totalNewMessages(OffsetRange[] ranges) { - long totalMsgs = 0; - for (OffsetRange range: ranges) { - totalMsgs += Math.max(range.untilOffset()-range.fromOffset(), 0); - } - return totalMsgs; - } + /** + * Reconstruct checkpoint from string. + */ + public static HashMap strToOffsets( + String checkpointStr) { + HashMap offsetMap = new HashMap<>(); + String[] splits = checkpointStr.split(","); + String topic = splits[0]; + for (int i = 1; i < splits.length; i++) { + String[] subSplits = splits[i].split(":"); + offsetMap.put(new TopicAndPartition(topic, Integer.parseInt(subSplits[0])), + new KafkaCluster.LeaderOffset("", -1, Long.parseLong(subSplits[1]))); + } + return offsetMap; } /** - * Helpers to deal with tricky scala <=> java conversions. (oh my!) + * String representation of checkpoint + * + * Format: topic1,0:offset0,1:offset1,2:offset2, ..... 
*/ - static class ScalaHelpers { - public static Map toScalaMap(HashMap m) { - return JavaConverters.mapAsScalaMapConverter(m).asScala().toMap( - Predef.>conforms() - ); - } - - public static Set toScalaSet(HashSet s) { - return JavaConverters.asScalaSetConverter(s).asScala().toSet(); - } - - public static java.util.Map toJavaMap(Map m) { - return JavaConverters.mapAsJavaMapConverter(m).asJava(); - } + public static String offsetsToStr( + HashMap offsetMap) { + StringBuilder sb = new StringBuilder(); + // atleast 1 partition will be present. + sb.append(offsetMap.entrySet().stream().findFirst().get().getKey().topic() + ","); + sb.append(offsetMap.entrySet().stream() + .map(e -> String.format("%s:%d", e.getKey().partition(), e.getValue().offset())) + .collect(Collectors.joining(","))); + return sb.toString(); } + public static OffsetRange[] computeOffsetRanges( + HashMap fromOffsetMap, + HashMap toOffsetMap) { + Comparator byPartition = (OffsetRange o1, OffsetRange o2) -> { + return Integer.valueOf(o1.partition()).compareTo(Integer.valueOf(o2.partition())); + }; + List offsetRanges = toOffsetMap.entrySet().stream().map(e -> { + TopicAndPartition tp = e.getKey(); + long fromOffset = -1; + if (fromOffsetMap.containsKey(tp)) { + fromOffset = fromOffsetMap.get(tp).offset(); + } + return OffsetRange.create(tp, fromOffset, e.getValue().offset()); + }).sorted(byPartition).collect(Collectors.toList()); - /** - * Configs to be passed for this source. 
All standard Kafka consumer configs are also - * respected - */ - static class Config { - private final static String KAFKA_TOPIC_NAME = "hoodie.deltastreamer.source.kafka.topic"; - private final static String DEFAULT_AUTO_RESET_OFFSET = "largest"; + OffsetRange[] ranges = new OffsetRange[offsetRanges.size()]; + return offsetRanges.toArray(ranges); } + public static long totalNewMessages(OffsetRange[] ranges) { + long totalMsgs = 0; + for (OffsetRange range : ranges) { + totalMsgs += Math.max(range.untilOffset() - range.fromOffset(), 0); + } + return totalMsgs; + } + } - private HashMap kafkaParams; + /** + * Helpers to deal with tricky scala <=> java conversions. (oh my!) + */ + static class ScalaHelpers { - private final String topicName; - - public KafkaSource(PropertiesConfiguration config, JavaSparkContext sparkContext, SourceDataFormat dataFormat, SchemaProvider schemaProvider) { - super(config, sparkContext, dataFormat, schemaProvider); - - kafkaParams = new HashMap<>(); - Stream keys = StreamSupport.stream(Spliterators.spliteratorUnknownSize(config.getKeys(), Spliterator.NONNULL), false); - keys.forEach(k -> kafkaParams.put(k, config.getString(k))); - - DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.KAFKA_TOPIC_NAME)); - topicName = config.getString(Config.KAFKA_TOPIC_NAME); + public static Map toScalaMap(HashMap m) { + return JavaConverters.mapAsScalaMapConverter(m).asScala().toMap( + Predef.>conforms() + ); } - @Override - public Pair>, String> fetchNewData(Optional lastCheckpointStr, long maxInputBytes) { - - // Obtain current metadata for the topic - KafkaCluster cluster = new KafkaCluster(ScalaHelpers.toScalaMap(kafkaParams)); - Either, Set> either = cluster.getPartitions(ScalaHelpers.toScalaSet(new HashSet<>(Arrays.asList(topicName)))); - if (either.isLeft()) { - // log errors. and bail out. 
- throw new HoodieDeltaStreamerException("Error obtaining partition metadata", either.left().get().head()); - } - Set topicPartitions = either.right().get(); - - // Determine the offset ranges to read from - HashMap fromOffsets; - if (lastCheckpointStr.isPresent()) { - fromOffsets = CheckpointUtils.strToOffsets(lastCheckpointStr.get()); - } else { - String autoResetValue = config.getString("auto.offset.reset", Config.DEFAULT_AUTO_RESET_OFFSET); - if (autoResetValue.equals("smallest")) { - fromOffsets = new HashMap(ScalaHelpers.toJavaMap(cluster.getEarliestLeaderOffsets(topicPartitions).right().get())); - } else if (autoResetValue.equals("largest")) { - fromOffsets = new HashMap(ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get())); - } else { - throw new HoodieNotSupportedException("Auto reset value must be one of 'smallest' or 'largest' "); - } - } - - // Always read until the latest offset - HashMap toOffsets = new HashMap(ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get())); - - - // Come up with final set of OffsetRanges to read (account for new partitions) - // TODO(vc): Respect maxInputBytes, by estimating number of messages to read each batch from partition size - OffsetRange[] offsetRanges = CheckpointUtils.computeOffsetRanges(fromOffsets, toOffsets); - long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); - if (totalNewMsgs <= 0) { - return new ImmutablePair<>(Optional.empty(), lastCheckpointStr.isPresent() ? 
lastCheckpointStr.get() : CheckpointUtils.offsetsToStr(toOffsets)); - } else { - log.info("About to read " + totalNewMsgs + " from Kafka for topic :" + topicName); - } - - - // Perform the actual read from Kafka - JavaRDD kafkaRDD = KafkaUtils.createRDD( - sparkContext, - byte[].class, - byte[].class, - DefaultDecoder.class, - DefaultDecoder.class, - kafkaParams, - offsetRanges).values(); - - // Produce a RDD[GenericRecord] - final AvroConvertor avroConvertor = new AvroConvertor(schemaProvider.getSourceSchema().toString()); - JavaRDD newDataRDD; - if (dataFormat == SourceDataFormat.AVRO) { - newDataRDD = kafkaRDD.map(bytes -> avroConvertor.fromAvroBinary(bytes)); - } else if (dataFormat == SourceDataFormat.JSON) { - newDataRDD = kafkaRDD.map(bytes -> avroConvertor.fromJson(new String(bytes, Charset.forName("utf-8")))); - } else { - throw new HoodieNotSupportedException("Unsupport data format :" + dataFormat); - } - - return new ImmutablePair<>(Optional.of(newDataRDD), CheckpointUtils.offsetsToStr(toOffsets)); + public static Set toScalaSet(HashSet s) { + return JavaConverters.asScalaSetConverter(s).asScala().toSet(); } + + public static java.util.Map toJavaMap(Map m) { + return JavaConverters.mapAsJavaMapConverter(m).asJava(); + } + } + + + /** + * Configs to be passed for this source. 
All standard Kafka consumer configs are also respected + */ + static class Config { + + private final static String KAFKA_TOPIC_NAME = "hoodie.deltastreamer.source.kafka.topic"; + private final static String DEFAULT_AUTO_RESET_OFFSET = "largest"; + } + + + private HashMap kafkaParams; + + private final String topicName; + + public KafkaSource(PropertiesConfiguration config, JavaSparkContext sparkContext, + SourceDataFormat dataFormat, SchemaProvider schemaProvider) { + super(config, sparkContext, dataFormat, schemaProvider); + + kafkaParams = new HashMap<>(); + Stream keys = StreamSupport + .stream(Spliterators.spliteratorUnknownSize(config.getKeys(), Spliterator.NONNULL), false); + keys.forEach(k -> kafkaParams.put(k, config.getString(k))); + + DataSourceUtils.checkRequiredProperties(config, Arrays.asList(Config.KAFKA_TOPIC_NAME)); + topicName = config.getString(Config.KAFKA_TOPIC_NAME); + } + + @Override + public Pair>, String> fetchNewData( + Optional lastCheckpointStr, long maxInputBytes) { + + // Obtain current metadata for the topic + KafkaCluster cluster = new KafkaCluster(ScalaHelpers.toScalaMap(kafkaParams)); + Either, Set> either = cluster + .getPartitions(ScalaHelpers.toScalaSet(new HashSet<>(Arrays.asList(topicName)))); + if (either.isLeft()) { + // log errors. and bail out. 
+ throw new HoodieDeltaStreamerException("Error obtaining partition metadata", + either.left().get().head()); + } + Set topicPartitions = either.right().get(); + + // Determine the offset ranges to read from + HashMap fromOffsets; + if (lastCheckpointStr.isPresent()) { + fromOffsets = CheckpointUtils.strToOffsets(lastCheckpointStr.get()); + } else { + String autoResetValue = config + .getString("auto.offset.reset", Config.DEFAULT_AUTO_RESET_OFFSET); + if (autoResetValue.equals("smallest")) { + fromOffsets = new HashMap(ScalaHelpers + .toJavaMap(cluster.getEarliestLeaderOffsets(topicPartitions).right().get())); + } else if (autoResetValue.equals("largest")) { + fromOffsets = new HashMap( + ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get())); + } else { + throw new HoodieNotSupportedException( + "Auto reset value must be one of 'smallest' or 'largest' "); + } + } + + // Always read until the latest offset + HashMap toOffsets = new HashMap( + ScalaHelpers.toJavaMap(cluster.getLatestLeaderOffsets(topicPartitions).right().get())); + + // Come up with final set of OffsetRanges to read (account for new partitions) + // TODO(vc): Respect maxInputBytes, by estimating number of messages to read each batch from partition size + OffsetRange[] offsetRanges = CheckpointUtils.computeOffsetRanges(fromOffsets, toOffsets); + long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); + if (totalNewMsgs <= 0) { + return new ImmutablePair<>(Optional.empty(), + lastCheckpointStr.isPresent() ? 
lastCheckpointStr.get() + : CheckpointUtils.offsetsToStr(toOffsets)); + } else { + log.info("About to read " + totalNewMsgs + " from Kafka for topic :" + topicName); + } + + // Perform the actual read from Kafka + JavaRDD kafkaRDD = KafkaUtils.createRDD( + sparkContext, + byte[].class, + byte[].class, + DefaultDecoder.class, + DefaultDecoder.class, + kafkaParams, + offsetRanges).values(); + + // Produce a RDD[GenericRecord] + final AvroConvertor avroConvertor = new AvroConvertor( + schemaProvider.getSourceSchema().toString()); + JavaRDD newDataRDD; + if (dataFormat == SourceDataFormat.AVRO) { + newDataRDD = kafkaRDD.map(bytes -> avroConvertor.fromAvroBinary(bytes)); + } else if (dataFormat == SourceDataFormat.JSON) { + newDataRDD = kafkaRDD + .map(bytes -> avroConvertor.fromJson(new String(bytes, Charset.forName("utf-8")))); + } else { + throw new HoodieNotSupportedException("Unsupport data format :" + dataFormat); + } + + return new ImmutablePair<>(Optional.of(newDataRDD), CheckpointUtils.offsetsToStr(toOffsets)); + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/Source.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/Source.java index b44ca614f..d8ff58e89 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/Source.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/Source.java @@ -19,50 +19,46 @@ package com.uber.hoodie.utilities.sources; import com.uber.hoodie.utilities.schema.SchemaProvider; - +import java.io.Serializable; +import java.util.Optional; import org.apache.avro.generic.GenericRecord; import org.apache.commons.configuration.PropertiesConfiguration; import org.apache.commons.lang3.tuple.Pair; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.Serializable; -import java.util.Optional; - /** * Represents a source from which we can tail data. Assumes a constructor that takes properties. 
*/ public abstract class Source implements Serializable { - protected transient PropertiesConfiguration config; + protected transient PropertiesConfiguration config; - protected transient JavaSparkContext sparkContext; + protected transient JavaSparkContext sparkContext; - protected transient SourceDataFormat dataFormat; + protected transient SourceDataFormat dataFormat; - protected transient SchemaProvider schemaProvider; + protected transient SchemaProvider schemaProvider; - protected Source(PropertiesConfiguration config, JavaSparkContext sparkContext, SourceDataFormat dataFormat, SchemaProvider schemaProvider) { - this.config = config; - this.sparkContext = sparkContext; - this.dataFormat = dataFormat; - this.schemaProvider = schemaProvider; - } + protected Source(PropertiesConfiguration config, JavaSparkContext sparkContext, + SourceDataFormat dataFormat, SchemaProvider schemaProvider) { + this.config = config; + this.sparkContext = sparkContext; + this.dataFormat = dataFormat; + this.schemaProvider = schemaProvider; + } - /** - * Fetches new data upto maxInputBytes, from the provided checkpoint and returns an RDD of the data, - * as well as the checkpoint to be written as a result of that. - * - * @param lastCheckpointStr - * @param maxInputBytes - * @return - */ - public abstract Pair>, String> fetchNewData(Optional lastCheckpointStr, - long maxInputBytes); + /** + * Fetches new data upto maxInputBytes, from the provided checkpoint and returns an RDD of the + * data, as well as the checkpoint to be written as a result of that. 
+ */ + public abstract Pair>, String> fetchNewData( + Optional lastCheckpointStr, + long maxInputBytes); - public PropertiesConfiguration getConfig() { - return config; - } + public PropertiesConfiguration getConfig() { + return config; + } } diff --git a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/SourceDataFormat.java b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/SourceDataFormat.java index 229d7ff39..12596a7cd 100644 --- a/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/SourceDataFormat.java +++ b/hoodie-utilities/src/main/java/com/uber/hoodie/utilities/sources/SourceDataFormat.java @@ -22,8 +22,8 @@ package com.uber.hoodie.utilities.sources; * Format of the data within source. */ public enum SourceDataFormat { - AVRO, // No conversion needed explicitly to avro - JSON, // we will try to convert to avro - ROW, // Will be added later, so we can plug/play with spark sources. - CUSTOM // the source is responsible for conversion to avro. + AVRO, // No conversion needed explicitly to avro + JSON, // we will try to convert to avro + ROW, // Will be added later, so we can plug/play with spark sources. + CUSTOM // the source is responsible for conversion to avro. 
} diff --git a/hoodie-utilities/src/main/resources/delta-streamer-config/hoodie-client.properties b/hoodie-utilities/src/main/resources/delta-streamer-config/hoodie-client.properties index 0a578f439..81f928b01 100644 --- a/hoodie-utilities/src/main/resources/delta-streamer-config/hoodie-client.properties +++ b/hoodie-utilities/src/main/resources/delta-streamer-config/hoodie-client.properties @@ -15,5 +15,4 @@ # # # - hoodie.upsert.shuffle.parallelism=2 diff --git a/hoodie-utilities/src/main/resources/delta-streamer-config/key-generator.properties b/hoodie-utilities/src/main/resources/delta-streamer-config/key-generator.properties index e98189d99..c75201780 100644 --- a/hoodie-utilities/src/main/resources/delta-streamer-config/key-generator.properties +++ b/hoodie-utilities/src/main/resources/delta-streamer-config/key-generator.properties @@ -15,6 +15,5 @@ # # # - hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.partitionpath.field=driver diff --git a/hoodie-utilities/src/main/resources/delta-streamer-config/schema-provider.properties b/hoodie-utilities/src/main/resources/delta-streamer-config/schema-provider.properties index 187cd1193..1842069de 100644 --- a/hoodie-utilities/src/main/resources/delta-streamer-config/schema-provider.properties +++ b/hoodie-utilities/src/main/resources/delta-streamer-config/schema-provider.properties @@ -14,7 +14,5 @@ # limitations under the License. 
# # - - hoodie.deltastreamer.filebased.schemaprovider.source.schema.file=file:///Users/vinoth/bin/hoodie/hoodie-utilities/src/main/resources/delta-streamer-config/source.avsc hoodie.deltastreamer.filebased.schemaprovider.target.schema.file=file:///Users/vinoth/bin/hoodie/hoodie-utilities/src/main/resources/delta-streamer-config/target.avsc diff --git a/hoodie-utilities/src/main/resources/delta-streamer-config/source.properties b/hoodie-utilities/src/main/resources/delta-streamer-config/source.properties index 85489c5ec..6e698db1d 100644 --- a/hoodie-utilities/src/main/resources/delta-streamer-config/source.properties +++ b/hoodie-utilities/src/main/resources/delta-streamer-config/source.properties @@ -15,10 +15,8 @@ # # # - # DFS Source hoodie.deltastreamer.source.dfs.root=file:///tmp/hoodie-dfs-input - # Kafka Source hoodie.deltastreamer.source.kafka.topic=uber_trips metadata.broker.list=localhost:9092 diff --git a/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHDFSParquetImporter.java b/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHDFSParquetImporter.java index 38feb184c..af0a52330 100644 --- a/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHDFSParquetImporter.java +++ b/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHDFSParquetImporter.java @@ -55,237 +55,240 @@ import org.junit.BeforeClass; import org.junit.Test; public class TestHDFSParquetImporter implements Serializable { - private static String dfsBasePath; - private static HdfsTestService hdfsTestService; - private static MiniDFSCluster dfsCluster; - private static DistributedFileSystem dfs; + + private static String dfsBasePath; + private static HdfsTestService hdfsTestService; + private static MiniDFSCluster dfsCluster; + private static DistributedFileSystem dfs; - @BeforeClass - public static void initClass() throws Exception { - hdfsTestService = new HdfsTestService(); - dfsCluster = hdfsTestService.start(true); + @BeforeClass + public static 
void initClass() throws Exception { + hdfsTestService = new HdfsTestService(); + dfsCluster = hdfsTestService.start(true); - // Create a temp folder as the base path - dfs = dfsCluster.getFileSystem(); - dfsBasePath = dfs.getWorkingDirectory().toString(); - dfs.mkdirs(new Path(dfsBasePath)); - FSUtils.setFs(dfs); + // Create a temp folder as the base path + dfs = dfsCluster.getFileSystem(); + dfsBasePath = dfs.getWorkingDirectory().toString(); + dfs.mkdirs(new Path(dfsBasePath)); + FSUtils.setFs(dfs); + } + + @AfterClass + public static void cleanupClass() throws Exception { + if (hdfsTestService != null) { + hdfsTestService.stop(); } + FSUtils.setFs(null); + } - @AfterClass - public static void cleanupClass() throws Exception { - if (hdfsTestService != null) { - hdfsTestService.stop(); + /** + * Test successful data import with retries. + */ + @Test + public void testDatasetImportWithRetries() throws Exception { + JavaSparkContext jsc = null; + try { + jsc = getJavaSparkContext(); + + // Test root folder. + String basePath = (new Path(dfsBasePath, + Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); + + // Hoodie root folder + Path hoodieFolder = new Path(basePath, "testTarget"); + + // Create schema file. + String schemaFile = new Path(basePath, "file.schema").toString(); + + //Create generic records. 
+ Path srcFolder = new Path(basePath, "testSrc"); + createRecords(srcFolder); + + HDFSParquetImporter.Config cfg = getHDFSParquetImporterConfig(srcFolder.toString(), + hoodieFolder.toString(), + "testTable", "COPY_ON_WRITE", "_row_key", "timestamp", + 1, schemaFile); + AtomicInteger retry = new AtomicInteger(3); + AtomicInteger fileCreated = new AtomicInteger(0); + HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg) { + @Override + protected int dataImport(JavaSparkContext jsc) throws IOException { + int ret = super.dataImport(jsc); + if (retry.decrementAndGet() == 0) { + fileCreated.incrementAndGet(); + createSchemaFile(schemaFile); + } + + return ret; } - FSUtils.setFs(null); - } + }; + // Schema file is not created so this operation should fail. + assertEquals(0, dataImporter.dataImport(jsc, retry.get())); + assertEquals(retry.get(), -1); + assertEquals(fileCreated.get(), 1); - /** - * Test successful data import with retries. - */ - @Test - public void testDatasetImportWithRetries() throws Exception { - JavaSparkContext jsc = null; - try { - jsc = getJavaSparkContext(); + // Check if + // 1. .commit file is present + // 2. number of records in each partition == 24 + // 3. total number of partitions == 4; + boolean isCommitFilePresent = false; + Map recordCounts = new HashMap(); + RemoteIterator hoodieFiles = dfs.listFiles(hoodieFolder, true); + while (hoodieFiles.hasNext()) { + LocatedFileStatus f = hoodieFiles.next(); + isCommitFilePresent = + isCommitFilePresent || f.getPath().toString().endsWith(HoodieTimeline.COMMIT_EXTENSION); - // Test root folder. - String basePath = (new Path(dfsBasePath, - Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); - - // Hoodie root folder - Path hoodieFolder = new Path(basePath, "testTarget"); - - // Create schema file. - String schemaFile = new Path(basePath, "file.schema").toString(); - - - //Create generic records. 
- Path srcFolder = new Path(basePath, "testSrc"); - createRecords(srcFolder); - - HDFSParquetImporter.Config cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), - "testTable", "COPY_ON_WRITE", "_row_key", "timestamp", - 1, schemaFile); - AtomicInteger retry = new AtomicInteger(3); - AtomicInteger fileCreated = new AtomicInteger(0); - HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg) { - @Override - protected int dataImport(JavaSparkContext jsc) throws IOException { - int ret = super.dataImport(jsc); - if (retry.decrementAndGet() == 0) { - fileCreated.incrementAndGet(); - createSchemaFile(schemaFile); - } - - return ret; - } - }; - // Schema file is not created so this operation should fail. - assertEquals(0, dataImporter.dataImport(jsc, retry.get())); - assertEquals(retry.get(), -1); - assertEquals(fileCreated.get(), 1); - - // Check if - // 1. .commit file is present - // 2. number of records in each partition == 24 - // 3. total number of partitions == 4; - boolean isCommitFilePresent = false; - Map recordCounts = new HashMap(); - RemoteIterator hoodieFiles = dfs.listFiles(hoodieFolder, true); - while (hoodieFiles.hasNext()) { - LocatedFileStatus f = hoodieFiles.next(); - isCommitFilePresent = isCommitFilePresent || f.getPath().toString().endsWith(HoodieTimeline.COMMIT_EXTENSION); - - if (f.getPath().toString().endsWith("parquet")) { - SQLContext sc = new SQLContext(jsc); - String partitionPath = f.getPath().getParent().toString(); - long count = sc.read().parquet(f.getPath().toString()).count(); - if (!recordCounts.containsKey(partitionPath)) recordCounts.put(partitionPath, 0L); - recordCounts.put(partitionPath, recordCounts.get(partitionPath) + count); - } - } - assertTrue("commit file is missing", isCommitFilePresent); - assertEquals("partition is missing", 4, recordCounts.size()); - for (Entry e : recordCounts.entrySet()) { - assertEquals( "missing records", 24, e.getValue().longValue()); - } - } finally { - if (jsc 
!= null) { - jsc.stop(); - } + if (f.getPath().toString().endsWith("parquet")) { + SQLContext sc = new SQLContext(jsc); + String partitionPath = f.getPath().getParent().toString(); + long count = sc.read().parquet(f.getPath().toString()).count(); + if (!recordCounts.containsKey(partitionPath)) { + recordCounts.put(partitionPath, 0L); + } + recordCounts.put(partitionPath, recordCounts.get(partitionPath) + count); } + } + assertTrue("commit file is missing", isCommitFilePresent); + assertEquals("partition is missing", 4, recordCounts.size()); + for (Entry e : recordCounts.entrySet()) { + assertEquals("missing records", 24, e.getValue().longValue()); + } + } finally { + if (jsc != null) { + jsc.stop(); + } } + } - private void createRecords(Path srcFolder) throws ParseException, IOException { - Path srcFile = new Path(srcFolder.toString(), "file1.parquet"); - long startTime = HoodieActiveTimeline.COMMIT_FORMATTER.parse("20170203000000").getTime() / 1000; - List records = new ArrayList(); - for (long recordNum = 0; recordNum < 96; recordNum++) { - records.add(HoodieTestDataGenerator - .generateGenericRecord(Long.toString(recordNum), "rider-" + recordNum, - "driver-" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum))); - } - ParquetWriter writer = AvroParquetWriter - .builder(srcFile) - .withSchema(HoodieTestDataGenerator.avroSchema) - .withConf(new Configuration()) - .build(); - for (GenericRecord record : records) { - writer.write(record); - } - writer.close(); + private void createRecords(Path srcFolder) throws ParseException, IOException { + Path srcFile = new Path(srcFolder.toString(), "file1.parquet"); + long startTime = HoodieActiveTimeline.COMMIT_FORMATTER.parse("20170203000000").getTime() / 1000; + List records = new ArrayList(); + for (long recordNum = 0; recordNum < 96; recordNum++) { + records.add(HoodieTestDataGenerator + .generateGenericRecord(Long.toString(recordNum), "rider-" + recordNum, + "driver-" + recordNum, startTime + 
TimeUnit.HOURS.toSeconds(recordNum))); } - - private void createSchemaFile(String schemaFile) throws IOException { - FSDataOutputStream schemaFileOS = dfs.create(new Path(schemaFile)); - schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes()); - schemaFileOS.close(); + ParquetWriter writer = AvroParquetWriter + .builder(srcFile) + .withSchema(HoodieTestDataGenerator.avroSchema) + .withConf(new Configuration()) + .build(); + for (GenericRecord record : records) { + writer.write(record); } + writer.close(); + } - /** - * Tests for scheme file. - * 1. File is missing. - * 2. File has invalid data. - */ - @Test - public void testSchemaFile() throws Exception { - JavaSparkContext jsc = null; - try { - jsc = getJavaSparkContext(); + private void createSchemaFile(String schemaFile) throws IOException { + FSDataOutputStream schemaFileOS = dfs.create(new Path(schemaFile)); + schemaFileOS.write(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA.getBytes()); + schemaFileOS.close(); + } - // Test root folder. - String basePath = (new Path(dfsBasePath, - Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); - // Hoodie root folder - Path hoodieFolder = new Path(basePath, "testTarget"); - Path srcFolder = new Path(basePath.toString(), "srcTest"); - Path schemaFile = new Path(basePath.toString(), "missingFile.schema"); - HDFSParquetImporter.Config cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), - "testTable", "COPY_ON_WRITE", "_row_key", "timestamp", - 1, schemaFile.toString()); - HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg); - // Should fail - return : -1. - assertEquals(-1, dataImporter.dataImport(jsc, 0)); + /** + * Tests for scheme file. 1. File is missing. 2. File has invalid data. 
+ */ + @Test + public void testSchemaFile() throws Exception { + JavaSparkContext jsc = null; + try { + jsc = getJavaSparkContext(); - dfs.create(schemaFile).write("Random invalid schema data".getBytes()); - // Should fail - return : -1. - assertEquals(-1, dataImporter.dataImport(jsc, 0)); + // Test root folder. + String basePath = (new Path(dfsBasePath, + Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); + // Hoodie root folder + Path hoodieFolder = new Path(basePath, "testTarget"); + Path srcFolder = new Path(basePath.toString(), "srcTest"); + Path schemaFile = new Path(basePath.toString(), "missingFile.schema"); + HDFSParquetImporter.Config cfg = getHDFSParquetImporterConfig(srcFolder.toString(), + hoodieFolder.toString(), + "testTable", "COPY_ON_WRITE", "_row_key", "timestamp", + 1, schemaFile.toString()); + HDFSParquetImporter dataImporter = new HDFSParquetImporter(cfg); + // Should fail - return : -1. + assertEquals(-1, dataImporter.dataImport(jsc, 0)); - } finally { - if (jsc != null) { - jsc.stop(); - } - } + dfs.create(schemaFile).write("Random invalid schema data".getBytes()); + // Should fail - return : -1. + assertEquals(-1, dataImporter.dataImport(jsc, 0)); + + } finally { + if (jsc != null) { + jsc.stop(); + } } + } - /** - * Test for missing rowKey and partitionKey. - */ - @Test - public void testRowAndPartitionKey() throws Exception { - JavaSparkContext jsc = null; - try { - jsc = getJavaSparkContext(); + /** + * Test for missing rowKey and partitionKey. + */ + @Test + public void testRowAndPartitionKey() throws Exception { + JavaSparkContext jsc = null; + try { + jsc = getJavaSparkContext(); - // Test root folder. - String basePath = (new Path(dfsBasePath, - Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); - // Hoodie root folder - Path hoodieFolder = new Path(basePath, "testTarget"); + // Test root folder. 
+ String basePath = (new Path(dfsBasePath, + Thread.currentThread().getStackTrace()[1].getMethodName())).toString(); + // Hoodie root folder + Path hoodieFolder = new Path(basePath, "testTarget"); - //Create generic records. - Path srcFolder = new Path(basePath, "testSrc"); - createRecords(srcFolder); + //Create generic records. + Path srcFolder = new Path(basePath, "testSrc"); + createRecords(srcFolder); - // Create schema file. - Path schemaFile = new Path(basePath.toString(), "missingFile.schema"); - createSchemaFile(schemaFile.toString()); + // Create schema file. + Path schemaFile = new Path(basePath.toString(), "missingFile.schema"); + createSchemaFile(schemaFile.toString()); - HDFSParquetImporter dataImporter; - HDFSParquetImporter.Config cfg; + HDFSParquetImporter dataImporter; + HDFSParquetImporter.Config cfg; - // Check for invalid row key. - cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), - "testTable", "COPY_ON_WRITE", "invalidRowKey", "timestamp", - 1, schemaFile.toString()); - dataImporter = new HDFSParquetImporter(cfg); - assertEquals(-1, dataImporter.dataImport(jsc, 0)); + // Check for invalid row key. + cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), + "testTable", "COPY_ON_WRITE", "invalidRowKey", "timestamp", + 1, schemaFile.toString()); + dataImporter = new HDFSParquetImporter(cfg); + assertEquals(-1, dataImporter.dataImport(jsc, 0)); - // Check for invalid partition key. - cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), - "testTable", "COPY_ON_WRITE", "_row_key", "invalidTimeStamp", - 1, schemaFile.toString()); - dataImporter = new HDFSParquetImporter(cfg); - assertEquals(-1, dataImporter.dataImport(jsc, 0)); + // Check for invalid partition key. 
+ cfg = getHDFSParquetImporterConfig(srcFolder.toString(), hoodieFolder.toString(), + "testTable", "COPY_ON_WRITE", "_row_key", "invalidTimeStamp", + 1, schemaFile.toString()); + dataImporter = new HDFSParquetImporter(cfg); + assertEquals(-1, dataImporter.dataImport(jsc, 0)); - } finally { - if (jsc != null) { - jsc.stop(); - } - } + } finally { + if (jsc != null) { + jsc.stop(); + } } + } - private HDFSParquetImporter.Config getHDFSParquetImporterConfig(String srcPath, String targetPath, - String tableName, String tableType, String rowKey, String partitionKey, int parallelism, - String schemaFile) { - HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); - cfg.srcPath = srcPath; - cfg.targetPath = targetPath; - cfg.tableName = tableName; - cfg.tableType = tableType; - cfg.rowKey = rowKey; - cfg.partitionKey = partitionKey; - cfg.parallelism = parallelism; - cfg.schemaFile = schemaFile; - return cfg; - } + private HDFSParquetImporter.Config getHDFSParquetImporterConfig(String srcPath, String targetPath, + String tableName, String tableType, String rowKey, String partitionKey, int parallelism, + String schemaFile) { + HDFSParquetImporter.Config cfg = new HDFSParquetImporter.Config(); + cfg.srcPath = srcPath; + cfg.targetPath = targetPath; + cfg.tableName = tableName; + cfg.tableType = tableType; + cfg.rowKey = rowKey; + cfg.partitionKey = partitionKey; + cfg.parallelism = parallelism; + cfg.schemaFile = schemaFile; + return cfg; + } - private JavaSparkContext getJavaSparkContext() { - // Initialize a local spark env - SparkConf sparkConf = new SparkConf().setAppName("TestConversionCommand").setMaster("local[1]"); - sparkConf = HoodieWriteClient.registerClasses(sparkConf); - return new JavaSparkContext(HoodieReadClient.addHoodieSupport(sparkConf)); - } + private JavaSparkContext getJavaSparkContext() { + // Initialize a local spark env + SparkConf sparkConf = new SparkConf().setAppName("TestConversionCommand").setMaster("local[1]"); + sparkConf = 
HoodieWriteClient.registerClasses(sparkConf); + return new JavaSparkContext(HoodieReadClient.addHoodieSupport(sparkConf)); + } } diff --git a/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHoodieSnapshotCopier.java b/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHoodieSnapshotCopier.java index 33459b9fc..6f9acc489 100644 --- a/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHoodieSnapshotCopier.java +++ b/hoodie-utilities/src/test/java/com/uber/hoodie/utilities/TestHoodieSnapshotCopier.java @@ -16,9 +16,15 @@ package com.uber.hoodie.utilities; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import com.uber.hoodie.common.HoodieTestDataGenerator; import com.uber.hoodie.common.model.HoodieTestUtils; import com.uber.hoodie.common.util.FSUtils; +import java.io.File; +import java.io.IOException; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.spark.SparkConf; @@ -28,124 +34,129 @@ import org.junit.Before; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import java.io.File; -import java.io.IOException; - -import static org.junit.Assert.*; - public class TestHoodieSnapshotCopier { - private String rootPath = null; - private String basePath = null; - private String outputPath = null; - private FileSystem fs = null; - private JavaSparkContext jsc = null; - @Before - public void init() throws IOException { - // Prepare directories - TemporaryFolder folder = new TemporaryFolder(); - folder.create(); - rootPath = folder.getRoot().getAbsolutePath(); - basePath = rootPath + "/" + HoodieTestUtils.RAW_TRIPS_TEST_NAME; - HoodieTestUtils.init(basePath); - outputPath = rootPath + "/output"; - fs = FSUtils.getFs(); - // Start a local Spark job - SparkConf conf = new SparkConf().setAppName("snapshot-test-job").setMaster("local[2]"); - jsc = new JavaSparkContext(conf); + private String rootPath = 
null; + private String basePath = null; + private String outputPath = null; + private FileSystem fs = null; + private JavaSparkContext jsc = null; + + @Before + public void init() throws IOException { + // Prepare directories + TemporaryFolder folder = new TemporaryFolder(); + folder.create(); + rootPath = folder.getRoot().getAbsolutePath(); + basePath = rootPath + "/" + HoodieTestUtils.RAW_TRIPS_TEST_NAME; + HoodieTestUtils.init(basePath); + outputPath = rootPath + "/output"; + fs = FSUtils.getFs(); + // Start a local Spark job + SparkConf conf = new SparkConf().setAppName("snapshot-test-job").setMaster("local[2]"); + jsc = new JavaSparkContext(conf); + } + + @Test + public void testEmptySnapshotCopy() throws IOException { + // There is no real data (only .hoodie directory) + assertEquals(fs.listStatus(new Path(basePath)).length, 1); + assertFalse(fs.exists(new Path(outputPath))); + + // Do the snapshot + HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); + copier.snapshot(jsc, basePath, outputPath, true); + + // Nothing changed; we just bail out + assertEquals(fs.listStatus(new Path(basePath)).length, 1); + assertFalse(fs.exists(new Path(outputPath + "/_SUCCESS"))); + } + + //TODO - uncomment this after fixing test failures + //@Test + public void testSnapshotCopy() throws Exception { + // Generate some commits and corresponding parquets + String commitTime1 = "20160501010101"; + String commitTime2 = "20160502020601"; + String commitTime3 = "20160506030611"; + new File(basePath + "/.hoodie").mkdirs(); + new File(basePath + "/.hoodie/hoodie.properties").createNewFile(); + // Only first two have commit files + new File(basePath + "/.hoodie/" + commitTime1 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); + new File(basePath + "/.hoodie/" + commitTime3 + ".inflight").createNewFile(); + + // Some parquet files + new File(basePath + "/2016/05/01/").mkdirs(); + new File(basePath + 
"/2016/05/02/").mkdirs(); + new File(basePath + "/2016/05/06/").mkdirs(); + HoodieTestDataGenerator.writePartitionMetadata(fs, + new String[]{"2016/05/01", "2016/05/02", "2016/05/06"}, + basePath); + // Make commit1 + File file11 = new File( + basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime1, 1, "id11")); + file11.createNewFile(); + File file12 = new File( + basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime1, 1, "id12")); + file12.createNewFile(); + File file13 = new File( + basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime1, 1, "id13")); + file13.createNewFile(); + + // Make commit2 + File file21 = new File( + basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime2, 1, "id21")); + file21.createNewFile(); + File file22 = new File( + basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime2, 1, "id22")); + file22.createNewFile(); + File file23 = new File( + basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime2, 1, "id23")); + file23.createNewFile(); + + // Make commit3 + File file31 = new File( + basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime3, 1, "id31")); + file31.createNewFile(); + File file32 = new File( + basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime3, 1, "id32")); + file32.createNewFile(); + File file33 = new File( + basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime3, 1, "id33")); + file33.createNewFile(); + + // Do a snapshot copy + HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); + copier.snapshot(jsc, basePath, outputPath, false); + + // Check results + assertTrue(fs.exists(new Path(outputPath + "/2016/05/01/" + file11.getName()))); + assertTrue(fs.exists(new Path(outputPath + "/2016/05/02/" + file12.getName()))); + assertTrue(fs.exists(new Path(outputPath + "/2016/05/06/" + file13.getName()))); + assertTrue(fs.exists(new Path(outputPath + "/2016/05/01/" + file21.getName()))); + assertTrue(fs.exists(new Path(outputPath + 
"/2016/05/02/" + file22.getName()))); + assertTrue(fs.exists(new Path(outputPath + "/2016/05/06/" + file23.getName()))); + assertFalse(fs.exists(new Path(outputPath + "/2016/05/01/" + file31.getName()))); + assertFalse(fs.exists(new Path(outputPath + "/2016/05/02/" + file32.getName()))); + assertFalse(fs.exists(new Path(outputPath + "/2016/05/06/" + file33.getName()))); + + assertTrue(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime1 + ".commit"))); + assertTrue(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime2 + ".commit"))); + assertFalse(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime3 + ".commit"))); + assertFalse(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime3 + ".inflight"))); + assertTrue(fs.exists(new Path(outputPath + "/.hoodie/hoodie.properties"))); + + assertTrue(fs.exists(new Path(outputPath + "/_SUCCESS"))); + } + + @After + public void cleanup() { + if (rootPath != null) { + new File(rootPath).delete(); } - - @Test - public void testEmptySnapshotCopy() throws IOException { - // There is no real data (only .hoodie directory) - assertEquals(fs.listStatus(new Path(basePath)).length, 1); - assertFalse(fs.exists(new Path(outputPath))); - - // Do the snapshot - HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); - copier.snapshot(jsc, basePath, outputPath, true); - - // Nothing changed; we just bail out - assertEquals(fs.listStatus(new Path(basePath)).length, 1); - assertFalse(fs.exists(new Path(outputPath + "/_SUCCESS"))); - } - - //TODO - uncomment this after fixing test failures - //@Test - public void testSnapshotCopy() throws Exception { - // Generate some commits and corresponding parquets - String commitTime1 = "20160501010101"; - String commitTime2 = "20160502020601"; - String commitTime3 = "20160506030611"; - new File(basePath + "/.hoodie").mkdirs(); - new File(basePath + "/.hoodie/hoodie.properties").createNewFile(); - // Only first two have commit files - new File(basePath + "/.hoodie/" + commitTime1 + 
".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime2 + ".commit").createNewFile(); - new File(basePath + "/.hoodie/" + commitTime3 + ".inflight").createNewFile(); - - // Some parquet files - new File(basePath + "/2016/05/01/").mkdirs(); - new File(basePath + "/2016/05/02/").mkdirs(); - new File(basePath + "/2016/05/06/").mkdirs(); - HoodieTestDataGenerator.writePartitionMetadata(fs, - new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, - basePath); - // Make commit1 - File file11 = new File(basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime1, 1, "id11")); - file11.createNewFile(); - File file12 = new File(basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime1, 1, "id12")); - file12.createNewFile(); - File file13 = new File(basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime1, 1, "id13")); - file13.createNewFile(); - - // Make commit2 - File file21 = new File(basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime2, 1, "id21")); - file21.createNewFile(); - File file22 = new File(basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime2, 1, "id22")); - file22.createNewFile(); - File file23 = new File(basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime2, 1, "id23")); - file23.createNewFile(); - - // Make commit3 - File file31 = new File(basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime3, 1, "id31")); - file31.createNewFile(); - File file32 = new File(basePath + "/2016/05/02/" + FSUtils.makeDataFileName(commitTime3, 1, "id32")); - file32.createNewFile(); - File file33 = new File(basePath + "/2016/05/06/" + FSUtils.makeDataFileName(commitTime3, 1, "id33")); - file33.createNewFile(); - - // Do a snapshot copy - HoodieSnapshotCopier copier = new HoodieSnapshotCopier(); - copier.snapshot(jsc, basePath, outputPath, false); - - // Check results - assertTrue(fs.exists(new Path(outputPath + "/2016/05/01/" + file11.getName()))); - assertTrue(fs.exists(new Path(outputPath + 
"/2016/05/02/" + file12.getName()))); - assertTrue(fs.exists(new Path(outputPath + "/2016/05/06/" + file13.getName()))); - assertTrue(fs.exists(new Path(outputPath + "/2016/05/01/" + file21.getName()))); - assertTrue(fs.exists(new Path(outputPath + "/2016/05/02/" + file22.getName()))); - assertTrue(fs.exists(new Path(outputPath + "/2016/05/06/" + file23.getName()))); - assertFalse(fs.exists(new Path(outputPath + "/2016/05/01/" + file31.getName()))); - assertFalse(fs.exists(new Path(outputPath + "/2016/05/02/" + file32.getName()))); - assertFalse(fs.exists(new Path(outputPath + "/2016/05/06/" + file33.getName()))); - - assertTrue(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime1 + ".commit"))); - assertTrue(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime2 + ".commit"))); - assertFalse(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime3 + ".commit"))); - assertFalse(fs.exists(new Path(outputPath + "/.hoodie/" + commitTime3 + ".inflight"))); - assertTrue(fs.exists(new Path(outputPath + "/.hoodie/hoodie.properties"))); - - assertTrue(fs.exists(new Path(outputPath + "/_SUCCESS"))); - } - - @After - public void cleanup() { - if (rootPath != null) { - new File(rootPath).delete(); - } - if (jsc != null) { - jsc.stop(); - } + if (jsc != null) { + jsc.stop(); } + } } diff --git a/hoodie-utilities/src/test/resources/log4j-surefire.properties b/hoodie-utilities/src/test/resources/log4j-surefire.properties index eab225528..3613e7d12 100644 --- a/hoodie-utilities/src/test/resources/log4j-surefire.properties +++ b/hoodie-utilities/src/test/resources/log4j-surefire.properties @@ -16,7 +16,6 @@ log4j.rootLogger=WARN, A1 log4j.category.com.uber=INFO log4j.category.org.apache.parquet.hadoop=WARN - # A1 is set to be a ConsoleAppender. log4j.appender.A1=org.apache.log4j.ConsoleAppender # A1 uses PatternLayout. diff --git a/pom.xml b/pom.xml index 1c61798d4..b19b801d2 100644 --- a/pom.xml +++ b/pom.xml @@ -15,666 +15,673 @@ ~ limitations under the License. 
--> - - 4.0.0 + + 4.0.0 - com.uber.hoodie - hoodie - pom - 0.4.1-SNAPSHOT - Hoodie is a Apache Spark library that provides the ability to efficiently do incremental processing on datasets in HDFS - https://github.com/uber/hoodie - Hoodie + com.uber.hoodie + hoodie + pom + 0.4.1-SNAPSHOT + Hoodie is a Apache Spark library that provides the ability to efficiently do + incremental processing on datasets in HDFS + + https://github.com/uber/hoodie + Hoodie - - hoodie-common - hoodie-client - hoodie-cli - hoodie-hadoop-mr - hoodie-hive - hoodie-utilities - hoodie-spark - + + hoodie-common + hoodie-client + hoodie-cli + hoodie-hadoop-mr + hoodie-hive + hoodie-utilities + hoodie-spark + - - - Apache License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - A business-friendly OSS license - - + + + Apache License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + - - Uber Technologies Inc. - http://www.uber.com/ - + + Uber Technologies Inc. 
+ http://www.uber.com/ + - - - vinoth - Vinoth Chandar - Uber - - - prasanna - Prasanna Rajaperumal - Uber - - + + + vinoth + Vinoth Chandar + Uber + + + prasanna + Prasanna Rajaperumal + Uber + + - - - Wei Yan - Uber - - - Siddhartha Gunda - Uber - - - Omkar Joshi - Uber - - - Zeeshan Qureshi - Shopify - - - Kathy Ge - Shopify - - - Kaushik Devarajaiah - Uber - - - Nishith Agarwal - Uber - - + + + Wei Yan + Uber + + + Siddhartha Gunda + Uber + + + Omkar Joshi + Uber + + + Zeeshan Qureshi + Shopify + + + Kathy Ge + Shopify + + + Kaushik Devarajaiah + Uber + + + Nishith Agarwal + Uber + + - 2015-2016 + 2015-2016 + + + com.google.code.gson + gson + 2.3.1 + test + + + junit + junit + ${junit.version} + test + + + + + 2.10 + 2.6 + 2.19.1 + 1.8.1 + 4.11 + 1.9.5 + 1.2.17 + 5.7.2 + 2.6.0 + 1.1.0 + 3.1.1 + 2.1.0 + 2.11.8 + 2.11 + + + + scm:git:git@github.com:uber/hoodie.git + scm:git:git@github.com:uber/hoodie.git + git@github.com:uber/hoodie.git + HEAD + + + + + User List + hoodie-user@googlegroups.com + https://groups.google.com/d/forum/hoodie-user/ + + + Developer List + hoodie-dev@googlegroups.com + https://groups.google.com/d/forum/hoodie-dev/ + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 1.8 + 1.8 + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.3 + + true + false + release + deploy + + + + org.apache.maven.plugins + maven-surefire-plugin + ${maven-surefire-plugin.version} + + + ${surefireArgLine} + + + file:${project.build.testOutputDirectory}/log4j-surefire.properties + + + + + **/IT*.java + + + + + + + + + maven-dependency-plugin + ${maven-dependency-plugin.version} + + + maven-jar-plugin + ${maven-jar-plugin.version} + + + org.jacoco + jacoco-maven-plugin + 0.7.8 + + + + pre-unit-test + + prepare-agent + + + + ${project.build.directory}/coverage-reports/jacoco-ut.exec + + surefireArgLine + + + + + post-unit-test + test + + report + + + + ${project.build.directory}/coverage-reports/jacoco-ut.exec + + 
${project.reporting.outputDirectory}/jacoco-ut + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + org.apache.rat + apache-rat-plugin + 0.11 + + + **/.* + **/*.txt + **/*.sh + **/*.log + **/dependency-reduced-pom.xml + **/test/resources/*.avro + **/test/resources/*.data + **/test/resources/*.schema + **/test/resources/*.csv + **/main/avro/*.avsc + **/target/* + + + + + package + + check + + + + + + org.apache.avro + avro-maven-plugin + 1.7.6 + + + generate-sources + + schema + + + ${project.basedir}/src/main/avro/ + ${project.build.directory}/generated-sources/src/main/java/ + + String + + + + + + + + + - - com.google.code.gson - gson - 2.3.1 - test - - - junit - junit - ${junit.version} - test - + + + com.beust + jcommander + 1.48 + + + + log4j + log4j + ${log4j.version} + + + + org.apache.hadoop + hadoop-client + ${hadoop.version}-cdh${cdh.version} + provided + + + + org.apache.parquet + parquet-avro + ${parquet.version} + + + + org.apache.parquet + parquet-hadoop + ${parquet.version} + + + + org.apache.avro + avro-mapred + 1.7.7 + + + + + com.google.guava + guava + 15.0 + + + + + org.apache.hadoop + hadoop-common + ${hadoop.version}-cdh${cdh.version} + provided + + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version}-cdh${cdh.version} + provided + + + org.apache.hadoop + hadoop-auth + ${hadoop.version}-cdh${cdh.version} + provided + + + org.apache.hive + hive-common + ${hive.version}-cdh${cdh.version} + provided + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version}-cdh${cdh.version} + provided + + + org.apache.hadoop + hadoop-mapreduce-client-common + 2.6.0-cdh5.7.2 + provided + + + org.apache.hive + hive-exec + 1.1.0-cdh5.7.2 + provided + + + commons-logging + commons-logging + 1.2 + + + + + + + com.twitter + parquet-hadoop-bundle + 1.5.0-cdh5.7.2 + + + com.twitter + parquet-hive-bundle + 1.5.0 + + + com.twitter + parquet-avro + 1.5.0-cdh5.7.2 + + + + org.apache.parquet + parquet-hive-bundle + 1.8.1 + + + + org.apache.spark + 
spark-core_2.11 + ${spark.version} + provided + + + org.apache.spark + spark-sql_2.11 + ${spark.version} + provided + + + + org.apache.hbase + hbase-client + 1.0.0 + + + + org.apache.avro + avro + 1.7.6-cdh5.7.2 + + + org.slf4j + slf4j-api + + + + + + + io.dropwizard.metrics + metrics-graphite + ${metrics.version} + + + io.dropwizard.metrics + metrics-core + ${metrics.version} + + + + xerces + xercesImpl + 2.9.1 + + + xalan + xalan + 2.7.1 + + + + commons-dbcp + commons-dbcp + 1.4 + + + org.apache.httpcomponents + httpcore + 4.3.2 + + + org.slf4j + slf4j-api + 1.7.5 + + + org.slf4j + slf4j-log4j12 + 1.7.5 + + + + org.apache.commons + commons-configuration2 + 2.1.1 + + + + com.fasterxml.jackson.core + jackson-annotations + 2.6.0 + + + org.codehaus.jackson + jackson-mapper-asl + 1.9.13 + + + + org.apache.hive + hive-jdbc + ${hive.version}-cdh${cdh.version} + + + + org.apache.hive + hive-service + ${hive.version}-cdh${cdh.version} + + + org.apache.hive + hive-metastore + ${hive.version}-cdh${cdh.version} + + + org.apache.commons + commons-lang3 + 3.4 + + + + junit + junit + 4.12 + + + org.apache.hadoop + hadoop-hdfs + tests + ${hadoop.version}-cdh${cdh.version} + + + org.apache.hadoop + hadoop-common + tests + ${hadoop.version}-cdh${cdh.version} + + + org.mockito + mockito-all + test + 1.10.19 + + + + com.esotericsoftware + kryo + 4.0.0 + test + - - 2.10 - 2.6 - 2.19.1 - 1.8.1 - 4.11 - 1.9.5 - 1.2.17 - 5.7.2 - 2.6.0 - 1.1.0 - 3.1.1 - 2.1.0 - 2.11.8 - 2.11 - + - - scm:git:git@github.com:uber/hoodie.git - scm:git:git@github.com:uber/hoodie.git - git@github.com:uber/hoodie.git - HEAD - + + + cloudera-repo-releases + https://repository.cloudera.com/artifactory/public/ + + - - - User List - hoodie-user@googlegroups.com - https://groups.google.com/d/forum/hoodie-user/ - - - Developer List - hoodie-dev@googlegroups.com - https://groups.google.com/d/forum/hoodie-dev/ - - + + + ossrh + https://oss.sonatype.org/content/repositories/snapshots + + + ossrh + 
https://oss.sonatype.org/service/local/staging/deploy/maven2/ + + - + + + release + + + deployArtifacts + true + + + - - org.apache.maven.plugins - maven-compiler-plugin - - 1.8 - 1.8 - - - - org.apache.maven.plugins - maven-release-plugin - 2.5.3 - - true - false - release - deploy - - - - org.apache.maven.plugins - maven-surefire-plugin - ${maven-surefire-plugin.version} - - - ${surefireArgLine} - - file:${project.build.testOutputDirectory}/log4j-surefire.properties - - - - **/IT*.java - - - + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9.1 + + + attach-javadocs + + jar + + + + + -Xdoclint:none + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.5 + + + sign-artifacts + verify + + sign + + + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.2 + true + + ossrh + https://oss.sonatype.org/ + true + + + + + - - - - maven-dependency-plugin - ${maven-dependency-plugin.version} - - - maven-jar-plugin - ${maven-jar-plugin.version} - - - org.jacoco - jacoco-maven-plugin - 0.7.8 - - - - pre-unit-test - - prepare-agent - - - - ${project.build.directory}/coverage-reports/jacoco-ut.exec - - surefireArgLine - - - - - post-unit-test - test - - report - - - - ${project.build.directory}/coverage-reports/jacoco-ut.exec - - ${project.reporting.outputDirectory}/jacoco-ut - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - org.apache.rat - apache-rat-plugin - 0.11 - - - **/.* - **/*.txt - **/*.sh - **/*.log - **/dependency-reduced-pom.xml - **/test/resources/*.avro - **/test/resources/*.data - **/test/resources/*.schema - **/test/resources/*.csv - **/main/avro/*.avsc - **/target/* - - - - - package - - check - - - - - - org.apache.avro - avro-maven-plugin - 1.7.6 - - - generate-sources - - schema - - - ${project.basedir}/src/main/avro/ - ${project.build.directory}/generated-sources/src/main/java/ - String - - - - - - - - - - - - - com.beust - 
jcommander - 1.48 - - - - log4j - log4j - ${log4j.version} - - - - org.apache.hadoop - hadoop-client - ${hadoop.version}-cdh${cdh.version} - provided - - - - org.apache.parquet - parquet-avro - ${parquet.version} - - - - org.apache.parquet - parquet-hadoop - ${parquet.version} - - - - org.apache.avro - avro-mapred - 1.7.7 - - - - - com.google.guava - guava - 15.0 - - - - - org.apache.hadoop - hadoop-common - ${hadoop.version}-cdh${cdh.version} - provided - - - org.apache.hadoop - hadoop-hdfs - ${hadoop.version}-cdh${cdh.version} - provided - - - org.apache.hadoop - hadoop-auth - ${hadoop.version}-cdh${cdh.version} - provided - - - org.apache.hive - hive-common - ${hive.version}-cdh${cdh.version} - provided - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version}-cdh${cdh.version} - provided - - - org.apache.hadoop - hadoop-mapreduce-client-common - 2.6.0-cdh5.7.2 - provided - - - org.apache.hive - hive-exec - 1.1.0-cdh5.7.2 - provided - - - commons-logging - commons-logging - 1.2 - - - - - - - com.twitter - parquet-hadoop-bundle - 1.5.0-cdh5.7.2 - - - com.twitter - parquet-hive-bundle - 1.5.0 - - - com.twitter - parquet-avro - 1.5.0-cdh5.7.2 - - - - org.apache.parquet - parquet-hive-bundle - 1.8.1 - - - - org.apache.spark - spark-core_2.11 - ${spark.version} - provided - - - org.apache.spark - spark-sql_2.11 - ${spark.version} - provided - - - - org.apache.hbase - hbase-client - 1.0.0 - - - - org.apache.avro - avro - 1.7.6-cdh5.7.2 - - - org.slf4j - slf4j-api - - - - - - - io.dropwizard.metrics - metrics-graphite - ${metrics.version} - - - io.dropwizard.metrics - metrics-core - ${metrics.version} - - - - xerces - xercesImpl - 2.9.1 - - - xalan - xalan - 2.7.1 - - - - commons-dbcp - commons-dbcp - 1.4 - - - org.apache.httpcomponents - httpcore - 4.3.2 - - - org.slf4j - slf4j-api - 1.7.5 - - - org.slf4j - slf4j-log4j12 - 1.7.5 - - - - org.apache.commons - commons-configuration2 - 2.1.1 - - - - com.fasterxml.jackson.core - jackson-annotations - 2.6.0 
- - - org.codehaus.jackson - jackson-mapper-asl - 1.9.13 - - - - org.apache.hive - hive-jdbc - ${hive.version}-cdh${cdh.version} - - - - org.apache.hive - hive-service - ${hive.version}-cdh${cdh.version} - - - org.apache.hive - hive-metastore - ${hive.version}-cdh${cdh.version} - - - org.apache.commons - commons-lang3 - 3.4 - - - - junit - junit - 4.12 - - - org.apache.hadoop - hadoop-hdfs - tests - ${hadoop.version}-cdh${cdh.version} - - - org.apache.hadoop - hadoop-common - tests - ${hadoop.version}-cdh${cdh.version} - - - org.mockito - mockito-all - test - 1.10.19 - - - - com.esotericsoftware - kryo - 4.0.0 - test - - - - - - - - cloudera-repo-releases - https://repository.cloudera.com/artifactory/public/ - - - - - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ - - - - - - release - - - deployArtifacts - true - - - - - - org.apache.maven.plugins - maven-source-plugin - 2.2.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.9.1 - - - attach-javadocs - - jar - - - - - -Xdoclint:none - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.5 - - - sign-artifacts - verify - - sign - - - - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.2 - true - - ossrh - https://oss.sonatype.org/ - true - - - - - - - - - GitHub - https://github.com/uber/hoodie/issues - + + GitHub + https://github.com/uber/hoodie/issues +