
Reformatting code per Google Code Style all over

This commit is contained in:
Vinoth Chandar
2017-11-12 22:54:56 -08:00
committed by vinoth chandar
parent 5a62480a92
commit e45679f5e2
254 changed files with 21580 additions and 21108 deletions


@@ -17,25 +17,19 @@
package com.uber.hoodie;
import com.google.common.base.Optional;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.TableFileSystemView;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.index.bloom.HoodieBloomIndex;
import com.uber.hoodie.table.HoodieTable;
import java.io.Serializable;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
@@ -46,136 +40,126 @@ import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.StructType;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import scala.Tuple2;
/**
* Provides an RDD based API for accessing/filtering Hoodie tables, based on keys.
*
*/
public class HoodieReadClient implements Serializable {
private static Logger logger = LogManager.getLogger(HoodieReadClient.class);
private static Logger logger = LogManager.getLogger(HoodieReadClient.class);
private transient final JavaSparkContext jsc;
private transient final JavaSparkContext jsc;
private transient final FileSystem fs;
/**
* TODO: We need to persist the index type into hoodie.properties and be able to access the
* index just with a simple basepath pointing to the dataset. Until, then just always assume a
* BloomIndex
*/
private transient final HoodieBloomIndex index;
private final HoodieTimeline commitTimeline;
private HoodieTable hoodieTable;
private transient Optional<SQLContext> sqlContextOpt;
private transient final FileSystem fs;
/**
* TODO: We need to persist the index type into hoodie.properties and be able to access the index
* just with a simple basepath pointing to the dataset. Until, then just always assume a
* BloomIndex
*/
private transient final HoodieBloomIndex index;
private final HoodieTimeline commitTimeline;
private HoodieTable hoodieTable;
private transient Optional<SQLContext> sqlContextOpt;
/**
* @param basePath path to Hoodie dataset
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
this.jsc = jsc;
this.fs = FSUtils.getFs();
// Create a Hoodie table which encapsulated the commits and files visible
this.hoodieTable = HoodieTable
.getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null);
this.commitTimeline = hoodieTable.getCompletedCompactionCommitTimeline();
this.index =
new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc);
this.sqlContextOpt = Optional.absent();
/**
* @param basePath path to Hoodie dataset
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath) {
this.jsc = jsc;
this.fs = FSUtils.getFs();
// Create a Hoodie table which encapsulated the commits and files visible
this.hoodieTable = HoodieTable
.getHoodieTable(new HoodieTableMetaClient(fs, basePath, true), null);
this.commitTimeline = hoodieTable.getCompletedCompactionCommitTimeline();
this.index =
new HoodieBloomIndex(HoodieWriteConfig.newBuilder().withPath(basePath).build(), jsc);
this.sqlContextOpt = Optional.absent();
}
/**
*
* @param jsc
* @param basePath
* @param sqlContext
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath, SQLContext sqlContext) {
this(jsc, basePath);
this.sqlContextOpt = Optional.of(sqlContext);
}
/**
* Adds support for accessing Hoodie built tables from SparkSQL, as you normally would.
*
* @return SparkConf object to be used to construct the SparkContext by caller
*/
public static SparkConf addHoodieSupport(SparkConf conf) {
conf.set("spark.sql.hive.convertMetastoreParquet", "false");
return conf;
}
private void assertSqlContext() {
if (!sqlContextOpt.isPresent()) {
throw new IllegalStateException(
"SQLContext must be set, when performing dataframe operations");
}
}
/**
*
* @param jsc
* @param basePath
* @param sqlContext
*/
public HoodieReadClient(JavaSparkContext jsc, String basePath, SQLContext sqlContext) {
this(jsc, basePath);
this.sqlContextOpt = Optional.of(sqlContext);
}
/**
* Given a bunch of hoodie keys, fetches all the individual records out as a data frame
*
* @return a dataframe
*/
public Dataset<Row> read(JavaRDD<HoodieKey> hoodieKeys, int parallelism)
throws Exception {
/**
* Adds support for accessing Hoodie built tables from SparkSQL, as you normally would.
*
* @return SparkConf object to be used to construct the SparkContext by caller
*/
public static SparkConf addHoodieSupport(SparkConf conf) {
conf.set("spark.sql.hive.convertMetastoreParquet", "false");
return conf;
}
assertSqlContext();
JavaPairRDD<HoodieKey, Optional<String>> keyToFileRDD =
index.fetchRecordLocation(hoodieKeys, hoodieTable);
List<String> paths = keyToFileRDD
.filter(keyFileTuple -> keyFileTuple._2().isPresent())
.map(keyFileTuple -> keyFileTuple._2().get())
.collect();
private void assertSqlContext() {
if (!sqlContextOpt.isPresent()) {
throw new IllegalStateException("SQLContext must be set, when performing dataframe operations");
}
}
// record locations might be same for multiple keys, so need a unique list
Set<String> uniquePaths = new HashSet<>(paths);
Dataset<Row> originalDF = sqlContextOpt.get().read()
.parquet(uniquePaths.toArray(new String[uniquePaths.size()]));
StructType schema = originalDF.schema();
JavaPairRDD<HoodieKey, Row> keyRowRDD = originalDF.javaRDD()
.mapToPair(row -> {
HoodieKey key = new HoodieKey(
row.<String>getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD),
row.<String>getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD));
return new Tuple2<>(key, row);
});
/**
* Given a bunch of hoodie keys, fetches all the individual records out as a data frame
*
* @return a dataframe
*/
public Dataset<Row> read(JavaRDD<HoodieKey> hoodieKeys, int parallelism)
throws Exception {
// Now, we need to further filter out, for only rows that match the supplied hoodie keys
JavaRDD<Row> rowRDD = keyRowRDD.join(keyToFileRDD, parallelism)
.map(tuple -> tuple._2()._1());
assertSqlContext();
JavaPairRDD<HoodieKey, Optional<String>> keyToFileRDD =
index.fetchRecordLocation(hoodieKeys, hoodieTable);
List<String> paths = keyToFileRDD
.filter(keyFileTuple -> keyFileTuple._2().isPresent())
.map(keyFileTuple -> keyFileTuple._2().get())
.collect();
return sqlContextOpt.get().createDataFrame(rowRDD, schema);
}
// record locations might be same for multiple keys, so need a unique list
Set<String> uniquePaths = new HashSet<>(paths);
Dataset<Row> originalDF = sqlContextOpt.get().read()
.parquet(uniquePaths.toArray(new String[uniquePaths.size()]));
StructType schema = originalDF.schema();
JavaPairRDD<HoodieKey, Row> keyRowRDD = originalDF.javaRDD()
.mapToPair(row -> {
HoodieKey key = new HoodieKey(
row.<String>getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD),
row.<String>getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD));
return new Tuple2<>(key, row);
});
/**
* Checks if the given [Keys] exists in the hoodie table and returns [Key, Optional[FullFilePath]]
* If the optional FullFilePath value is not present, then the key is not found. If the
* FullFilePath value is present, it is the path component (without scheme) of the URI underlying
* file
*/
public JavaPairRDD<HoodieKey, Optional<String>> checkExists(JavaRDD<HoodieKey> hoodieKeys) {
return index.fetchRecordLocation(hoodieKeys, hoodieTable);
}
// Now, we need to further filter out, for only rows that match the supplied hoodie keys
JavaRDD<Row> rowRDD = keyRowRDD.join(keyToFileRDD, parallelism)
.map(tuple -> tuple._2()._1());
return sqlContextOpt.get().createDataFrame(rowRDD, schema);
}
/**
* Checks if the given [Keys] exists in the hoodie table and returns [Key,
* Optional[FullFilePath]] If the optional FullFilePath value is not present, then the key is
* not found. If the FullFilePath value is present, it is the path component (without scheme) of
* the URI underlying file
*/
public JavaPairRDD<HoodieKey, Optional<String>> checkExists(JavaRDD<HoodieKey> hoodieKeys) {
return index.fetchRecordLocation(hoodieKeys, hoodieTable);
}
/**
* Filter out HoodieRecords that already exists in the output folder. This is useful in
* deduplication.
*
* @param hoodieRecords Input RDD of Hoodie records.
* @return A subset of hoodieRecords RDD, with existing records filtered out.
*/
public JavaRDD<HoodieRecord> filterExists(JavaRDD<HoodieRecord> hoodieRecords) {
JavaRDD<HoodieRecord> recordsWithLocation = index.tagLocation(hoodieRecords, hoodieTable);
return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown());
}
/**
* Filter out HoodieRecords that already exists in the output folder. This is useful in
* deduplication.
*
* @param hoodieRecords Input RDD of Hoodie records.
* @return A subset of hoodieRecords RDD, with existing records filtered out.
*/
public JavaRDD<HoodieRecord> filterExists(JavaRDD<HoodieRecord> hoodieRecords) {
JavaRDD<HoodieRecord> recordsWithLocation = index.tagLocation(hoodieRecords, hoodieTable);
return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown());
}
}
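
For readers skimming this hunk, here is a minimal usage sketch of the HoodieReadClient API reformatted above. It is not part of this commit: the app name, master, base path and sample keys are hypothetical, and it assumes a Hoodie dataset already exists at the given path.

import com.google.common.base.Optional;
import com.uber.hoodie.HoodieReadClient;
import com.uber.hoodie.common.model.HoodieKey;
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;

public class HoodieReadClientExample {
  public static void main(String[] args) throws Exception {
    SparkConf conf = HoodieReadClient.addHoodieSupport(
        new SparkConf().setAppName("hoodie-read-example").setMaster("local[2]"));
    JavaSparkContext jsc = new JavaSparkContext(conf);
    SQLContext sqlContext = new SQLContext(jsc.sc());

    // Hypothetical base path; assumes a Hoodie dataset is already present there.
    HoodieReadClient client = new HoodieReadClient(jsc, "/tmp/hoodie/trips", sqlContext);

    JavaRDD<HoodieKey> keys = jsc.parallelize(Arrays.asList(
        new HoodieKey("uuid-1", "2017/11/12"),
        new HoodieKey("uuid-2", "2017/11/12")));

    // Which keys already exist, and in which data file?
    JavaPairRDD<HoodieKey, Optional<String>> existence = client.checkExists(keys);
    System.out.println("existing keys: " + existence.filter(t -> t._2().isPresent()).count());

    // Fetch the matching records back as a DataFrame.
    Dataset<Row> rows = client.read(keys, 2);
    rows.show();

    jsc.stop();
  }
}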


@@ -50,10 +50,21 @@ import com.uber.hoodie.func.BulkInsertMapFunction;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.io.HoodieCommitArchiveLog;
import com.uber.hoodie.metrics.HoodieMetrics;
import com.uber.hoodie.table.UserDefinedBulkInsertPartitioner;
import com.uber.hoodie.table.HoodieTable;
import com.uber.hoodie.table.UserDefinedBulkInsertPartitioner;
import com.uber.hoodie.table.WorkloadProfile;
import com.uber.hoodie.table.WorkloadStat;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -66,25 +77,12 @@ import org.apache.spark.storage.StorageLevel;
import scala.Option;
import scala.Tuple2;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* Hoodie Write Client helps you build datasets on HDFS [insert()] and then
* perform efficient mutations on a HDFS dataset [upsert()]
*
* Note that, at any given time, there can only be one Spark job performing
* these operations on a Hoodie dataset.
* Hoodie Write Client helps you build datasets on HDFS [insert()] and then perform efficient
* mutations on a HDFS dataset [upsert()]
*
* Note that, at any given time, there can only be one Spark job performing these operations on a
* Hoodie dataset.
*/
public class HoodieWriteClient<T extends HoodieRecordPayload> implements Serializable {
@@ -102,7 +100,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
* @param clientConfig
* @throws Exception
*/
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig) throws Exception {
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig)
throws Exception {
this(jsc, clientConfig, false);
}
@@ -111,7 +110,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
* @param clientConfig
* @param rollbackInFlight
*/
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight) {
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig,
boolean rollbackInFlight) {
this.fs = FSUtils.getFs();
this.jsc = jsc;
this.config = clientConfig;
@@ -121,7 +121,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
if (rollbackInFlight) {
rollbackInflightCommits();
}
}
}
/**
@@ -163,17 +163,17 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
throw (HoodieUpsertException) e;
}
throw new HoodieUpsertException("Failed to upsert for commit time " + commitTime, e);
}
}
}
/**
* Inserts the given HoodieRecords, into the table. This API is intended to be used for normal
* writes.
*
* This implementation skips the index check and is able to leverage benefits such as
* small file handling/blocking alignment, as with upsert(), by profiling the workload
* This implementation skips the index check and is able to leverage benefits such as small file
* handling/blocking alignment, as with upsert(), by profiling the workload
*
* @param records HoodieRecords to insert
* @param records HoodieRecords to insert
* @param commitTime Commit Time handle
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
*/
@@ -194,7 +194,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
throw e;
}
throw new HoodieInsertException("Failed to insert for commit time " + commitTime, e);
}
}
}
/**
@@ -206,11 +206,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
* attempts to control the numbers of files with less memory compared to the {@link
* HoodieWriteClient#insert(JavaRDD, String)}
*
* @param records HoodieRecords to insert
* @param records HoodieRecords to insert
* @param commitTime Commit Time handle
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
*/
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime) {
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records,
final String commitTime) {
return bulkInsert(records, commitTime, Option.empty());
}
@@ -221,16 +222,18 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
*
* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and
* attempts to control the numbers of files with less memory compared to the {@link
* HoodieWriteClient#insert(JavaRDD, String)}. Optionally it allows users to specify their own partitioner. If
* specified then it will be used for repartitioning records. See {@link UserDefinedBulkInsertPartitioner}.
* HoodieWriteClient#insert(JavaRDD, String)}. Optionally it allows users to specify their own
* partitioner. If specified then it will be used for repartitioning records. See {@link
* UserDefinedBulkInsertPartitioner}.
*
* @param records HoodieRecords to insert
* @param records HoodieRecords to insert
* @param commitTime Commit Time handle
* @param bulkInsertPartitioner If specified then it will be used to partition input records before they are
* inserted into hoodie.
* @param bulkInsertPartitioner If specified then it will be used to partition input records
* before they are inserted into hoodie.
* @return JavaRDD[WriteStatus] - RDD of WriteStatus to inspect errors and counts
*/
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records, final String commitTime,
public JavaRDD<WriteStatus> bulkInsert(JavaRDD<HoodieRecord<T>> records,
final String commitTime,
Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
writeContext = metrics.getCommitCtx();
// Create a Hoodie table which encapsulated the commits and files visible
@@ -240,7 +243,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
try {
// De-dupe/merge if needed
JavaRDD<HoodieRecord<T>> dedupedRecords =
combineOnCondition(config.shouldCombineBeforeInsert(), records, config.getInsertShuffleParallelism());
combineOnCondition(config.shouldCombineBeforeInsert(), records,
config.getInsertShuffleParallelism());
final JavaRDD<HoodieRecord<T>> repartitionedRecords;
if (bulkInsertPartitioner.isDefined()) {
@@ -259,20 +263,22 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}, true, config.getBulkInsertShuffleParallelism());
}
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table), true)
.flatMap(writeStatuses -> writeStatuses.iterator());
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table),
true)
.flatMap(writeStatuses -> writeStatuses.iterator());
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
} catch (Throwable e) {
if (e instanceof HoodieInsertException) {
throw e;
}
throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime, e);
}
throw new HoodieInsertException("Failed to bulk insert for commit time " + commitTime,
e);
}
}
private void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD) {
if(config.shouldAutoCommit()) {
if (config.shouldAutoCommit()) {
logger.info("Auto commit enabled: Committing " + commitTime);
boolean commitResult = commit(commitTime, resultRDD);
if (!commitResult) {
@@ -280,30 +286,28 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
} else {
logger.info("Auto commit disabled for " + commitTime);
}
}
}
private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition,
JavaRDD<HoodieRecord<T>> records,
int parallelism) {
if(condition) {
JavaRDD<HoodieRecord<T>> records,
int parallelism) {
if (condition) {
return deduplicateRecords(records, parallelism);
}
return records;
}
/**
*
* Save the workload profile in an intermediate file (here re-using commit files)
* This is useful when performing rollback for MOR datasets. Only updates are recorded
* in the workload profile metadata since updates to log blocks are unknown across batches
* Inserts (which are new parquet files) are rolled back based on commit time.
* // TODO : Create a new WorkloadProfile metadata file instead of using HoodieCommitMetadata
* @param profile
* @param commitTime
* @throws HoodieCommitException
* Save the workload profile in an intermediate file (here re-using commit files) This is useful
* when performing rollback for MOR datasets. Only updates are recorded in the workload profile
* metadata since updates to log blocks are unknown across batches Inserts (which are new parquet
* files) are rolled back based on commit time. // TODO : Create a new WorkloadProfile metadata
* file instead of using HoodieCommitMetadata
*/
private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, HoodieTable<T> table, String commitTime) throws HoodieCommitException {
private void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile,
HoodieTable<T> table,
String commitTime) throws HoodieCommitException {
try {
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
profile.getPartitionPaths().stream().forEach(path -> {
@@ -319,16 +323,17 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
Optional<HoodieInstant> instant = activeTimeline.filterInflights().lastInstant();
activeTimeline.saveToInflight(instant.get(),
Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
} catch(IOException io) {
throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io);
}
Optional.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
} catch (IOException io) {
throw new HoodieCommitException(
"Failed to commit " + commitTime + " unable to save inflight metadata ", io);
}
}
private JavaRDD<WriteStatus> upsertRecordsInternal(JavaRDD<HoodieRecord<T>> preppedRecords,
String commitTime,
HoodieTable<T> hoodieTable,
final boolean isUpsert) {
String commitTime,
HoodieTable<T> hoodieTable,
final boolean isUpsert) {
// Cache the tagged records, so we don't end up computing both
preppedRecords.persist(StorageLevel.MEMORY_AND_DISK_SER());
@@ -344,29 +349,31 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
final Partitioner partitioner = getPartitioner(hoodieTable, isUpsert, profile);
JavaRDD<HoodieRecord<T>> partitionedRecords = partition(preppedRecords, partitioner);
JavaRDD<WriteStatus> writeStatusRDD = partitionedRecords
.mapPartitionsWithIndex((partition, recordItr) -> {
if (isUpsert) {
return hoodieTable
.handleUpsertPartition(commitTime, partition, recordItr, partitioner);
} else {
return hoodieTable
.handleInsertPartition(commitTime, partition, recordItr, partitioner);
}
}, true)
.flatMap(writeStatuses -> writeStatuses.iterator());
.mapPartitionsWithIndex((partition, recordItr) -> {
if (isUpsert) {
return hoodieTable
.handleUpsertPartition(commitTime, partition, recordItr, partitioner);
} else {
return hoodieTable
.handleInsertPartition(commitTime, partition, recordItr, partitioner);
}
}, true)
.flatMap(writeStatuses -> writeStatuses.iterator());
return updateIndexAndCommitIfNeeded(writeStatusRDD, hoodieTable, commitTime);
}
private Partitioner getPartitioner(HoodieTable table, boolean isUpsert, WorkloadProfile profile) {
private Partitioner getPartitioner(HoodieTable table, boolean isUpsert,
WorkloadProfile profile) {
if (isUpsert) {
return table.getUpsertPartitioner(profile);
} else {
return table.getInsertPartitioner(profile);
}
}
}
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table, String commitTime) {
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
HoodieTable<T> table, String commitTime) {
// Update the index back
JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, table);
// Trigger the insert and collect statuses
@@ -375,12 +382,15 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
return statuses;
}
private JavaRDD<HoodieRecord<T>> partition(JavaRDD<HoodieRecord<T>> dedupedRecords, Partitioner partitioner) {
private JavaRDD<HoodieRecord<T>> partition(JavaRDD<HoodieRecord<T>> dedupedRecords,
Partitioner partitioner) {
return dedupedRecords
.mapToPair(record ->
new Tuple2<>(new Tuple2<>(record.getKey(), Option.apply(record.getCurrentLocation())), record))
.partitionBy(partitioner)
.map(tuple -> tuple._2());
.mapToPair(record ->
new Tuple2<>(
new Tuple2<>(record.getKey(), Option.apply(record.getCurrentLocation())),
record))
.partitionBy(partitioner)
.map(tuple -> tuple._2());
}
/**
@@ -394,8 +404,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
* Commit changes performed at the given commitTime marker
*/
public boolean commit(String commitTime,
JavaRDD<WriteStatus> writeStatuses,
Optional<HashMap<String, String>> extraMetadata) {
JavaRDD<WriteStatus> writeStatuses,
Optional<HashMap<String, String>> extraMetadata) {
logger.info("Commiting " + commitTime);
// Create a Hoodie table which encapsulated the commits and files visible
@@ -405,9 +415,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
List<Tuple2<String, HoodieWriteStat>> stats = writeStatuses
.mapToPair((PairFunction<WriteStatus, String, HoodieWriteStat>) writeStatus ->
new Tuple2<>(writeStatus.getPartitionPath(), writeStatus.getStat()))
.collect();
.mapToPair((PairFunction<WriteStatus, String, HoodieWriteStat>) writeStatus ->
new Tuple2<>(writeStatus.getPartitionPath(), writeStatus.getStat()))
.collect();
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
for (Tuple2<String, HoodieWriteStat> stat : stats) {
@@ -438,7 +448,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
// We cannot have unbounded commit files. Archive commits if we have to archive
archiveLog.archiveIfRequired();
if(config.isAutoClean()) {
if (config.isAutoClean()) {
// Call clean to cleanup if there is anything to cleanup after the commit,
logger.info("Auto cleaning is enabled. Running cleaner now");
clean(commitTime);
@@ -465,12 +475,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Savepoint a specific commit. Latest version of data files as of the passed in commitTime
* will be referenced in the savepoint and will never be cleaned. The savepointed commit
* will never be rolledback or archived.
* Savepoint a specific commit. Latest version of data files as of the passed in commitTime will
* be referenced in the savepoint and will never be cleaned. The savepointed commit will never be
* rolledback or archived.
*
* This gives an option to rollback the state to the savepoint anytime.
* Savepoint needs to be manually created and deleted.
* This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be
* manually created and deleted.
*
* Savepoint should be on a commit that could not have been cleaned.
*
@@ -491,12 +501,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Savepoint a specific commit. Latest version of data files as of the passed in commitTime
* will be referenced in the savepoint and will never be cleaned. The savepointed commit
* will never be rolledback or archived.
* Savepoint a specific commit. Latest version of data files as of the passed in commitTime will
* be referenced in the savepoint and will never be cleaned. The savepointed commit will never be
* rolledback or archived.
*
* This gives an option to rollback the state to the savepoint anytime.
* Savepoint needs to be manually created and deleted.
* This gives an option to rollback the state to the savepoint anytime. Savepoint needs to be
* manually created and deleted.
*
* Savepoint should be on a commit that could not have been cleaned.
*
@@ -510,9 +520,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
.getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config);
Optional<HoodieInstant> cleanInstant = table.getCompletedCleanTimeline().lastInstant();
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
if(!table.getCompletedCommitTimeline().containsInstant(commitInstant)) {
throw new HoodieSavepointException("Could not savepoint non-existing commit " + commitInstant);
HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION,
commitTime);
if (!table.getCompletedCommitTimeline().containsInstant(commitInstant)) {
throw new HoodieSavepointException(
"Could not savepoint non-existing commit " + commitInstant);
}
try {
@@ -534,7 +546,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
+ lastCommitRetained);
Map<String, List<String>> latestFilesMap = jsc.parallelize(
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(), config.shouldAssumeDatePartitioning()))
FSUtils.getAllPartitionPaths(fs, table.getMetaClient().getBasePath(),
config.shouldAssumeDatePartitioning()))
.mapToPair((PairFunction<String, String, List<String>>) partitionPath -> {
// Scan all partitions files with this commit time
logger.info("Collecting latest files in partition path " + partitionPath);
@@ -555,12 +568,12 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
return true;
} catch (IOException e) {
throw new HoodieSavepointException("Failed to savepoint " + commitTime, e);
}
}
}
/**
* Delete a savepoint that was created. Once the savepoint is deleted, the commit can be rolledback
* and cleaner may clean up data files.
* Delete a savepoint that was created. Once the savepoint is deleted, the commit can be
* rolledback and cleaner may clean up data files.
*
* @param savepointTime - delete the savepoint
* @return true if the savepoint was deleted successfully
@@ -586,9 +599,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Rollback the state to the savepoint.
* WARNING: This rolls back recent commits and deletes data files. Queries accessing the files
* will mostly fail. This should be done during a downtime.
* Rollback the state to the savepoint. WARNING: This rolls back recent commits and deletes data
* files. Queries accessing the files will mostly fail. This should be done during a downtime.
*
* @param savepointTime - savepoint time to rollback to
* @return true if the savepoint was rolled back to successfully
@@ -616,7 +628,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
// Make sure the rollback was successful
Optional<HoodieInstant> lastInstant =
activeTimeline.reload().getCommitsAndCompactionsTimeline().filterCompletedInstants().lastInstant();
activeTimeline.reload().getCommitsAndCompactionsTimeline().filterCompletedInstants()
.lastInstant();
Preconditions.checkArgument(lastInstant.isPresent());
Preconditions.checkArgument(lastInstant.get().getTimestamp().equals(savepointTime),
savepointTime + "is not the last commit after rolling back " + commitsToRollback
@@ -625,12 +638,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Rollback the (inflight/committed) record changes with the given commit time.
* Three steps:
* (1) Atomically unpublish this commit
* (2) clean indexing data,
* (3) clean new generated parquet files.
* (4) Finally delete .commit or .inflight file,
* Rollback the (inflight/committed) record changes with the given commit time. Three steps: (1)
* Atomically unpublish this commit (2) clean indexing data, (3) clean new generated parquet
* files. (4) Finally delete .commit or .inflight file,
*/
public boolean rollback(final String commitTime) throws HoodieRollbackException {
rollback(Lists.newArrayList(commitTime));
@@ -638,7 +648,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
private void rollback(List<String> commits) {
if(commits.isEmpty()) {
if (commits.isEmpty()) {
logger.info("List of commits to rollback is empty");
return;
}
@@ -702,7 +712,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
Optional<Long> durationInMs = Optional.empty();
if (context != null) {
durationInMs = Optional.of(metrics.getDurationInMs(context.stop()));
Long numFilesDeleted = stats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum();
Long numFilesDeleted = stats.stream()
.mapToLong(stat -> stat.getSuccessDeleteFiles().size())
.sum();
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
}
HoodieRollbackMetadata rollbackMetadata =
@@ -722,7 +734,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
} catch (IOException e) {
throw new HoodieRollbackException("Failed to rollback " +
config.getBasePath() + " commits " + commits, e);
}
}
}
/**
@@ -733,9 +745,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Clean up any stale/old files/data lying around (either on file storage or index storage)
* based on the configurations and CleaningPolicy used. (typically files that no longer can be used
* by a running query can be cleaned)
* Clean up any stale/old files/data lying around (either on file storage or index storage) based
* on the configurations and CleaningPolicy used. (typically files that no longer can be used by a
* running query can be cleaned)
*/
public void clean() throws HoodieIOException {
String startCleanTime = HoodieActiveTimeline.createNewCommitTime();
@@ -743,11 +755,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
/**
* Clean up any stale/old files/data lying around (either on file storage or index storage)
* based on the configurations and CleaningPolicy used. (typically files that no longer can be used
* by a running query can be cleaned)
* Clean up any stale/old files/data lying around (either on file storage or index storage) based
* on the configurations and CleaningPolicy used. (typically files that no longer can be used by a
* running query can be cleaned)
*/
private void clean(String startCleanTime) throws HoodieIOException {
private void clean(String startCleanTime) throws HoodieIOException {
try {
logger.info("Cleaner started");
final Timer.Context context = metrics.getCleanCtx();
@@ -788,7 +800,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
} catch (IOException e) {
throw new HoodieIOException("Failed to clean up after commit", e);
}
}
}
/**
@@ -811,30 +823,30 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
}
public static SparkConf registerClasses(SparkConf conf) {
conf.registerKryoClasses(new Class[]{HoodieWriteConfig.class, HoodieRecord.class, HoodieKey.class});
conf.registerKryoClasses(
new Class[]{HoodieWriteConfig.class, HoodieRecord.class, HoodieKey.class});
return conf;
}
/**
* Deduplicate Hoodie records, using the given deduplication function.
*/
private JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records, int parallelism) {
private JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records,
int parallelism) {
return records
.mapToPair(record -> new Tuple2<>(record.getKey(), record))
.reduceByKey((rec1, rec2) -> {
@SuppressWarnings("unchecked")
T reducedData = (T) rec1.getData().preCombine(rec2.getData());
// we cannot allow the user to change the key or partitionPath, since that will affect everything
// so pick it from one of the records.
return new HoodieRecord<T>(rec1.getKey(), reducedData);
}, parallelism)
.map(recordTuple -> recordTuple._2());
.mapToPair(record -> new Tuple2<>(record.getKey(), record))
.reduceByKey((rec1, rec2) -> {
@SuppressWarnings("unchecked")
T reducedData = (T) rec1.getData().preCombine(rec2.getData());
// we cannot allow the user to change the key or partitionPath, since that will affect everything
// so pick it from one of the records.
return new HoodieRecord<T>(rec1.getKey(), reducedData);
}, parallelism)
.map(recordTuple -> recordTuple._2());
}
/**
* Cleanup all inflight commits
*
* @throws IOException
*/
private void rollbackInflightCommits() {
HoodieTable<T> table = HoodieTable

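As a companion to the HoodieWriteClient hunks above, here is a minimal driving sketch. It is not part of this commit: the base path and app name are hypothetical, record construction is elided, and startCommit()/upsert() are assumed from the client's public API since their bodies are not fully visible in the hunks shown here. A real job would also configure the record schema and index on the HoodieWriteConfig builder.

import com.uber.hoodie.HoodieWriteClient;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.config.HoodieWriteConfig;
import java.util.Collections;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class HoodieWriteClientExample {
  public static void main(String[] args) throws Exception {
    SparkConf conf = HoodieWriteClient.registerClasses(
        new SparkConf().setAppName("hoodie-write-example").setMaster("local[2]"));
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Hypothetical base path; withPath() is the same builder call HoodieReadClient uses above.
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath("/tmp/hoodie/trips").build();
    HoodieWriteClient<HoodieAvroPayload> client = new HoodieWriteClient<>(jsc, config);

    // Record construction is dataset-specific; an empty RDD keeps the sketch self-contained.
    JavaRDD<HoodieRecord<HoodieAvroPayload>> records =
        jsc.parallelize(Collections.<HoodieRecord<HoodieAvroPayload>>emptyList());

    String commitTime = client.startCommit();  // assumed API, not shown in the hunks above
    JavaRDD<WriteStatus> statuses = client.upsert(records, commitTime);

    // With auto-commit left at its default, commitOnAutoCommit() above publishes the commit;
    // WriteStatus can still be inspected for per-record errors.
    long failed = statuses.filter(WriteStatus::hasErrors).count();
    System.out.println("Write statuses with errors: " + failed);

    jsc.stop();
  }
}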

@@ -19,7 +19,6 @@ package com.uber.hoodie;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieWriteStat;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
@@ -32,124 +31,130 @@ import java.util.Optional;
*/
public class WriteStatus implements Serializable {
private final HashMap<HoodieKey, Throwable> errors = new HashMap<>();
private final HashMap<HoodieKey, Throwable> errors = new HashMap<>();
private final List<HoodieRecord> writtenRecords = new ArrayList<>();
private final List<HoodieRecord> writtenRecords = new ArrayList<>();
private final List<HoodieRecord> failedRecords = new ArrayList<>();
private final List<HoodieRecord> failedRecords = new ArrayList<>();
private Throwable globalError = null;
private Throwable globalError = null;
private String fileId = null;
private String fileId = null;
private String partitionPath = null;
private String partitionPath = null;
private HoodieWriteStat stat = null;
private HoodieWriteStat stat = null;
private long totalRecords = 0;
private long totalErrorRecords = 0;
private long totalRecords = 0;
private long totalErrorRecords = 0;
/**
* Mark write as success, optionally using given parameters for the purpose of calculating
* some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
* objects are collected in Spark Driver.
*
* @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
* @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
*/
public void markSuccess(HoodieRecord record,
Optional<Map<String, String>> optionalRecordMetadata) {
writtenRecords.add(record);
totalRecords++;
}
/**
* Mark write as success, optionally using given parameters for the purpose of calculating some
* aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
* objects are collected in Spark Driver.
*
* @param record deflated {@code HoodieRecord} containing information that uniquely identifies
* it.
* @param optionalRecordMetadata optional metadata related to data contained in {@link
* HoodieRecord} before deflation.
*/
public void markSuccess(HoodieRecord record,
Optional<Map<String, String>> optionalRecordMetadata) {
writtenRecords.add(record);
totalRecords++;
}
/**
* Mark write as failed, optionally using given parameters for the purpose of calculating
* some aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
* objects are collected in Spark Driver.
*
* @param record deflated {@code HoodieRecord} containing information that uniquely identifies it.
* @param optionalRecordMetadata optional metadata related to data contained in {@link HoodieRecord} before deflation.
*/
public void markFailure(HoodieRecord record, Throwable t,
Optional<Map<String, String>> optionalRecordMetadata) {
failedRecords.add(record);
errors.put(record.getKey(), t);
totalRecords++;
totalErrorRecords++;
}
/**
* Mark write as failed, optionally using given parameters for the purpose of calculating some
* aggregate metrics. This method is not meant to cache passed arguments, since WriteStatus
* objects are collected in Spark Driver.
*
* @param record deflated {@code HoodieRecord} containing information that uniquely identifies
* it.
* @param optionalRecordMetadata optional metadata related to data contained in {@link
* HoodieRecord} before deflation.
*/
public void markFailure(HoodieRecord record, Throwable t,
Optional<Map<String, String>> optionalRecordMetadata) {
failedRecords.add(record);
errors.put(record.getKey(), t);
totalRecords++;
totalErrorRecords++;
}
public String getFileId() {
return fileId;
}
public String getFileId() {
return fileId;
}
public void setFileId(String fileId) {
this.fileId = fileId;
}
public void setFileId(String fileId) {
this.fileId = fileId;
}
public boolean hasErrors() {
return totalErrorRecords > 0;
}
public boolean hasErrors() {
return totalErrorRecords > 0;
}
public boolean isErrored(HoodieKey key) {
return errors.containsKey(key);
}
public boolean isErrored(HoodieKey key) {
return errors.containsKey(key);
}
public HashMap<HoodieKey, Throwable> getErrors() {
return errors;
}
public HashMap<HoodieKey, Throwable> getErrors() {
return errors;
}
public boolean hasGlobalError() {
return globalError != null;
}
public boolean hasGlobalError() {
return globalError != null;
}
public void setGlobalError(Throwable t) {
this.globalError = t;
}
public void setGlobalError(Throwable t) {
this.globalError = t;
}
public Throwable getGlobalError() {
return this.globalError;
}
public Throwable getGlobalError() {
return this.globalError;
}
public List<HoodieRecord> getWrittenRecords() {
return writtenRecords;
}
public List<HoodieRecord> getWrittenRecords() {
return writtenRecords;
}
public List<HoodieRecord> getFailedRecords() {
return failedRecords;
}
public List<HoodieRecord> getFailedRecords() {
return failedRecords;
}
public HoodieWriteStat getStat() {
return stat;
}
public HoodieWriteStat getStat() {
return stat;
}
public void setStat(HoodieWriteStat stat) {
this.stat = stat;
}
public void setStat(HoodieWriteStat stat) {
this.stat = stat;
}
public String getPartitionPath() {
return partitionPath;
}
public String getPartitionPath() {
return partitionPath;
}
public void setPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
}
public void setPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
}
public long getTotalRecords() {
return totalRecords;
}
public long getTotalRecords() {
return totalRecords;
}
public long getTotalErrorRecords() { return totalErrorRecords; }
public long getTotalErrorRecords() {
return totalErrorRecords;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WriteStatus {");
sb.append("fileId=").append(fileId);
sb.append(", globalError='").append(globalError).append('\'');
sb.append(", hasErrors='").append(hasErrors()).append('\'');
sb.append(", errorCount='").append(totalErrorRecords).append('\'');
sb.append(", errorPct='").append((100.0 * totalErrorRecords) / totalRecords).append('\'');
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WriteStatus {");
sb.append("fileId=").append(fileId);
sb.append(", globalError='").append(globalError).append('\'');
sb.append(", hasErrors='").append(hasErrors()).append('\'');
sb.append(", errorCount='").append(totalErrorRecords).append('\'');
sb.append(", errorPct='").append((100.0 * totalErrorRecords) / totalRecords).append('\'');
sb.append('}');
return sb.toString();
}
}
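
A small sketch of populating and inspecting a WriteStatus as defined above (illustrative only: the file id, partition path and keys are made up, and records carry a null payload to mimic the deflated records the Javadoc describes).

import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import java.util.Optional;

public class WriteStatusExample {
  public static void main(String[] args) {
    WriteStatus status = new WriteStatus();
    status.setFileId("file-0001");          // hypothetical file id
    status.setPartitionPath("2017/11/12");  // hypothetical partition

    HoodieRecord<HoodieAvroPayload> ok =
        new HoodieRecord<>(new HoodieKey("uuid-1", "2017/11/12"), null);
    status.markSuccess(ok, Optional.empty());

    HoodieRecord<HoodieAvroPayload> bad =
        new HoodieRecord<>(new HoodieKey("uuid-2", "2017/11/12"), null);
    status.markFailure(bad, new RuntimeException("simulated write failure"), Optional.empty());

    System.out.println(status);  // toString() above reports errorCount and errorPct
    System.out.println("hasErrors=" + status.hasErrors()
        + ", totalErrorRecords=" + status.getTotalErrorRecords());
  }
}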


@@ -17,33 +17,35 @@
package com.uber.hoodie.config;
import java.io.Serializable;
import java.util.Map;
import java.util.Properties;
/**
* Default Way to load Hoodie config through a java.util.Properties
*/
public class DefaultHoodieConfig implements Serializable {
protected final Properties props;
public DefaultHoodieConfig(Properties props) {
this.props = props;
}
public Properties getProps() {
return props;
}
protected final Properties props;
public static void setDefaultOnCondition(Properties props, boolean condition, String propName,
String defaultValue) {
if (condition) {
props.setProperty(propName, defaultValue);
}
}
public DefaultHoodieConfig(Properties props) {
this.props = props;
}
public static void setDefaultOnCondition(Properties props, boolean condition, DefaultHoodieConfig config) {
if (condition) {
props.putAll(config.getProps());
}
public Properties getProps() {
return props;
}
public static void setDefaultOnCondition(Properties props, boolean condition, String propName,
String defaultValue) {
if (condition) {
props.setProperty(propName, defaultValue);
}
}
public static void setDefaultOnCondition(Properties props, boolean condition,
DefaultHoodieConfig config) {
if (condition) {
props.putAll(config.getProps());
}
}
}
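
A quick sketch of the setDefaultOnCondition() helper above; the property name is invented purely for illustration.

import com.uber.hoodie.config.DefaultHoodieConfig;
import java.util.Properties;

public class DefaultHoodieConfigExample {
  public static void main(String[] args) {
    Properties props = new Properties();
    // Only sets the default because the key is absent -- the same pattern the config builders use.
    DefaultHoodieConfig.setDefaultOnCondition(props,
        !props.containsKey("hoodie.example.parallelism"),   // hypothetical property name
        "hoodie.example.parallelism", "200");
    System.out.println(props.getProperty("hoodie.example.parallelism"));  // prints 200
  }
}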


@@ -19,231 +19,239 @@ package com.uber.hoodie.config;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.io.compact.strategy.CompactionStrategy;
import com.uber.hoodie.io.compact.strategy.LogFileSizeBasedCompactionStrategy;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
/**
* Compaction related config
*/
@Immutable
public class HoodieCompactionConfig extends DefaultHoodieConfig {
public static final String CLEANER_POLICY_PROP = "hoodie.cleaner.policy";
private static final String DEFAULT_CLEANER_POLICY =
HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
public static final String AUTO_CLEAN_PROP = "hoodie.clean.automatic";
private static final String DEFAULT_AUTO_CLEAN = "true";
public static final String CLEANER_POLICY_PROP = "hoodie.cleaner.policy";
private static final String DEFAULT_CLEANER_POLICY =
HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name();
// Turn on inline compaction - after few delta commits an inline compaction will be run
public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline";
private static final String DEFAULT_INLINE_COMPACT = "true";
public static final String AUTO_CLEAN_PROP = "hoodie.clean.automatic";
private static final String DEFAULT_AUTO_CLEAN = "true";
// Run a compaction every N delta commits
public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "10";
// Turn on inline compaction - after few delta commits an inline compaction will be run
public static final String INLINE_COMPACT_PROP = "hoodie.compact.inline";
private static final String DEFAULT_INLINE_COMPACT = "true";
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP =
"hoodie.cleaner.fileversions.retained";
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
// Run a compaction every N delta commits
public static final String INLINE_COMPACT_NUM_DELTA_COMMITS_PROP = "hoodie.compact.inline.max.delta.commits";
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = "10";
public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained";
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "24";
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP =
"hoodie.cleaner.fileversions.retained";
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = "3";
public static final String MAX_COMMITS_TO_KEEP = "hoodie.keep.max.commits";
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = String.valueOf(128);
public static final String MIN_COMMITS_TO_KEEP = "hoodie.keep.min.commits";
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = String.valueOf(96);
// Upsert uses this file size to compact new data onto existing files..
public static final String PARQUET_SMALL_FILE_LIMIT_BYTES = "hoodie.parquet.small.file.limit";
// Turned off by default
public static final String DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES = String.valueOf(0);
public static final String CLEANER_COMMITS_RETAINED_PROP = "hoodie.cleaner.commits.retained";
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = "24";
public static final String MAX_COMMITS_TO_KEEP = "hoodie.keep.max.commits";
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = String.valueOf(128);
public static final String MIN_COMMITS_TO_KEEP = "hoodie.keep.min.commits";
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = String.valueOf(96);
// Upsert uses this file size to compact new data onto existing files..
public static final String PARQUET_SMALL_FILE_LIMIT_BYTES = "hoodie.parquet.small.file.limit";
// Turned off by default
public static final String DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES = String.valueOf(0);
/** Configs related to specific table types **/
// Number of inserts, that will be put each partition/bucket for writing
public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert.split.size";
// The rationale to pick the insert parallelism is the following. Writing out 100MB files,
// with atleast 1kb records, means 100K records per file. we just overprovision to 500K
public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000);
/**
* Configs related to specific table types
**/
// Number of inserts, that will be put each partition/bucket for writing
public static final String COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = "hoodie.copyonwrite.insert.split.size";
// The rationale to pick the insert parallelism is the following. Writing out 100MB files,
// with atleast 1kb records, means 100K records per file. we just overprovision to 500K
public static final String DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE = String.valueOf(500000);
// Config to control whether we control insert split sizes automatically based on average record sizes
public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert.auto.split";
// its off by default
public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(false);
// Config to control whether we control insert split sizes automatically based on average record sizes
public static final String COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = "hoodie.copyonwrite.insert.auto.split";
// its off by default
public static final String DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS = String.valueOf(false);
// This value is used as a guesstimate for the record size, if we can't determine this from previous commits
public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite.record.size.estimate";
// Used to determine how much more can be packed into a small file, before it exceeds the size limit.
public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String.valueOf(1024);
// This value is used as a guesstimate for the record size, if we can't determine this from previous commits
public static final String COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = "hoodie.copyonwrite.record.size.estimate";
// Used to determine how much more can be packed into a small file, before it exceeds the size limit.
public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = String
.valueOf(1024);
public static final String CLEANER_PARALLELISM = "hoodie.cleaner.parallelism";
public static final String DEFAULT_CLEANER_PARALLELISM = String.valueOf(200);
public static final String CLEANER_PARALLELISM = "hoodie.cleaner.parallelism";
public static final String DEFAULT_CLEANER_PARALLELISM = String.valueOf(200);
public static final String TARGET_IO_PER_COMPACTION_IN_MB_PROP = "hoodie.compaction.target.io";
// 500GB of target IO per compaction (both read and write)
public static final String DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB = String.valueOf(500 * 1024);
public static final String TARGET_IO_PER_COMPACTION_IN_MB_PROP = "hoodie.compaction.target.io";
// 500GB of target IO per compaction (both read and write)
public static final String DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB = String.valueOf(500 * 1024);
public static final String COMPACTION_STRATEGY_PROP = "hoodie.compaction.strategy";
// 200GB of target IO per compaction
public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class.getName();
public static final String COMPACTION_STRATEGY_PROP = "hoodie.compaction.strategy";
// 200GB of target IO per compaction
public static final String DEFAULT_COMPACTION_STRATEGY = LogFileSizeBasedCompactionStrategy.class
.getName();
// used to merge records written to log file
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
public static final String PAYLOAD_CLASS = "hoodie.compaction.payload.class";
// used to merge records written to log file
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
public static final String PAYLOAD_CLASS = "hoodie.compaction.payload.class";
private HoodieCompactionConfig(Properties props) {
super(props);
private HoodieCompactionConfig(Properties props) {
super(props);
}
public static HoodieCompactionConfig.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public static HoodieCompactionConfig.Builder newBuilder() {
return new Builder();
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public Builder withAutoClean(Boolean autoClean) {
props.setProperty(AUTO_CLEAN_PROP, String.valueOf(autoClean));
return this;
}
public Builder withInlineCompaction(Boolean inlineCompaction) {
props.setProperty(INLINE_COMPACT_PROP, String.valueOf(inlineCompaction));
return this;
}
public Builder inlineCompactionEvery(int deltaCommits) {
props.setProperty(INLINE_COMPACT_PROP, String.valueOf(deltaCommits));
return this;
}
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
props.setProperty(CLEANER_POLICY_PROP, policy.name());
return this;
}
public Builder retainFileVersions(int fileVersionsRetained) {
props.setProperty(CLEANER_FILE_VERSIONS_RETAINED_PROP,
String.valueOf(fileVersionsRetained));
return this;
}
public Builder retainCommits(int commitsRetained) {
props.setProperty(CLEANER_COMMITS_RETAINED_PROP, String.valueOf(commitsRetained));
return this;
}
public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
props.setProperty(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
props.setProperty(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
return this;
}
public Builder compactionSmallFileSize(long smallFileLimitBytes) {
props.setProperty(PARQUET_SMALL_FILE_LIMIT_BYTES, String.valueOf(smallFileLimitBytes));
return this;
}
public Builder insertSplitSize(int insertSplitSize) {
props.setProperty(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, String.valueOf(insertSplitSize));
return this;
}
public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) {
props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, String.valueOf(autoTuneInsertSplits));
return this;
}
public Builder approxRecordSize(int recordSizeEstimate) {
props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, String.valueOf(recordSizeEstimate));
return this;
}
public Builder withCleanerParallelism(int cleanerParallelism) {
props.setProperty(CLEANER_PARALLELISM, String.valueOf(cleanerParallelism));
return this;
}
public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) {
props.setProperty(COMPACTION_STRATEGY_PROP, compactionStrategy.getClass().getName());
return this;
}
public Builder withPayloadClass(String payloadClassName) {
props.setProperty(PAYLOAD_CLASS, payloadClassName);
return this;
}
public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) {
props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP, String.valueOf(targetIOPerCompactionInMB));
return this;
}
public HoodieCompactionConfig build() {
HoodieCompactionConfig config = new HoodieCompactionConfig(props);
setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP),
AUTO_CLEAN_PROP, DEFAULT_AUTO_CLEAN);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP),
INLINE_COMPACT_PROP, DEFAULT_INLINE_COMPACT);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP),
INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS);
setDefaultOnCondition(props, !props.containsKey(CLEANER_POLICY_PROP),
CLEANER_POLICY_PROP, DEFAULT_CLEANER_POLICY);
setDefaultOnCondition(props, !props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP),
CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP),
CLEANER_COMMITS_RETAINED_PROP, DEFAULT_CLEANER_COMMITS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP),
MAX_COMMITS_TO_KEEP, DEFAULT_MAX_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP),
MIN_COMMITS_TO_KEEP, DEFAULT_MIN_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES),
PARQUET_SMALL_FILE_LIMIT_BYTES, DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE),
COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS),
COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE),
COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE, DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE);
setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM),
CLEANER_PARALLELISM, DEFAULT_CLEANER_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP),
COMPACTION_STRATEGY_PROP, DEFAULT_COMPACTION_STRATEGY);
setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS),
PAYLOAD_CLASS, DEFAULT_PAYLOAD_CLASS);
setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP),
TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB);
HoodieCleaningPolicy.valueOf(props.getProperty(CLEANER_POLICY_PROP));
Preconditions.checkArgument(
Integer.parseInt(props.getProperty(MAX_COMMITS_TO_KEEP)) > Integer
.parseInt(props.getProperty(MIN_COMMITS_TO_KEEP)));
return config;
}
public Builder withAutoClean(Boolean autoClean) {
props.setProperty(AUTO_CLEAN_PROP, String.valueOf(autoClean));
return this;
}
public Builder withInlineCompaction(Boolean inlineCompaction) {
props.setProperty(INLINE_COMPACT_PROP, String.valueOf(inlineCompaction));
return this;
}
public Builder inlineCompactionEvery(int deltaCommits) {
props.setProperty(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, String.valueOf(deltaCommits));
return this;
}
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
props.setProperty(CLEANER_POLICY_PROP, policy.name());
return this;
}
public Builder retainFileVersions(int fileVersionsRetained) {
props.setProperty(CLEANER_FILE_VERSIONS_RETAINED_PROP,
String.valueOf(fileVersionsRetained));
return this;
}
public Builder retainCommits(int commitsRetained) {
props.setProperty(CLEANER_COMMITS_RETAINED_PROP, String.valueOf(commitsRetained));
return this;
}
public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
props.setProperty(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
props.setProperty(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
return this;
}
public Builder compactionSmallFileSize(long smallFileLimitBytes) {
props.setProperty(PARQUET_SMALL_FILE_LIMIT_BYTES, String.valueOf(smallFileLimitBytes));
return this;
}
public Builder insertSplitSize(int insertSplitSize) {
props.setProperty(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, String.valueOf(insertSplitSize));
return this;
}
public Builder autoTuneInsertSplits(boolean autoTuneInsertSplits) {
props.setProperty(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS,
String.valueOf(autoTuneInsertSplits));
return this;
}
public Builder approxRecordSize(int recordSizeEstimate) {
props.setProperty(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE,
String.valueOf(recordSizeEstimate));
return this;
}
public Builder withCleanerParallelism(int cleanerParallelism) {
props.setProperty(CLEANER_PARALLELISM, String.valueOf(cleanerParallelism));
return this;
}
public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) {
props.setProperty(COMPACTION_STRATEGY_PROP, compactionStrategy.getClass().getName());
return this;
}
public Builder withPayloadClass(String payloadClassName) {
props.setProperty(PAYLOAD_CLASS, payloadClassName);
return this;
}
public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) {
props.setProperty(TARGET_IO_PER_COMPACTION_IN_MB_PROP,
String.valueOf(targetIOPerCompactionInMB));
return this;
}
public HoodieCompactionConfig build() {
HoodieCompactionConfig config = new HoodieCompactionConfig(props);
setDefaultOnCondition(props, !props.containsKey(AUTO_CLEAN_PROP),
AUTO_CLEAN_PROP, DEFAULT_AUTO_CLEAN);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_PROP),
INLINE_COMPACT_PROP, DEFAULT_INLINE_COMPACT);
setDefaultOnCondition(props, !props.containsKey(INLINE_COMPACT_NUM_DELTA_COMMITS_PROP),
INLINE_COMPACT_NUM_DELTA_COMMITS_PROP, DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS);
setDefaultOnCondition(props, !props.containsKey(CLEANER_POLICY_PROP),
CLEANER_POLICY_PROP, DEFAULT_CLEANER_POLICY);
setDefaultOnCondition(props, !props.containsKey(CLEANER_FILE_VERSIONS_RETAINED_PROP),
CLEANER_FILE_VERSIONS_RETAINED_PROP, DEFAULT_CLEANER_FILE_VERSIONS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(CLEANER_COMMITS_RETAINED_PROP),
CLEANER_COMMITS_RETAINED_PROP, DEFAULT_CLEANER_COMMITS_RETAINED);
setDefaultOnCondition(props, !props.containsKey(MAX_COMMITS_TO_KEEP),
MAX_COMMITS_TO_KEEP, DEFAULT_MAX_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(MIN_COMMITS_TO_KEEP),
MIN_COMMITS_TO_KEEP, DEFAULT_MIN_COMMITS_TO_KEEP);
setDefaultOnCondition(props, !props.containsKey(PARQUET_SMALL_FILE_LIMIT_BYTES),
PARQUET_SMALL_FILE_LIMIT_BYTES, DEFAULT_PARQUET_SMALL_FILE_LIMIT_BYTES);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE),
COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE, DEFAULT_COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS),
COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS, DEFAULT_COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS);
setDefaultOnCondition(props, !props.containsKey(COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE),
COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE,
DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE);
setDefaultOnCondition(props, !props.containsKey(CLEANER_PARALLELISM),
CLEANER_PARALLELISM, DEFAULT_CLEANER_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMPACTION_STRATEGY_PROP),
COMPACTION_STRATEGY_PROP, DEFAULT_COMPACTION_STRATEGY);
setDefaultOnCondition(props, !props.containsKey(PAYLOAD_CLASS),
PAYLOAD_CLASS, DEFAULT_PAYLOAD_CLASS);
setDefaultOnCondition(props, !props.containsKey(TARGET_IO_PER_COMPACTION_IN_MB_PROP),
TARGET_IO_PER_COMPACTION_IN_MB_PROP, DEFAULT_TARGET_IO_PER_COMPACTION_IN_MB);
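// Throws IllegalArgumentException if the value set is not a known cleaning policy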
HoodieCleaningPolicy.valueOf(props.getProperty(CLEANER_POLICY_PROP));
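// Archival sanity check: max commits to keep must be greater than min commits to keep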
Preconditions.checkArgument(
Integer.parseInt(props.getProperty(MAX_COMMITS_TO_KEEP)) > Integer
.parseInt(props.getProperty(MIN_COMMITS_TO_KEEP)));
return config;
}
}
}
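For reference, a minimal sketch of how this builder is typically driven; the cleaning policy and retention numbers below are illustrative assumptions, not values prescribed by this change:

    HoodieCompactionConfig compactionConfig = HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
        .retainCommits(24)                   // cleaner keeps data referenced by the last 24 commits
        .archiveCommitsWith(20, 30)          // archive the timeline down to 20 once it exceeds 30 commits
        .withInlineCompaction(false)
        .compactionSmallFileSize(100 * 1024 * 1024)
        .build();                            // any property left unset falls back to its DEFAULT_* value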

View File

@@ -16,14 +16,12 @@
package com.uber.hoodie.config;
import com.google.common.base.Preconditions;
import com.uber.hoodie.index.HoodieIndex;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
/**
* Indexing related config
@@ -31,123 +29,124 @@ import java.util.Properties;
@Immutable
public class HoodieIndexConfig extends DefaultHoodieConfig {
public static final String INDEX_TYPE_PROP = "hoodie.index.type";
public static final String DEFAULT_INDEX_TYPE = HoodieIndex.IndexType.BLOOM.name();
public static final String INDEX_TYPE_PROP = "hoodie.index.type";
public static final String DEFAULT_INDEX_TYPE = HoodieIndex.IndexType.BLOOM.name();
// ***** Bloom Index configs *****
public static final String BLOOM_FILTER_NUM_ENTRIES = "hoodie.index.bloom.num_entries";
public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = "60000";
public static final String BLOOM_FILTER_FPP = "hoodie.index.bloom.fpp";
public static final String DEFAULT_BLOOM_FILTER_FPP = "0.000000001";
public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism";
// Disable explicit bloom index parallelism setting by default - hoodie auto computes
public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0";
public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges";
public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
// ***** Bloom Index configs *****
public static final String BLOOM_FILTER_NUM_ENTRIES = "hoodie.index.bloom.num_entries";
public static final String DEFAULT_BLOOM_FILTER_NUM_ENTRIES = "60000";
public static final String BLOOM_FILTER_FPP = "hoodie.index.bloom.fpp";
public static final String DEFAULT_BLOOM_FILTER_FPP = "0.000000001";
public static final String BLOOM_INDEX_PARALLELISM_PROP = "hoodie.bloom.index.parallelism";
// Disable explicit bloom index parallelism setting by default - hoodie auto computes
public static final String DEFAULT_BLOOM_INDEX_PARALLELISM = "0";
public static final String BLOOM_INDEX_PRUNE_BY_RANGES_PROP = "hoodie.bloom.index.prune.by.ranges";
public static final String DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES = "true";
public static final String BLOOM_INDEX_USE_CACHING_PROP = "hoodie.bloom.index.use.caching";
public static final String DEFAULT_BLOOM_INDEX_USE_CACHING = "true";
// ***** HBase Index Configs *****
public final static String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum";
public final static String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport";
public final static String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table";
// ***** HBase Index Configs *****
public final static String HBASE_ZKQUORUM_PROP = "hoodie.index.hbase.zkquorum";
public final static String HBASE_ZKPORT_PROP = "hoodie.index.hbase.zkport";
public final static String HBASE_TABLENAME_PROP = "hoodie.index.hbase.table";
// ***** Bucketed Index Configs *****
public final static String BUCKETED_INDEX_NUM_BUCKETS_PROP = "hoodie.index.bucketed.numbuckets";
// ***** Bucketed Index Configs *****
public final static String BUCKETED_INDEX_NUM_BUCKETS_PROP = "hoodie.index.bucketed.numbuckets";
private HoodieIndexConfig(Properties props) {
super(props);
private HoodieIndexConfig(Properties props) {
super(props);
}
public static HoodieIndexConfig.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public static HoodieIndexConfig.Builder newBuilder() {
return new Builder();
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public Builder withIndexType(HoodieIndex.IndexType indexType) {
props.setProperty(INDEX_TYPE_PROP, indexType.name());
return this;
}
public Builder bloomFilterNumEntries(int numEntries) {
props.setProperty(BLOOM_FILTER_NUM_ENTRIES, String.valueOf(numEntries));
return this;
}
public Builder bloomFilterFPP(double fpp) {
props.setProperty(BLOOM_FILTER_FPP, String.valueOf(fpp));
return this;
}
public Builder hbaseZkQuorum(String zkString) {
props.setProperty(HBASE_ZKQUORUM_PROP, zkString);
return this;
}
public Builder hbaseZkPort(int port) {
props.setProperty(HBASE_ZKPORT_PROP, String.valueOf(port));
return this;
}
public Builder hbaseTableName(String tableName) {
props.setProperty(HBASE_TABLENAME_PROP, tableName);
return this;
}
public Builder bloomIndexParallelism(int parallelism) {
props.setProperty(BLOOM_INDEX_PARALLELISM_PROP, String.valueOf(parallelism));
return this;
}
public Builder bloomIndexPruneByRanges(boolean pruneRanges) {
props.setProperty(BLOOM_INDEX_PRUNE_BY_RANGES_PROP, String.valueOf(pruneRanges));
return this;
}
public Builder bloomIndexUseCaching(boolean useCaching) {
props.setProperty(BLOOM_INDEX_USE_CACHING_PROP, String.valueOf(useCaching));
return this;
}
public Builder numBucketsPerPartition(int numBuckets) {
props.setProperty(BUCKETED_INDEX_NUM_BUCKETS_PROP, String.valueOf(numBuckets));
return this;
}
public HoodieIndexConfig build() {
HoodieIndexConfig config = new HoodieIndexConfig(props);
setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP),
INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES),
BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP),
BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP),
BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP),
BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP),
BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING);
// Throws IllegalArgumentException if the value set is not a known Hoodie Index Type
HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP));
return config;
}
public Builder withIndexType(HoodieIndex.IndexType indexType) {
props.setProperty(INDEX_TYPE_PROP, indexType.name());
return this;
}
public Builder bloomFilterNumEntries(int numEntries) {
props.setProperty(BLOOM_FILTER_NUM_ENTRIES, String.valueOf(numEntries));
return this;
}
public Builder bloomFilterFPP(double fpp) {
props.setProperty(BLOOM_FILTER_FPP, String.valueOf(fpp));
return this;
}
public Builder hbaseZkQuorum(String zkString) {
props.setProperty(HBASE_ZKQUORUM_PROP, zkString);
return this;
}
public Builder hbaseZkPort(int port) {
props.setProperty(HBASE_ZKPORT_PROP, String.valueOf(port));
return this;
}
public Builder hbaseTableName(String tableName) {
props.setProperty(HBASE_TABLENAME_PROP, tableName);
return this;
}
public Builder bloomIndexParallelism(int parallelism) {
props.setProperty(BLOOM_INDEX_PARALLELISM_PROP, String.valueOf(parallelism));
return this;
}
public Builder bloomIndexPruneByRanges(boolean pruneRanges) {
props.setProperty(BLOOM_INDEX_PRUNE_BY_RANGES_PROP, String.valueOf(pruneRanges));
return this;
}
public Builder bloomIndexUseCaching(boolean useCaching) {
props.setProperty(BLOOM_INDEX_USE_CACHING_PROP, String.valueOf(useCaching));
return this;
}
public Builder numBucketsPerPartition(int numBuckets) {
props.setProperty(BUCKETED_INDEX_NUM_BUCKETS_PROP, String.valueOf(numBuckets));
return this;
}
public HoodieIndexConfig build() {
HoodieIndexConfig config = new HoodieIndexConfig(props);
setDefaultOnCondition(props, !props.containsKey(INDEX_TYPE_PROP),
INDEX_TYPE_PROP, DEFAULT_INDEX_TYPE);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_NUM_ENTRIES),
BLOOM_FILTER_NUM_ENTRIES, DEFAULT_BLOOM_FILTER_NUM_ENTRIES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_FILTER_FPP),
BLOOM_FILTER_FPP, DEFAULT_BLOOM_FILTER_FPP);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PARALLELISM_PROP),
BLOOM_INDEX_PARALLELISM_PROP, DEFAULT_BLOOM_INDEX_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_PRUNE_BY_RANGES_PROP),
BLOOM_INDEX_PRUNE_BY_RANGES_PROP, DEFAULT_BLOOM_INDEX_PRUNE_BY_RANGES);
setDefaultOnCondition(props, !props.containsKey(BLOOM_INDEX_USE_CACHING_PROP),
BLOOM_INDEX_USE_CACHING_PROP, DEFAULT_BLOOM_INDEX_USE_CACHING);
// Throws IllegalArgumentException if the value set is not a known Hoodie Index Type
HoodieIndex.IndexType.valueOf(props.getProperty(INDEX_TYPE_PROP));
return config;
}
}
}
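As a usage sketch of the index builder above (the bloom-filter numbers are the documented defaults, shown here only for illustration):

    HoodieIndexConfig indexConfig = HoodieIndexConfig.newBuilder()
        .withIndexType(HoodieIndex.IndexType.BLOOM)
        .bloomFilterNumEntries(60000)        // expected entries per bloom filter
        .bloomFilterFPP(0.000000001)         // acceptable false-positive probability
        .bloomIndexPruneByRanges(true)
        .build();                            // build() rejects unknown index types via IndexType.valueOf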

View File

@@ -17,12 +17,11 @@
package com.uber.hoodie.config;
import com.uber.hoodie.metrics.MetricsReporterType;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
/**
* Fetch the configurations used by the Metrics system.
@@ -30,89 +29,90 @@ import java.util.Properties;
@Immutable
public class HoodieMetricsConfig extends DefaultHoodieConfig {
public final static String METRIC_PREFIX = "hoodie.metrics";
public final static String METRICS_ON = METRIC_PREFIX + ".on";
public final static boolean DEFAULT_METRICS_ON = false;
public final static String METRICS_REPORTER_TYPE = METRIC_PREFIX + ".reporter.type";
public final static MetricsReporterType DEFAULT_METRICS_REPORTER_TYPE =
MetricsReporterType.GRAPHITE;
public final static String METRIC_PREFIX = "hoodie.metrics";
public final static String METRICS_ON = METRIC_PREFIX + ".on";
public final static boolean DEFAULT_METRICS_ON = false;
public final static String METRICS_REPORTER_TYPE = METRIC_PREFIX + ".reporter.type";
public final static MetricsReporterType DEFAULT_METRICS_REPORTER_TYPE =
MetricsReporterType.GRAPHITE;
// Graphite
public final static String GRAPHITE_PREFIX = METRIC_PREFIX + ".graphite";
public final static String GRAPHITE_SERVER_HOST = GRAPHITE_PREFIX + ".host";
public final static String DEFAULT_GRAPHITE_SERVER_HOST = "localhost";
// Graphite
public final static String GRAPHITE_PREFIX = METRIC_PREFIX + ".graphite";
public final static String GRAPHITE_SERVER_HOST = GRAPHITE_PREFIX + ".host";
public final static String DEFAULT_GRAPHITE_SERVER_HOST = "localhost";
public final static String GRAPHITE_SERVER_PORT = GRAPHITE_PREFIX + ".port";
public final static int DEFAULT_GRAPHITE_SERVER_PORT = 4756;
public final static String GRAPHITE_SERVER_PORT = GRAPHITE_PREFIX + ".port";
public final static int DEFAULT_GRAPHITE_SERVER_PORT = 4756;
public final static String GRAPHITE_METRIC_PREFIX = GRAPHITE_PREFIX + ".metric.prefix";
public final static String GRAPHITE_METRIC_PREFIX = GRAPHITE_PREFIX + ".metric.prefix";
private HoodieMetricsConfig(Properties props) {
super(props);
private HoodieMetricsConfig(Properties props) {
super(props);
}
public static HoodieMetricsConfig.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public static HoodieMetricsConfig.Builder newBuilder() {
return new Builder();
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public Builder on(boolean metricsOn) {
props.setProperty(METRICS_ON, String.valueOf(metricsOn));
return this;
}
public Builder withReporterType(String reporterType) {
props.setProperty(METRICS_REPORTER_TYPE, reporterType);
return this;
}
public Builder toGraphiteHost(String host) {
props.setProperty(GRAPHITE_SERVER_HOST, host);
return this;
}
public Builder onGraphitePort(int port) {
props.setProperty(GRAPHITE_SERVER_PORT, String.valueOf(port));
return this;
}
public Builder usePrefix(String prefix) {
props.setProperty(GRAPHITE_METRIC_PREFIX, prefix);
return this;
}
public HoodieMetricsConfig build() {
HoodieMetricsConfig config = new HoodieMetricsConfig(props);
setDefaultOnCondition(props, !props.containsKey(METRICS_ON), METRICS_ON,
String.valueOf(DEFAULT_METRICS_ON));
setDefaultOnCondition(props, !props.containsKey(METRICS_REPORTER_TYPE),
METRICS_REPORTER_TYPE, DEFAULT_METRICS_REPORTER_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_HOST),
GRAPHITE_SERVER_HOST, DEFAULT_GRAPHITE_SERVER_HOST);
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT),
GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT));
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT),
GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT));
return config;
}
public Builder on(boolean metricsOn) {
props.setProperty(METRICS_ON, String.valueOf(metricsOn));
return this;
}
public Builder withReporterType(String reporterType) {
props.setProperty(METRICS_REPORTER_TYPE, reporterType);
return this;
}
public Builder toGraphiteHost(String host) {
props.setProperty(GRAPHITE_SERVER_HOST, host);
return this;
}
public Builder onGraphitePort(int port) {
props.setProperty(GRAPHITE_SERVER_PORT, String.valueOf(port));
return this;
}
public Builder usePrefix(String prefix) {
props.setProperty(GRAPHITE_METRIC_PREFIX, prefix);
return this;
}
public HoodieMetricsConfig build() {
HoodieMetricsConfig config = new HoodieMetricsConfig(props);
setDefaultOnCondition(props, !props.containsKey(METRICS_ON), METRICS_ON,
String.valueOf(DEFAULT_METRICS_ON));
setDefaultOnCondition(props, !props.containsKey(METRICS_REPORTER_TYPE),
METRICS_REPORTER_TYPE, DEFAULT_METRICS_REPORTER_TYPE.name());
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_HOST),
GRAPHITE_SERVER_HOST, DEFAULT_GRAPHITE_SERVER_HOST);
setDefaultOnCondition(props, !props.containsKey(GRAPHITE_SERVER_PORT),
GRAPHITE_SERVER_PORT, String.valueOf(DEFAULT_GRAPHITE_SERVER_PORT));
return config;
}
}
}
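A hedged usage sketch of the metrics builder; the host, port and prefix are placeholders, not values mandated by this change:

    HoodieMetricsConfig metricsConfig = HoodieMetricsConfig.newBuilder()
        .on(true)
        .withReporterType(MetricsReporterType.GRAPHITE.name())
        .toGraphiteHost("localhost")         // placeholder Graphite host
        .onGraphitePort(4756)
        .usePrefix("hoodie.sample")          // placeholder metric prefix
        .build();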

View File

@@ -16,75 +16,77 @@
package com.uber.hoodie.config;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
/**
* Storage related config
*/
@Immutable
public class HoodieStorageConfig extends DefaultHoodieConfig {
public static final String PARQUET_FILE_MAX_BYTES = "hoodie.parquet.max.file.size";
public static final String DEFAULT_PARQUET_FILE_MAX_BYTES = String.valueOf(120 * 1024 * 1024);
public static final String PARQUET_BLOCK_SIZE_BYTES = "hoodie.parquet.block.size";
public static final String DEFAULT_PARQUET_BLOCK_SIZE_BYTES = DEFAULT_PARQUET_FILE_MAX_BYTES;
public static final String PARQUET_PAGE_SIZE_BYTES = "hoodie.parquet.page.size";
public static final String DEFAULT_PARQUET_PAGE_SIZE_BYTES = String.valueOf(1 * 1024 * 1024);
private HoodieStorageConfig(Properties props) {
super(props);
public static final String PARQUET_FILE_MAX_BYTES = "hoodie.parquet.max.file.size";
public static final String DEFAULT_PARQUET_FILE_MAX_BYTES = String.valueOf(120 * 1024 * 1024);
public static final String PARQUET_BLOCK_SIZE_BYTES = "hoodie.parquet.block.size";
public static final String DEFAULT_PARQUET_BLOCK_SIZE_BYTES = DEFAULT_PARQUET_FILE_MAX_BYTES;
public static final String PARQUET_PAGE_SIZE_BYTES = "hoodie.parquet.page.size";
public static final String DEFAULT_PARQUET_PAGE_SIZE_BYTES = String.valueOf(1 * 1024 * 1024);
private HoodieStorageConfig(Properties props) {
super(props);
}
public static HoodieStorageConfig.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public static HoodieStorageConfig.Builder newBuilder() {
return new Builder();
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public static class Builder {
private final Properties props = new Properties();
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public Builder fromProperties(Properties props) {
this.props.putAll(props);
return this;
}
public Builder limitFileSize(int maxFileSize) {
props.setProperty(PARQUET_FILE_MAX_BYTES, String.valueOf(maxFileSize));
return this;
}
public Builder parquetBlockSize(int blockSize) {
props.setProperty(PARQUET_BLOCK_SIZE_BYTES, String.valueOf(blockSize));
return this;
}
public Builder parquetPageSize(int pageSize) {
props.setProperty(PARQUET_PAGE_SIZE_BYTES, String.valueOf(pageSize));
return this;
}
public HoodieStorageConfig build() {
HoodieStorageConfig config = new HoodieStorageConfig(props);
setDefaultOnCondition(props, !props.containsKey(PARQUET_FILE_MAX_BYTES),
PARQUET_FILE_MAX_BYTES, DEFAULT_PARQUET_FILE_MAX_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_BLOCK_SIZE_BYTES),
PARQUET_BLOCK_SIZE_BYTES, DEFAULT_PARQUET_BLOCK_SIZE_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_PAGE_SIZE_BYTES),
PARQUET_PAGE_SIZE_BYTES, DEFAULT_PARQUET_PAGE_SIZE_BYTES);
return config;
}
public Builder limitFileSize(int maxFileSize) {
props.setProperty(PARQUET_FILE_MAX_BYTES, String.valueOf(maxFileSize));
return this;
}
public Builder parquetBlockSize(int blockSize) {
props.setProperty(PARQUET_BLOCK_SIZE_BYTES, String.valueOf(blockSize));
return this;
}
public Builder parquetPageSize(int pageSize) {
props.setProperty(PARQUET_PAGE_SIZE_BYTES, String.valueOf(pageSize));
return this;
}
public HoodieStorageConfig build() {
HoodieStorageConfig config = new HoodieStorageConfig(props);
setDefaultOnCondition(props, !props.containsKey(PARQUET_FILE_MAX_BYTES),
PARQUET_FILE_MAX_BYTES, DEFAULT_PARQUET_FILE_MAX_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_BLOCK_SIZE_BYTES),
PARQUET_BLOCK_SIZE_BYTES, DEFAULT_PARQUET_BLOCK_SIZE_BYTES);
setDefaultOnCondition(props, !props.containsKey(PARQUET_PAGE_SIZE_BYTES),
PARQUET_PAGE_SIZE_BYTES, DEFAULT_PARQUET_PAGE_SIZE_BYTES);
return config;
}
}
}
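For illustration, a sketch that pins the parquet sizing knobs to the defaults declared above:

    HoodieStorageConfig storageConfig = HoodieStorageConfig.newBuilder()
        .limitFileSize(120 * 1024 * 1024)    // max parquet file size
        .parquetBlockSize(120 * 1024 * 1024) // row group size, kept equal to the file size limit
        .parquetPageSize(1024 * 1024)
        .build();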

View File

@@ -24,395 +24,401 @@ import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.io.compact.strategy.CompactionStrategy;
import com.uber.hoodie.metrics.MetricsReporterType;
import org.apache.spark.storage.StorageLevel;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
import org.apache.spark.storage.StorageLevel;
/**
* Class storing configs for the {@link com.uber.hoodie.HoodieWriteClient}
*/
@Immutable
public class HoodieWriteConfig extends DefaultHoodieConfig {
private static final String BASE_PATH_PROP = "hoodie.base.path";
private static final String AVRO_SCHEMA = "hoodie.avro.schema";
public static final String TABLE_NAME = "hoodie.table.name";
private static final String DEFAULT_PARALLELISM = "200";
private static final String INSERT_PARALLELISM = "hoodie.insert.shuffle.parallelism";
private static final String BULKINSERT_PARALLELISM = "hoodie.bulkinsert.shuffle.parallelism";
private static final String UPSERT_PARALLELISM = "hoodie.upsert.shuffle.parallelism";
private static final String COMBINE_BEFORE_INSERT_PROP = "hoodie.combine.before.insert";
private static final String DEFAULT_COMBINE_BEFORE_INSERT = "false";
private static final String COMBINE_BEFORE_UPSERT_PROP = "hoodie.combine.before.upsert";
private static final String DEFAULT_COMBINE_BEFORE_UPSERT = "true";
private static final String WRITE_STATUS_STORAGE_LEVEL = "hoodie.write.status.storage.level";
private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit";
private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
private HoodieWriteConfig(Properties props) {
super(props);
}
private static final String BASE_PATH_PROP = "hoodie.base.path";
private static final String AVRO_SCHEMA = "hoodie.avro.schema";
public static final String TABLE_NAME = "hoodie.table.name";
private static final String DEFAULT_PARALLELISM = "200";
private static final String INSERT_PARALLELISM = "hoodie.insert.shuffle.parallelism";
private static final String BULKINSERT_PARALLELISM = "hoodie.bulkinsert.shuffle.parallelism";
private static final String UPSERT_PARALLELISM = "hoodie.upsert.shuffle.parallelism";
private static final String COMBINE_BEFORE_INSERT_PROP = "hoodie.combine.before.insert";
private static final String DEFAULT_COMBINE_BEFORE_INSERT = "false";
private static final String COMBINE_BEFORE_UPSERT_PROP = "hoodie.combine.before.upsert";
private static final String DEFAULT_COMBINE_BEFORE_UPSERT = "true";
private static final String WRITE_STATUS_STORAGE_LEVEL = "hoodie.write.status.storage.level";
private static final String DEFAULT_WRITE_STATUS_STORAGE_LEVEL = "MEMORY_AND_DISK_SER";
private static final String HOODIE_AUTO_COMMIT_PROP = "hoodie.auto.commit";
private static final String DEFAULT_HOODIE_AUTO_COMMIT = "true";
private static final String HOODIE_ASSUME_DATE_PARTITIONING_PROP = "hoodie.assume.date.partitioning";
private static final String DEFAULT_ASSUME_DATE_PARTITIONING = "false";
private static final String HOODIE_WRITE_STATUS_CLASS_PROP = "hoodie.writestatus.class";
private static final String DEFAULT_HOODIE_WRITE_STATUS_CLASS = WriteStatus.class.getName();
/**
* base properties
**/
public String getBasePath() {
return props.getProperty(BASE_PATH_PROP);
}
private HoodieWriteConfig(Properties props) {
super(props);
}
public String getSchema() {
return props.getProperty(AVRO_SCHEMA);
}
/**
* base properties
**/
public String getBasePath() {
return props.getProperty(BASE_PATH_PROP);
}
public String getTableName() {
return props.getProperty(TABLE_NAME);
}
public String getSchema() {
return props.getProperty(AVRO_SCHEMA);
}
public Boolean shouldAutoCommit() {
return Boolean.parseBoolean(props.getProperty(HOODIE_AUTO_COMMIT_PROP));
}
public String getTableName() {
return props.getProperty(TABLE_NAME);
}
public Boolean shouldAssumeDatePartitioning() {
return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP));
}
public Boolean shouldAutoCommit() {
return Boolean.parseBoolean(props.getProperty(HOODIE_AUTO_COMMIT_PROP));
}
public int getBulkInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM));
}
public Boolean shouldAssumeDatePartitioning() {
return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP));
}
public int getInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(INSERT_PARALLELISM));
}
public int getBulkInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM));
}
public int getUpsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(UPSERT_PARALLELISM));
}
public int getInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(INSERT_PARALLELISM));
}
public boolean shouldCombineBeforeInsert() {
return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_INSERT_PROP));
}
public int getUpsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(UPSERT_PARALLELISM));
}
public boolean shouldCombineBeforeUpsert() {
return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_UPSERT_PROP));
}
public boolean shouldCombineBeforeInsert() {
return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_INSERT_PROP));
}
public StorageLevel getWriteStatusStorageLevel() {
return StorageLevel.fromString(props.getProperty(WRITE_STATUS_STORAGE_LEVEL));
}
public boolean shouldCombineBeforeUpsert() {
return Boolean.parseBoolean(props.getProperty(COMBINE_BEFORE_UPSERT_PROP));
}
public String getWriteStatusClassName() {
return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
}
public StorageLevel getWriteStatusStorageLevel() {
return StorageLevel.fromString(props.getProperty(WRITE_STATUS_STORAGE_LEVEL));
}
/**
* compaction properties
**/
public HoodieCleaningPolicy getCleanerPolicy() {
return HoodieCleaningPolicy
.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP));
}
public String getWriteStatusClassName() {
return props.getProperty(HOODIE_WRITE_STATUS_CLASS_PROP);
}
public int getCleanerFileVersionsRetained() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED_PROP));
}
/**
* compaction properties
**/
public HoodieCleaningPolicy getCleanerPolicy() {
return HoodieCleaningPolicy
.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_POLICY_PROP));
}
public int getCleanerCommitsRetained() {
return Integer
.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP));
}
public int getCleanerFileVersionsRetained() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED_PROP));
}
public int getMaxCommitsToKeep() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP));
}
public int getCleanerCommitsRetained() {
return Integer
.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED_PROP));
}
public int getMinCommitsToKeep() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP));
}
public int getMaxCommitsToKeep() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP));
}
public int getParquetSmallFileLimit() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT_BYTES));
}
public int getMinCommitsToKeep() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP));
}
public int getCopyOnWriteInsertSplitSize() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE));
}
public int getParquetSmallFileLimit() {
return Integer
.parseInt(props.getProperty(HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT_BYTES));
}
public int getCopyOnWriteRecordSizeEstimate() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE));
}
public int getCopyOnWriteInsertSplitSize() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_INSERT_SPLIT_SIZE));
}
public boolean shouldAutoTuneInsertSplits() {
return Boolean.parseBoolean(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS));
}
public int getCopyOnWriteRecordSizeEstimate() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE));
}
public int getCleanerParallelism() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_PARALLELISM));
}
public boolean shouldAutoTuneInsertSplits() {
return Boolean.parseBoolean(
props.getProperty(HoodieCompactionConfig.COPY_ON_WRITE_TABLE_AUTO_SPLIT_INSERTS));
}
public boolean isAutoClean() {
return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.AUTO_CLEAN_PROP));
}
public int getCleanerParallelism() {
return Integer.parseInt(props.getProperty(HoodieCompactionConfig.CLEANER_PARALLELISM));
}
public boolean isInlineCompaction() {
return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_PROP));
}
public boolean isAutoClean() {
return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.AUTO_CLEAN_PROP));
}
public int getInlineCompactDeltaCommitMax() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP));
}
public boolean isInlineCompaction() {
return Boolean.parseBoolean(props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_PROP));
}
public CompactionStrategy getCompactionStrategy() {
return ReflectionUtils.loadClass(props.getProperty(HoodieCompactionConfig.COMPACTION_STRATEGY_PROP));
}
public int getInlineCompactDeltaCommitMax() {
return Integer.parseInt(
props.getProperty(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS_PROP));
}
public Long getTargetIOPerCompactionInMB() {
return Long.parseLong(props.getProperty(HoodieCompactionConfig.TARGET_IO_PER_COMPACTION_IN_MB_PROP));
}
public CompactionStrategy getCompactionStrategy() {
return ReflectionUtils
.loadClass(props.getProperty(HoodieCompactionConfig.COMPACTION_STRATEGY_PROP));
}
/**
* index properties
**/
public HoodieIndex.IndexType getIndexType() {
return HoodieIndex.IndexType.valueOf(props.getProperty(HoodieIndexConfig.INDEX_TYPE_PROP));
}
public Long getTargetIOPerCompactionInMB() {
return Long
.parseLong(props.getProperty(HoodieCompactionConfig.TARGET_IO_PER_COMPACTION_IN_MB_PROP));
}
public int getBloomFilterNumEntries() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES));
}
/**
* index properties
**/
public HoodieIndex.IndexType getIndexType() {
return HoodieIndex.IndexType.valueOf(props.getProperty(HoodieIndexConfig.INDEX_TYPE_PROP));
}
public double getBloomFilterFPP() {
return Double.parseDouble(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_FPP));
}
public int getBloomFilterNumEntries() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES));
}
public String getHbaseZkQuorum() {
return props.getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
}
public double getBloomFilterFPP() {
return Double.parseDouble(props.getProperty(HoodieIndexConfig.BLOOM_FILTER_FPP));
}
public int getHbaseZkPort() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP));
}
public String getHbaseZkQuorum() {
return props.getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
}
public String getHbaseTableName() {
return props.getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
}
public int getHbaseZkPort() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP));
}
public int getBloomIndexParallelism() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PARALLELISM_PROP));
}
public String getHbaseTableName() {
return props.getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
}
public boolean getBloomIndexPruneByRanges() {
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PRUNE_BY_RANGES_PROP));
}
public int getBloomIndexParallelism() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PARALLELISM_PROP));
}
public boolean getBloomIndexUseCaching() {
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP));
}
public boolean getBloomIndexPruneByRanges() {
return Boolean
.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_PRUNE_BY_RANGES_PROP));
}
public int getNumBucketsPerPartition() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP));
}
public boolean getBloomIndexUseCaching() {
return Boolean.parseBoolean(props.getProperty(HoodieIndexConfig.BLOOM_INDEX_USE_CACHING_PROP));
}
/**
* storage properties
**/
public int getParquetMaxFileSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES));
}
public int getNumBucketsPerPartition() {
return Integer.parseInt(props.getProperty(HoodieIndexConfig.BUCKETED_INDEX_NUM_BUCKETS_PROP));
}
public int getParquetBlockSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_BLOCK_SIZE_BYTES));
}
/**
* storage properties
**/
public int getParquetMaxFileSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_FILE_MAX_BYTES));
}
public int getParquetPageSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_PAGE_SIZE_BYTES));
}
public int getParquetBlockSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_BLOCK_SIZE_BYTES));
}
/**
* metrics properties
**/
public boolean isMetricsOn() {
return Boolean.parseBoolean(props.getProperty(HoodieMetricsConfig.METRICS_ON));
}
public int getParquetPageSize() {
return Integer.parseInt(props.getProperty(HoodieStorageConfig.PARQUET_PAGE_SIZE_BYTES));
}
public MetricsReporterType getMetricsReporterType() {
return MetricsReporterType
.valueOf(props.getProperty(HoodieMetricsConfig.METRICS_REPORTER_TYPE));
}
/**
* metrics properties
**/
public boolean isMetricsOn() {
return Boolean.parseBoolean(props.getProperty(HoodieMetricsConfig.METRICS_ON));
}
public String getGraphiteServerHost() {
return props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_HOST);
}
public MetricsReporterType getMetricsReporterType() {
return MetricsReporterType
.valueOf(props.getProperty(HoodieMetricsConfig.METRICS_REPORTER_TYPE));
}
public int getGraphiteServerPort() {
return Integer.parseInt(props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_PORT));
}
public String getGraphiteServerHost() {
return props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_HOST);
}
public String getGraphiteMetricPrefix() {
return props.getProperty(HoodieMetricsConfig.GRAPHITE_METRIC_PREFIX);
}
public int getGraphiteServerPort() {
return Integer.parseInt(props.getProperty(HoodieMetricsConfig.GRAPHITE_SERVER_PORT));
}
public static HoodieWriteConfig.Builder newBuilder() {
return new Builder();
}
public String getGraphiteMetricPrefix() {
return props.getProperty(HoodieMetricsConfig.GRAPHITE_METRIC_PREFIX);
}
public static HoodieWriteConfig.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private final Properties props = new Properties();
private boolean isIndexConfigSet = false;
private boolean isStorageConfigSet = false;
private boolean isCompactionConfigSet = false;
private boolean isMetricsConfigSet = false;
private boolean isAutoCommit = true;
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
private final Properties props = new Properties();
private boolean isIndexConfigSet = false;
private boolean isStorageConfigSet = false;
private boolean isCompactionConfigSet = false;
private boolean isMetricsConfigSet = false;
private boolean isAutoCommit = true;
public Builder fromInputStream(InputStream inputStream) throws IOException {
try {
this.props.load(inputStream);
return this;
} finally {
inputStream.close();
}
}
public Builder withProps(Map kvprops) {
props.putAll(kvprops);
return this;
}
public Builder withPath(String basePath) {
props.setProperty(BASE_PATH_PROP, basePath);
return this;
}
public Builder withSchema(String schemaStr) {
props.setProperty(AVRO_SCHEMA, schemaStr);
return this;
}
public Builder forTable(String tableName) {
props.setProperty(TABLE_NAME, tableName);
return this;
}
public Builder withBulkInsertParallelism(int bulkInsertParallelism) {
props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism));
return this;
}
public Builder withParallelism(int insertShuffleParallelism, int upsertShuffleParallelism) {
props.setProperty(INSERT_PARALLELISM, String.valueOf(insertShuffleParallelism));
props.setProperty(UPSERT_PARALLELISM, String.valueOf(upsertShuffleParallelism));
return this;
}
public Builder combineInput(boolean onInsert, boolean onUpsert) {
props.setProperty(COMBINE_BEFORE_INSERT_PROP, String.valueOf(onInsert));
props.setProperty(COMBINE_BEFORE_UPSERT_PROP, String.valueOf(onUpsert));
return this;
}
public Builder withWriteStatusStorageLevel(String level) {
props.setProperty(WRITE_STATUS_STORAGE_LEVEL, level);
return this;
}
public Builder withIndexConfig(HoodieIndexConfig indexConfig) {
props.putAll(indexConfig.getProps());
isIndexConfigSet = true;
return this;
}
public Builder withStorageConfig(HoodieStorageConfig storageConfig) {
props.putAll(storageConfig.getProps());
isStorageConfigSet = true;
return this;
}
public Builder withCompactionConfig(HoodieCompactionConfig compactionConfig) {
props.putAll(compactionConfig.getProps());
isCompactionConfigSet = true;
return this;
}
public Builder withMetricsConfig(HoodieMetricsConfig metricsConfig) {
props.putAll(metricsConfig.getProps());
isMetricsConfigSet = true;
return this;
}
public Builder withAutoCommit(boolean autoCommit) {
props.setProperty(HOODIE_AUTO_COMMIT_PROP, String.valueOf(autoCommit));
return this;
}
public Builder withAssumeDatePartitioning(boolean assumeDatePartitioning) {
props.setProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP, String.valueOf(assumeDatePartitioning));
return this;
}
public Builder withWriteStatusClass(Class<? extends WriteStatus> writeStatusClass) {
props.setProperty(HOODIE_WRITE_STATUS_CLASS_PROP, writeStatusClass.getName());
return this;
}
public HoodieWriteConfig build() {
HoodieWriteConfig config = new HoodieWriteConfig(props);
// Check for mandatory properties
Preconditions.checkArgument(config.getBasePath() != null);
setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BULKINSERT_PARALLELISM), BULKINSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP),
COMBINE_BEFORE_INSERT_PROP, DEFAULT_COMBINE_BEFORE_INSERT);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP),
COMBINE_BEFORE_UPSERT_PROP, DEFAULT_COMBINE_BEFORE_UPSERT);
setDefaultOnCondition(props, !props.containsKey(WRITE_STATUS_STORAGE_LEVEL),
WRITE_STATUS_STORAGE_LEVEL, DEFAULT_WRITE_STATUS_STORAGE_LEVEL);
setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP),
HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT);
setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP),
HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP),
HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
// Make sure the props from the nested configs are propagated
setDefaultOnCondition(props, !isIndexConfigSet,
HoodieIndexConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isStorageConfigSet,
HoodieStorageConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isCompactionConfigSet,
HoodieCompactionConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isMetricsConfigSet,
HoodieMetricsConfig.newBuilder().fromProperties(props).build());
return config;
}
public Builder fromFile(File propertiesFile) throws IOException {
FileReader reader = new FileReader(propertiesFile);
try {
this.props.load(reader);
return this;
} finally {
reader.close();
}
}
public Builder fromInputStream(InputStream inputStream) throws IOException {
try {
this.props.load(inputStream);
return this;
} finally {
inputStream.close();
}
}
public Builder withProps(Map kvprops) {
props.putAll(kvprops);
return this;
}
public Builder withPath(String basePath) {
props.setProperty(BASE_PATH_PROP, basePath);
return this;
}
public Builder withSchema(String schemaStr) {
props.setProperty(AVRO_SCHEMA, schemaStr);
return this;
}
public Builder forTable(String tableName) {
props.setProperty(TABLE_NAME, tableName);
return this;
}
public Builder withBulkInsertParallelism(int bulkInsertParallelism) {
props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism));
return this;
}
public Builder withParallelism(int insertShuffleParallelism, int upsertShuffleParallelism) {
props.setProperty(INSERT_PARALLELISM, String.valueOf(insertShuffleParallelism));
props.setProperty(UPSERT_PARALLELISM, String.valueOf(upsertShuffleParallelism));
return this;
}
public Builder combineInput(boolean onInsert, boolean onUpsert) {
props.setProperty(COMBINE_BEFORE_INSERT_PROP, String.valueOf(onInsert));
props.setProperty(COMBINE_BEFORE_UPSERT_PROP, String.valueOf(onUpsert));
return this;
}
public Builder withWriteStatusStorageLevel(String level) {
props.setProperty(WRITE_STATUS_STORAGE_LEVEL, level);
return this;
}
public Builder withIndexConfig(HoodieIndexConfig indexConfig) {
props.putAll(indexConfig.getProps());
isIndexConfigSet = true;
return this;
}
public Builder withStorageConfig(HoodieStorageConfig storageConfig) {
props.putAll(storageConfig.getProps());
isStorageConfigSet = true;
return this;
}
public Builder withCompactionConfig(HoodieCompactionConfig compactionConfig) {
props.putAll(compactionConfig.getProps());
isCompactionConfigSet = true;
return this;
}
public Builder withMetricsConfig(HoodieMetricsConfig metricsConfig) {
props.putAll(metricsConfig.getProps());
isMetricsConfigSet = true;
return this;
}
public Builder withAutoCommit(boolean autoCommit) {
props.setProperty(HOODIE_AUTO_COMMIT_PROP, String.valueOf(autoCommit));
return this;
}
public Builder withAssumeDatePartitioning(boolean assumeDatePartitioning) {
props.setProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP,
String.valueOf(assumeDatePartitioning));
return this;
}
public Builder withWriteStatusClass(Class<? extends WriteStatus> writeStatusClass) {
props.setProperty(HOODIE_WRITE_STATUS_CLASS_PROP, writeStatusClass.getName());
return this;
}
public HoodieWriteConfig build() {
HoodieWriteConfig config = new HoodieWriteConfig(props);
// Check for mandatory properties
Preconditions.checkArgument(config.getBasePath() != null);
setDefaultOnCondition(props, !props.containsKey(INSERT_PARALLELISM), INSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(BULKINSERT_PARALLELISM),
BULKINSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM,
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP),
COMBINE_BEFORE_INSERT_PROP, DEFAULT_COMBINE_BEFORE_INSERT);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP),
COMBINE_BEFORE_UPSERT_PROP, DEFAULT_COMBINE_BEFORE_UPSERT);
setDefaultOnCondition(props, !props.containsKey(WRITE_STATUS_STORAGE_LEVEL),
WRITE_STATUS_STORAGE_LEVEL, DEFAULT_WRITE_STATUS_STORAGE_LEVEL);
setDefaultOnCondition(props, !props.containsKey(HOODIE_AUTO_COMMIT_PROP),
HOODIE_AUTO_COMMIT_PROP, DEFAULT_HOODIE_AUTO_COMMIT);
setDefaultOnCondition(props, !props.containsKey(HOODIE_ASSUME_DATE_PARTITIONING_PROP),
HOODIE_ASSUME_DATE_PARTITIONING_PROP, DEFAULT_ASSUME_DATE_PARTITIONING);
setDefaultOnCondition(props, !props.containsKey(HOODIE_WRITE_STATUS_CLASS_PROP),
HOODIE_WRITE_STATUS_CLASS_PROP, DEFAULT_HOODIE_WRITE_STATUS_CLASS);
// Make sure the props from the nested configs are propagated
setDefaultOnCondition(props, !isIndexConfigSet,
HoodieIndexConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isStorageConfigSet,
HoodieStorageConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isCompactionConfigSet,
HoodieCompactionConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !isMetricsConfigSet,
HoodieMetricsConfig.newBuilder().fromProperties(props).build());
return config;
}
}
}
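To show how the pieces compose, a sketch of building a write config; the base path, table name and schema string are placeholders, and any sub-config not supplied is filled in from defaults inside build():

    HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
        .withPath("/tmp/hoodie/sample_table")            // placeholder base path (mandatory)
        .forTable("sample_table")                        // placeholder table name
        .withSchema(avroSchemaString)                    // Avro schema of the incoming records (assumed variable)
        .withParallelism(200, 200)                       // insert / upsert shuffle parallelism
        .withIndexConfig(
            HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
        .withCompactionConfig(
            HoodieCompactionConfig.newBuilder().retainCommits(24).archiveCommitsWith(20, 30).build())
        .build();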

View File

@@ -17,16 +17,16 @@
package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a delta commit
* </p>
* <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a delta
* commit </p>
*/
public class HoodieAppendException extends HoodieException {
public HoodieAppendException(String msg, Throwable e) {
super(msg, e);
}
public HoodieAppendException(String msg) {
super(msg);
}
public HoodieAppendException(String msg, Throwable e) {
super(msg, e);
}
public HoodieAppendException(String msg) {
super(msg);
}
}

View File

@@ -17,16 +17,16 @@
package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit
* <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a Commit
* </p>
*/
public class HoodieCommitException extends HoodieException {
public HoodieCommitException(String msg) {
super(msg);
}
public HoodieCommitException(String msg, Throwable e) {
super(msg, e);
}
public HoodieCommitException(String msg) {
super(msg);
}
public HoodieCommitException(String msg, Throwable e) {
super(msg, e);
}
}

View File

@@ -17,6 +17,7 @@
package com.uber.hoodie.exception;
public class HoodieCompactionException extends HoodieException {
public HoodieCompactionException(String msg) {
super(msg);
}

View File

@@ -18,18 +18,17 @@ package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown when a dependent system is not available
* </p>
* <p> Exception thrown when a dependent system is not available </p>
*/
public class HoodieDependentSystemUnavailableException extends HoodieException {
public static final String HBASE = "HBASE";
public HoodieDependentSystemUnavailableException(String system, String connectURL) {
super(getLogMessage(system, connectURL));
}
public static final String HBASE = "HBASE";
private static String getLogMessage(String system, String connectURL) {
return "System " + system + " unavailable. Tried to connect to " + connectURL;
}
public HoodieDependentSystemUnavailableException(String system, String connectURL) {
super(getLogMessage(system, connectURL));
}
private static String getLogMessage(String system, String connectURL) {
return "System " + system + " unavailable. Tried to connect to " + connectURL;
}
}

View File

@@ -16,15 +16,13 @@
package com.uber.hoodie.exception;
import java.io.IOException;
/**
* <p>
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk insert
* </p>
* <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing a bulk
* insert </p>
*/
public class HoodieInsertException extends HoodieException {
public HoodieInsertException(String msg, Throwable e) {
super(msg, e);
}
public HoodieInsertException(String msg, Throwable e) {
super(msg, e);
}
}

View File

@@ -18,11 +18,11 @@ package com.uber.hoodie.exception;
public class HoodieRollbackException extends HoodieException {
public HoodieRollbackException(String msg, Throwable e) {
super(msg, e);
}
public HoodieRollbackException(String msg, Throwable e) {
super(msg, e);
}
public HoodieRollbackException(String msg) {
super(msg);
}
public HoodieRollbackException(String msg) {
super(msg);
}
}

View File

@@ -18,11 +18,11 @@ package com.uber.hoodie.exception;
public class HoodieSavepointException extends HoodieException {
public HoodieSavepointException(String msg, Throwable e) {
super(msg, e);
}
public HoodieSavepointException(String msg, Throwable e) {
super(msg, e);
}
public HoodieSavepointException(String msg) {
super(msg);
}
public HoodieSavepointException(String msg) {
super(msg);
}
}

View File

@@ -17,16 +17,16 @@
package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown for any higher level errors when <code>HoodieClient</code> is doing an incremental upsert
* </p>
* <p> Exception thrown for any higher level errors when <code>HoodieClient</code> is doing an
* incremental upsert </p>
*/
public class HoodieUpsertException extends HoodieException {
public HoodieUpsertException(String msg, Throwable e) {
super(msg, e);
}
public class HoodieUpsertException extends HoodieException {
public HoodieUpsertException(String msg) {
super(msg);
}
public HoodieUpsertException(String msg, Throwable e) {
super(msg, e);
}
public HoodieUpsertException(String msg) {
super(msg);
}
}

View File

@@ -16,16 +16,14 @@
package com.uber.hoodie.func;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import org.apache.spark.api.java.function.Function2;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.api.java.function.Function2;
/**
@@ -34,20 +32,21 @@ import java.util.List;
public class BulkInsertMapFunction<T extends HoodieRecordPayload>
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<List<WriteStatus>>> {
private String commitTime;
private HoodieWriteConfig config;
private HoodieTable<T> hoodieTable;
private String commitTime;
private HoodieWriteConfig config;
private HoodieTable<T> hoodieTable;
public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config,
HoodieTable<T> hoodieTable) {
this.commitTime = commitTime;
this.config = config;
this.hoodieTable = hoodieTable;
}
public BulkInsertMapFunction(String commitTime, HoodieWriteConfig config,
HoodieTable<T> hoodieTable) {
this.commitTime = commitTime;
this.config = config;
this.hoodieTable = hoodieTable;
}
@Override
public Iterator<List<WriteStatus>> call(Integer partition, Iterator<HoodieRecord<T>> sortedRecordItr)
throws Exception {
return new LazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable);
}
@Override
public Iterator<List<WriteStatus>> call(Integer partition,
Iterator<HoodieRecord<T>> sortedRecordItr)
throws Exception {
return new LazyInsertIterable<>(sortedRecordItr, config, commitTime, hoodieTable);
}
}
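A sketch of how this function is typically wired in (assuming Spark 2.x; sortedRecords, commitTime, config and table are placeholder names): each partition is handed to a LazyInsertIterable, which yields one List of WriteStatus per file handle, flattened below.

    JavaRDD<WriteStatus> writeStatuses = sortedRecords
        .mapPartitionsWithIndex(new BulkInsertMapFunction<>(commitTime, config, table), true)
        .flatMap(List::iterator);   // each element is a List<WriteStatus> produced by one handle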

View File

@@ -16,99 +16,101 @@
package com.uber.hoodie.func;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.io.HoodieCreateHandle;
import com.uber.hoodie.io.HoodieIOHandle;
import com.uber.hoodie.table.HoodieTable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.spark.TaskContext;
/**
* Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
* files.
*/
public class LazyInsertIterable<T extends HoodieRecordPayload> extends
LazyIterableIterator<HoodieRecord<T>, List<WriteStatus>> {
private final HoodieWriteConfig hoodieConfig;
private final String commitTime;
private final HoodieTable<T> hoodieTable;
private Set<String> partitionsCleaned;
private HoodieCreateHandle handle;
public LazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
String commitTime, HoodieTable<T> hoodieTable) {
super(sortedRecordItr);
this.partitionsCleaned = new HashSet<>();
this.hoodieConfig = config;
this.commitTime = commitTime;
this.hoodieTable = hoodieTable;
}
@Override
protected void start() {
}
@Override
protected List<WriteStatus> computeNext() {
List<WriteStatus> statuses = new ArrayList<>();
while (inputItr.hasNext()) {
HoodieRecord record = inputItr.next();
// clean up any partial failures
if (!partitionsCleaned.contains(record.getPartitionPath())) {
// This insert task could fail multiple times, but Spark will faithfully retry with
// the same data again. Thus, before we open any files under a given partition, we
// first delete any files in the same partitionPath written by same Spark partition
HoodieIOHandle.cleanupTmpFilesFromCurrentCommit(hoodieConfig,
commitTime,
record.getPartitionPath(),
TaskContext.getPartitionId());
partitionsCleaned.add(record.getPartitionPath());
}
// lazily initialize the handle, for the first time
if (handle == null) {
handle =
new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable,
record.getPartitionPath());
}
if (handle.canWrite(record)) {
// write the record, if the handle has capacity
handle.write(record);
} else {
// handle is full.
statuses.add(handle.close());
// Need to handle the rejected record & open new handle
handle =
new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable,
record.getPartitionPath());
handle.write(record); // we should be able to write 1 record.
break;
}
}
// If we exited out, because we ran out of records, just close the pending handle.
if (!inputItr.hasNext()) {
if (handle != null) {
statuses.add(handle.close());
}
}
assert statuses.size() > 0; // should never return empty statuses
return statuses;
}
@Override
protected void end() {
}
}
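Illustrative sketch (not from this commit) of consuming the iterable directly; each iteration drains records into the current HoodieCreateHandle and yields one batch of WriteStatus when a handle fills up, or when the input ends. The helper name is hypothetical:

  static <T extends HoodieRecordPayload> List<WriteStatus> drainSketch(
      Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config, String commitTime,
      HoodieTable<T> hoodieTable) {
    List<WriteStatus> allStatuses = new ArrayList<>();
    for (List<WriteStatus> batch : new LazyInsertIterable<T>(sortedRecordItr, config, commitTime,
        hoodieTable)) {
      allStatuses.addAll(batch);
    }
    return allStatuses;
  }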

View File

@@ -31,98 +31,99 @@ import java.util.Iterator;
* responsible for calling inputIterator.next() and doing the processing in computeNext()
*/
public abstract class LazyIterableIterator<I, O> implements Iterable<O>, Iterator<O> {
protected Iterator<I> inputItr = null;
private boolean consumed = false;
private boolean startCalled = false;
private boolean endCalled = false;
public LazyIterableIterator(Iterator<I> in) {
inputItr = in;
}
/**
* Called once, before any elements are processed
*/
protected abstract void start();
/**
* Block computation to be overwritten by sub classes.
*/
protected abstract O computeNext();
/**
* Called once, after all elements are processed.
*/
protected abstract void end();
//////////////////
// iterable implementation
private void invokeStartIfNeeded() {
if (!startCalled) {
startCalled = true;
try {
start();
} catch (Exception e) {
throw new RuntimeException("Error in start()");
}
}
}
private void invokeEndIfNeeded() {
// make the calls out to begin() & end()
if (!endCalled) {
endCalled = true;
// if we are out of elements, and end has not been called yet
try {
end();
} catch (Exception e) {
throw new RuntimeException("Error in end()");
}
}
}
@Override
public Iterator<O> iterator() {
//check for consumed inputItr
if (consumed) {
throw new RuntimeException("Invalid repeated inputItr consumption.");
}
//hand out self as inputItr exactly once (note: do not hand out the input
//inputItr since it is consumed by the self inputItr implementation)
consumed = true;
return this;
}
//////////////////
// inputItr implementation
@Override
public boolean hasNext() {
boolean ret = inputItr.hasNext();
// make sure, there is exactly one call to start()
invokeStartIfNeeded();
if (!ret) {
// if we are out of elements, and end has not been called yet
invokeEndIfNeeded();
}
return ret;
}
@Override
public O next() {
try {
return computeNext();
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
@Override
public void remove() {
throw new RuntimeException("Unsupported remove operation.");
}
}
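A hypothetical minimal subclass (not part of this commit) showing the contract this class enforces: start() runs exactly once before the first element, computeNext() produces one output per next() call, and end() runs once after the input iterator is exhausted:

  class UpperCaseIterator extends LazyIterableIterator<String, String> {
    UpperCaseIterator(Iterator<String> in) {
      super(in);
    }
    @Override
    protected void start() {
      // e.g. open a resource needed for the pass
    }
    @Override
    protected String computeNext() {
      // one input element consumed per output element; hasNext() is delegated to the input
      return inputItr.next().toUpperCase();
    }
    @Override
    protected void end() {
      // e.g. release whatever start() opened
    }
  }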

View File

@@ -17,118 +17,108 @@
package com.uber.hoodie.index;
import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieIndexException;
import com.uber.hoodie.index.bloom.HoodieBloomIndex;
import com.uber.hoodie.index.bucketed.BucketedIndex;
import com.uber.hoodie.index.hbase.HBaseIndex;
import com.uber.hoodie.table.HoodieTable;
import java.io.Serializable;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
/**
* Base class for different types of indexes to determine the mapping from uuid (record key) to
* the file id that contains it
*/
public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Serializable {
protected transient JavaSparkContext jsc = null;
public enum IndexType {
HBASE,
INMEMORY,
BLOOM,
BUCKETED
}
protected final HoodieWriteConfig config;
protected HoodieIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
this.config = config;
this.jsc = jsc;
}
/**
* Checks if the given [Keys] exist in the hoodie table and returns [Key, Optional[FullFilePath]]
* If the optional FullFilePath value is not present, then the key is not found. If the
* FullFilePath value is present, it is the path component (without scheme) of the URI underlying
* file
*/
public abstract JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, final HoodieTable<T> table);
/**
* Looks up the index and tags each incoming record with a location of a file that contains the
* row (if it is actually present)
*/
public abstract JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
HoodieTable<T> hoodieTable) throws HoodieIndexException;
/**
* Extracts the location of written records, and updates the index.
*
* TODO(vc): We may need to propagate the record as well in a WriteStatus class
*/
public abstract JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
HoodieTable<T> hoodieTable) throws HoodieIndexException;
/**
* Rollback the effects of the commit made at commitTime.
*/
public abstract boolean rollbackCommit(String commitTime);
/**
* An index is `global` if {@link HoodieKey} to fileID mapping, does not depend on the
* `partitionPath`. Such an implementation is able to obtain the same mapping, for two hoodie keys
* with same `recordKey` but different `partitionPath`
*
* @return whether or not, the index implementation is global in nature
*/
public abstract boolean isGlobal();
/**
* This is used by storage to determine, if its safe to send inserts, straight to the log, i.e
* having a {@link com.uber.hoodie.common.model.FileSlice}, with no data file.
*
* @return Returns true/false depending on whether the impl has this capability
*/
public abstract boolean canIndexLogFiles();
/**
* An index is "implicit" with respect to storage, if just writing new data to a file slice,
* updates the index as well. This is used by storage, to save memory footprint in certain cases.
*/
public abstract boolean isImplicitWithStorage();
public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
switch (config.getIndexType()) {
case HBASE:
return new HBaseIndex<>(config, jsc);
case INMEMORY:
return new InMemoryHashIndex<>(config, jsc);
case BLOOM:
return new HoodieBloomIndex<>(config, jsc);
case BUCKETED:
return new BucketedIndex<>(config, jsc);
}
throw new HoodieIndexException("Index type unspecified, set " + config.getIndexType());
}
}
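Illustrative sketch (not from this commit) of how a caller obtains the configured index; createIndex() switches on the IndexType carried by the write config, and all calls used here appear in the class above:

  static <T extends HoodieRecordPayload> JavaRDD<HoodieRecord<T>> tagWithConfiguredIndex(
      HoodieWriteConfig config, JavaSparkContext jsc, JavaRDD<HoodieRecord<T>> recordRDD,
      HoodieTable<T> hoodieTable) throws HoodieIndexException {
    // Picks HBASE, INMEMORY, BLOOM or BUCKETED based on config.getIndexType().
    HoodieIndex<T> index = HoodieIndex.createIndex(config, jsc);
    return index.tagLocation(recordRDD, hoodieTable);
  }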

View File

@@ -17,129 +17,119 @@
package com.uber.hoodie.index;
import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
/**
* Hoodie Index implementation backed by an in-memory Hash map. <p> ONLY USE FOR LOCAL TESTING
*/
public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
private static ConcurrentMap<HoodieKey, HoodieRecordLocation> recordLocationMap;
public InMemoryHashIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
super(config, jsc);
recordLocationMap = new ConcurrentHashMap<>();
}
@Override
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, final HoodieTable<T> table) {
throw new UnsupportedOperationException("InMemory index does not implement check exist yet");
}
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
class LocationTagFunction
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
@Override
public Iterator<HoodieRecord<T>> call(Integer partitionNum,
Iterator<HoodieRecord<T>> hoodieRecordIterator) {
List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
while (hoodieRecordIterator.hasNext()) {
HoodieRecord<T> rec = hoodieRecordIterator.next();
if (recordLocationMap.containsKey(rec.getKey())) {
rec.setCurrentLocation(recordLocationMap.get(rec.getKey()));
}
taggedRecords.add(rec);
}
return taggedRecords.iterator();
}
}
@Override
public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
HoodieTable<T> hoodieTable) {
return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(), true);
}
@Override
public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
HoodieTable<T> hoodieTable) {
return writeStatusRDD.map(new Function<WriteStatus, WriteStatus>() {
@Override
public WriteStatus call(WriteStatus writeStatus) {
for (HoodieRecord record : writeStatus.getWrittenRecords()) {
if (!writeStatus.isErrored(record.getKey())) {
HoodieKey key = record.getKey();
java.util.Optional<HoodieRecordLocation> newLocation = record.getNewLocation();
if (newLocation.isPresent()) {
recordLocationMap.put(key, newLocation.get());
} else {
//Delete existing index for a deleted record
recordLocationMap.remove(key);
}
}
}
return writeStatus;
}
});
}
@Override
public boolean rollbackCommit(String commitTime) {
return true;
}
/**
* Only looks up by recordKey
*/
@Override
public boolean isGlobal() {
return true;
}
/**
* Mapping is available in HBase already.
*/
@Override
public boolean canIndexLogFiles() {
return true;
}
/**
* Index needs to be explicitly updated after storage write.
*/
@Override
public boolean isImplicitWithStorage() {
return false;
}
}
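Hypothetical local-testing sketch (not part of this commit): construct the in-memory index directly and tag incoming records against whatever locations updateLocation() has recorded so far:

  static <T extends HoodieRecordPayload> JavaRDD<HoodieRecord<T>> tagSketch(
      HoodieWriteConfig config, JavaSparkContext jsc, JavaRDD<HoodieRecord<T>> recordRDD,
      HoodieTable<T> hoodieTable) {
    // tagLocation() fills in the current location for every key previously seen by updateLocation().
    return new InMemoryHashIndex<T>(config, jsc).tagLocation(recordRDD, hoodieTable);
  }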

View File

@@ -19,7 +19,6 @@
package com.uber.hoodie.index.bloom;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
@@ -27,73 +26,75 @@ import java.io.Serializable;
*/
public class BloomIndexFileInfo implements Serializable {
private final String fileName;
private final String minRecordKey;
private final String maxRecordKey;
public BloomIndexFileInfo(String fileName, String minRecordKey, String maxRecordKey) {
this.fileName = fileName;
this.minRecordKey = minRecordKey;
this.maxRecordKey = maxRecordKey;
}
public BloomIndexFileInfo(String fileName) {
this.fileName = fileName;
this.minRecordKey = null;
this.maxRecordKey = null;
}
public String getFileName() {
return fileName;
}
public String getMinRecordKey() {
return minRecordKey;
}
public String getMaxRecordKey() {
return maxRecordKey;
}
public boolean hasKeyRanges() {
return minRecordKey != null && maxRecordKey != null;
}
/**
* Does the given key fall within the range (inclusive)
*/
public boolean isKeyInRange(String recordKey) {
return minRecordKey.compareTo(recordKey) <= 0 &&
maxRecordKey.compareTo(recordKey) >= 0;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
BloomIndexFileInfo that = (BloomIndexFileInfo) o;
return Objects.equal(that.fileName, fileName) &&
Objects.equal(that.minRecordKey, minRecordKey) &&
Objects.equal(that.maxRecordKey, maxRecordKey);
}
@Override
public int hashCode() {
return Objects.hashCode(fileName, minRecordKey, maxRecordKey);
}
public String toString() {
final StringBuilder sb = new StringBuilder("BloomIndexFileInfo {");
sb.append(" fileName=").append(fileName);
sb.append(" minRecordKey=").append(minRecordKey);
sb.append(" maxRecordKey=").append(maxRecordKey);
sb.append('}');
return sb.toString();
}
}
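Illustrative example (not from this commit) of the range check that drives candidate-file pruning; the file names and keys below are made up:

  static void rangeCheckSketch() {
    BloomIndexFileInfo withRange = new BloomIndexFileInfo("file1.parquet", "key000", "key500");
    BloomIndexFileInfo withoutRange = new BloomIndexFileInfo("file2.parquet");
    // "key123" sorts between the min and max record key, so file1 has to be checked.
    boolean checkFile1 = withRange.hasKeyRanges() && withRange.isKeyInRange("key123"); // true
    // file2 carries no range metadata, so there is nothing to prune on and it is always checked.
    boolean checkFile2 = !withoutRange.hasKeyRanges(); // true
  }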

View File

@@ -18,9 +18,12 @@
package com.uber.hoodie.index.bloom;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.mapping;
import static java.util.stream.Collectors.toList;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieKey;
@@ -34,7 +37,10 @@ import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.MetadataNotFoundException;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.table.HoodieTable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -42,369 +48,370 @@ import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;
import scala.Tuple2;
/**
* Indexing mechanism based on bloom filter. Each parquet file includes its row_key bloom filter in
* its metadata.
*/
public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
private static Logger logger = LogManager.getLogger(HoodieBloomIndex.class);
// we need to limit the join such that it stays within 1.5GB per Spark partition. (SPARK-1476)
private static final int SPARK_MAXIMUM_BYTES_PER_PARTITION = 1500 * 1024 * 1024;
// this is how much a triplet of (partitionPath, fileId, recordKey) costs.
private static final int BYTES_PER_PARTITION_FILE_KEY_TRIPLET = 300;
private static int MAX_ITEMS_PER_SHUFFLE_PARTITION =
SPARK_MAXIMUM_BYTES_PER_PARTITION / BYTES_PER_PARTITION_FILE_KEY_TRIPLET;
public HoodieBloomIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
super(config, jsc);
}
@Override
public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
final HoodieTable<T> hoodieTable) {
// Step 0: cache the input record RDD
if (config.getBloomIndexUseCaching()) {
recordRDD.persist(StorageLevel.MEMORY_AND_DISK_SER());
}
// Step 1: Extract out thinner JavaPairRDD of (partitionPath, recordKey)
JavaPairRDD<String, String> partitionRecordKeyPairRDD = recordRDD
.mapToPair(record -> new Tuple2<>(record.getPartitionPath(), record.getRecordKey()));
// Lookup indexes for all the partition/recordkey pair
JavaPairRDD<String, String> rowKeyFilenamePairRDD = lookupIndex(partitionRecordKeyPairRDD,
hoodieTable);
// Cache the result, for subsequent stages.
if (config.getBloomIndexUseCaching()) {
rowKeyFilenamePairRDD.persist(StorageLevel.MEMORY_AND_DISK_SER());
}
if (logger.isDebugEnabled()) {
long totalTaggedRecords = rowKeyFilenamePairRDD.count();
logger.debug("Number of update records (ones tagged with a fileID): " + totalTaggedRecords);
}
// Step 4: Tag the incoming records, as inserts or updates, by joining with existing record keys
// Cost: 4 sec.
JavaRDD<HoodieRecord<T>> taggedRecordRDD = tagLocationBacktoRecords(rowKeyFilenamePairRDD,
recordRDD);
if (config.getBloomIndexUseCaching()) {
recordRDD.unpersist(); // unpersist the input Record RDD
rowKeyFilenamePairRDD.unpersist();
}
return taggedRecordRDD;
}
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, final HoodieTable<T> table) {
JavaPairRDD<String, String> partitionRecordKeyPairRDD =
hoodieKeys.mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey()));
// Lookup indexes for all the partition/recordkey pair
JavaPairRDD<String, String> rowKeyFilenamePairRDD =
lookupIndex(partitionRecordKeyPairRDD, table);
JavaPairRDD<String, HoodieKey> rowKeyHoodieKeyPairRDD =
hoodieKeys.mapToPair(key -> new Tuple2<>(key.getRecordKey(), key));
return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD)
.mapToPair(keyPathTuple -> {
Optional<String> recordLocationPath;
if (keyPathTuple._2._2.isPresent()) {
String fileName = keyPathTuple._2._2.get();
String partitionPath = keyPathTuple._2._1.getPartitionPath();
recordLocationPath = Optional.of(new Path(
new Path(table.getMetaClient().getBasePath(), partitionPath),
fileName).toUri().getPath());
} else {
recordLocationPath = Optional.absent();
}
return new Tuple2<>(keyPathTuple._2._1, recordLocationPath);
});
}
/**
* Lookup the location for each record key and return the pair<record_key,location> for all record
* keys already present and drop the record keys if not present
*/
private JavaPairRDD<String, String> lookupIndex(
JavaPairRDD<String, String> partitionRecordKeyPairRDD, final HoodieTable<T> hoodieTable) {
// Obtain records per partition, in the incoming records
Map<String, Long> recordsPerPartition = partitionRecordKeyPairRDD.countByKey();
List<String> affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet());
// Step 2: Load all involved files as <Partition, filename> pairs
List<Tuple2<String, BloomIndexFileInfo>> fileInfoList = loadInvolvedFiles(
affectedPartitionPathList, hoodieTable);
final Map<String, List<BloomIndexFileInfo>> partitionToFileInfo = fileInfoList.stream()
.collect(groupingBy(Tuple2::_1, mapping(Tuple2::_2, toList())));
// Step 3: Obtain a RDD, for each incoming record, that already exists, with the file id, that contains it.
int parallelism = autoComputeParallelism(recordsPerPartition, partitionToFileInfo,
partitionRecordKeyPairRDD);
return findMatchingFilesForRecordKeys(partitionToFileInfo, partitionRecordKeyPairRDD,
parallelism);
}
/**
* The index lookup can be skewed in three dimensions : #files, #partitions, #records
*
* To be able to smoothly handle skews, we need to compute how to split each partitions into
* subpartitions. We do it here, in a way that keeps the amount of each Spark join partition to <
* 2GB.
*
* If {@link com.uber.hoodie.config.HoodieIndexConfig#BLOOM_INDEX_PARALLELISM_PROP} is specified
* as a NON-zero number, then that is used explicitly.
*/
private int autoComputeParallelism(final Map<String, Long> recordsPerPartition,
final Map<String, List<BloomIndexFileInfo>> partitionToFileInfo,
JavaPairRDD<String, String> partitionRecordKeyPairRDD) {
long totalComparisons = 0;
if (config.getBloomIndexPruneByRanges()) {
// we will just try exploding the input and then count to determine comparisons
totalComparisons = explodeRecordRDDWithFileComparisons(partitionToFileInfo,
partitionRecordKeyPairRDD).count();
} else {
// if not pruning by ranges, then each file in a partition needs to be compared against all
// records for a partition.
Map<String, Long> filesPerPartition = partitionToFileInfo.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, e -> Long.valueOf(e.getValue().size())));
long totalFiles = 0, totalRecords = 0;
for (String partitionPath : recordsPerPartition.keySet()) {
long numRecords = recordsPerPartition.get(partitionPath);
long numFiles =
filesPerPartition.containsKey(partitionPath) ? filesPerPartition.get(partitionPath)
: 1L;
totalComparisons += numFiles * numRecords;
totalFiles +=
filesPerPartition.containsKey(partitionPath) ? filesPerPartition.get(partitionPath)
: 0L;
totalRecords += numRecords;
}
logger.info("TotalRecords: " + totalRecords + ", TotalFiles: " + totalFiles
+ ", TotalAffectedPartitions:" + recordsPerPartition.size());
}
// each partition will have an item per comparison.
int parallelism = (int) (totalComparisons / MAX_ITEMS_PER_SHUFFLE_PARTITION + 1);
logger.info(
"Auto computed parallelism :" + parallelism + ", totalComparisons: " + totalComparisons);
return parallelism;
}
/**
* Its crucial to pick the right parallelism.
*
* totalSubPartitions : this is deemed safe limit, to be nice with Spark. inputParallelism :
* typically number of input file splits
*
* We pick the max such that, we are always safe, but go higher if say a there are a lot of input
* files. (otherwise, we will fallback to number of partitions in input and end up with slow
* performance)
*/
private int determineParallelism(int inputParallelism, int totalSubPartitions) {
// If bloom index parallelism is set, use it to to check against the input parallelism and take the max
int indexParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism());
int joinParallelism = Math.max(totalSubPartitions, indexParallelism);
logger.info("InputParallelism: ${" + inputParallelism + "}, " +
"IndexParallelism: ${" + config.getBloomIndexParallelism() + "}, " +
"TotalSubParts: ${" + totalSubPartitions + "}, " +
"Join Parallelism set to : " + joinParallelism);
return joinParallelism;
}
/**
* Load all involved files as <Partition, filename> pair RDD.
*/
@VisibleForTesting
List<Tuple2<String, BloomIndexFileInfo>> loadInvolvedFiles(List<String> partitions,
final HoodieTable<T> hoodieTable) {
// Obtain the latest data files from all the partitions.
List<Tuple2<String, HoodieDataFile>> dataFilesList = jsc
.parallelize(partitions, Math.max(partitions.size(), 1))
.flatMapToPair(partitionPath -> {
java.util.Optional<HoodieInstant> latestCommitTime =
hoodieTable.getCommitTimeline().filterCompletedInstants().lastInstant();
List<Tuple2<String, HoodieDataFile>> filteredFiles = new ArrayList<>();
if (latestCommitTime.isPresent()) {
filteredFiles =
hoodieTable.getROFileSystemView().getLatestDataFilesBeforeOrOn(partitionPath,
latestCommitTime.get().getTimestamp())
.map(f -> new Tuple2<>(partitionPath, f))
.collect(toList());
}
return filteredFiles.iterator();
}).collect();
if (config.getBloomIndexPruneByRanges()) {
// also obtain file ranges, if range pruning is enabled
return jsc.parallelize(dataFilesList, Math.max(dataFilesList.size(), 1))
.mapToPair(ft -> {
try {
String[] minMaxKeys = ParquetUtils
.readMinMaxRecordKeys(ft._2().getFileStatus().getPath());
return new Tuple2<>(ft._1(),
new BloomIndexFileInfo(ft._2().getFileName(), minMaxKeys[0], minMaxKeys[1]));
} catch (MetadataNotFoundException me) {
logger.warn("Unable to find range metadata in file :" + ft._2());
return new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName()));
}
}).collect();
} else {
return dataFilesList.stream()
.map(ft -> new Tuple2<>(ft._1(), new BloomIndexFileInfo(ft._2().getFileName())))
.collect(toList());
}
}
@Override
public boolean rollbackCommit(String commitTime) {
// Nope, don't need to do anything.
return true;
}
/**
* This is not global, since we depend on the partitionPath to do the lookup
*/
@Override
public boolean isGlobal() {
return false;
}
/**
* No indexes into log files yet.
*/
@Override
public boolean canIndexLogFiles() {
return false;
}
/**
* Bloom filters are stored, into the same data files.
*/
@Override
public boolean isImplicitWithStorage() {
return true;
}
/**
* If we don't have key ranges, then we also need to compare against the file (no other choice).
* If we do, then only compare the file if the record key falls in range.
*/
private boolean shouldCompareWithFile(BloomIndexFileInfo indexInfo, String recordKey) {
return !indexInfo.hasKeyRanges() || indexInfo.isKeyInRange(recordKey);
}
/**
* For each incoming record, produce N output records, 1 each for each file against which the
* record's key needs to be checked. For datasets, where the keys have a definite insert order
* (e.g: timestamp as prefix), the number of files to be compared gets cut down a lot from range
* pruning.
*/
// sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on recordKey
// ranges in the index info.
@VisibleForTesting
JavaPairRDD<String, Tuple2<String, HoodieKey>> explodeRecordRDDWithFileComparisons(
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo,
JavaPairRDD<String, String> partitionRecordKeyPairRDD) {
return partitionRecordKeyPairRDD
.map(partitionRecordKeyPair -> {
String recordKey = partitionRecordKeyPair._2();
String partitionPath = partitionRecordKeyPair._1();
List<BloomIndexFileInfo> indexInfos = partitionToFileIndexInfo.get(partitionPath);
List<Tuple2<String, Tuple2<String, HoodieKey>>> recordComparisons = new ArrayList<>();
if (indexInfos
!= null) { // could be null, if there are no files in a given partition yet.
// for each candidate file in partition, that needs to be compared.
for (BloomIndexFileInfo indexInfo : indexInfos) {
if (shouldCompareWithFile(indexInfo, recordKey)) {
recordComparisons.add(
new Tuple2<>(String.format("%s#%s", indexInfo.getFileName(), recordKey),
new Tuple2<>(indexInfo.getFileName(),
new HoodieKey(recordKey, partitionPath))));
}
}
}
return recordComparisons;
})
.flatMapToPair(t -> t.iterator());
}
/**
* Find out <RowKey, filename> pair. All workload grouped by file-level.
*
* Join PairRDD(PartitionPath, RecordKey) and PairRDD(PartitionPath, File) & then repartition such
* that each RDD partition is a file, then for each file, we do (1) load bloom filter, (2) load
* rowKeys, (3) Tag rowKey
*
* Make sure the parallelism is at least the groupby parallelism for tagging location
*/
@VisibleForTesting
JavaPairRDD<String, String> findMatchingFilesForRecordKeys(
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo,
JavaPairRDD<String, String> partitionRecordKeyPairRDD,
int totalSubpartitions) {
int joinParallelism = determineParallelism(partitionRecordKeyPairRDD.partitions().size(),
totalSubpartitions);
JavaPairRDD<String, Tuple2<String, HoodieKey>> fileSortedTripletRDD = explodeRecordRDDWithFileComparisons(
partitionToFileIndexInfo, partitionRecordKeyPairRDD)
// sort further based on filename, such that all checking for the file can happen within a single partition, on-the-fly
.sortByKey(true, joinParallelism);
return fileSortedTripletRDD
.mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(config.getBasePath()), true)
.flatMap(indexLookupResults -> indexLookupResults.iterator())
.filter(lookupResult -> lookupResult.getMatchingRecordKeys().size() > 0)
.flatMapToPair(lookupResult -> {
List<Tuple2<String, String>> vals = new ArrayList<>();
for (String recordKey : lookupResult.getMatchingRecordKeys()) {
vals.add(new Tuple2<>(recordKey, lookupResult.getFileName()));
}
return vals.iterator();
});
}
/**
* Tag the <rowKey, filename> back to the original HoodieRecord RDD.
*/
private JavaRDD<HoodieRecord<T>> tagLocationBacktoRecords(
JavaPairRDD<String, String> rowKeyFilenamePairRDD,
JavaRDD<HoodieRecord<T>> recordRDD) {
JavaPairRDD<String, HoodieRecord<T>> rowKeyRecordPairRDD = recordRDD
.mapToPair(record -> new Tuple2<>(record.getRecordKey(), record));
// Here as the recordRDD might have more data than rowKeyRDD (some rowKeys' fileId is null), so we do left outer join.
return rowKeyRecordPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).values().map(
v1 -> {
HoodieRecord<T> record = v1._1();
if (v1._2().isPresent()) {
String filename = v1._2().get();
if (filename != null && !filename.isEmpty()) {
record.setCurrentLocation(new HoodieRecordLocation(FSUtils.getCommitTime(filename),
FSUtils.getFileId(filename)));
}
}
return record;
}
);
}
@Override
public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
HoodieTable<T> hoodieTable) {
return writeStatusRDD;
}
}
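Illustrative sketch (not part of this commit) of looking up record locations through the bloom index; the Optional in the result is Guava's, and absent() marks keys that are not present in the dataset:

  static <T extends HoodieRecordPayload> JavaPairRDD<HoodieKey, Optional<String>> lookupSketch(
      HoodieWriteConfig config, JavaSparkContext jsc, JavaRDD<HoodieKey> hoodieKeys,
      HoodieTable<T> hoodieTable) {
    HoodieBloomIndex<T> index = new HoodieBloomIndex<>(config, jsc);
    // Present keys map to the path of the data file that contains them; missing keys map to absent().
    return index.fetchRecordLocation(hoodieKeys, hoodieTable);
  }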

View File

@@ -24,172 +24,182 @@ import com.uber.hoodie.common.util.ParquetUtils;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIndexException;
import com.uber.hoodie.func.LazyIterableIterator;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;
/**
* Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the
* actual files
*/
public class HoodieBloomIndexCheckFunction implements
Function2<Integer, Iterator<Tuple2<String, Tuple2<String, HoodieKey>>>, Iterator<List<IndexLookupResult>>> {
private static Logger logger = LogManager.getLogger(HoodieBloomIndexCheckFunction.class);
private final String basePath;
public HoodieBloomIndexCheckFunction(String basePath) {
this.basePath = basePath;
}
/**
* Given a list of row keys and one file, return only row keys existing in that file.
*/
public static List<String> checkCandidatesAgainstFile(List<String> candidateRecordKeys,
Path filePath) throws HoodieIndexException {
List<String> foundRecordKeys = new ArrayList<>();
try {
// Load all rowKeys from the file, to double-confirm
if (!candidateRecordKeys.isEmpty()) {
Set<String> fileRowKeys = ParquetUtils.readRowKeysFromParquet(filePath);
logger.info("Loading " + fileRowKeys.size() + " row keys from " + filePath);
if (logger.isDebugEnabled()) {
logger.debug("Keys from " + filePath + " => " + fileRowKeys);
}
for (String rowKey : candidateRecordKeys) {
if (fileRowKeys.contains(rowKey)) {
foundRecordKeys.add(rowKey);
}
}
logger.info("After checking with row keys, we have " + foundRecordKeys.size()
+ " results, for file " + filePath + " => " + foundRecordKeys);
if (logger.isDebugEnabled()) {
logger.debug("Keys matching for file " + filePath + " => " + foundRecordKeys);
}
}
} catch (Exception e) {
throw new HoodieIndexException("Error checking candidate keys against file.", e);
}
return foundRecordKeys;
}
class LazyKeyCheckIterator extends
LazyIterableIterator<Tuple2<String, Tuple2<String, HoodieKey>>, List<IndexLookupResult>> {
private List<String> candidateRecordKeys;
private BloomFilter bloomFilter;
private String currentFile;
private String currentParitionPath;
LazyKeyCheckIterator(
Iterator<Tuple2<String, Tuple2<String, HoodieKey>>> fileParitionRecordKeyTripletItr) {
super(fileParitionRecordKeyTripletItr);
currentFile = null;
candidateRecordKeys = new ArrayList<>();
bloomFilter = null;
currentParitionPath = null;
}
class LazyKeyCheckIterator extends LazyIterableIterator<Tuple2<String, Tuple2<String, HoodieKey>>, List<IndexLookupResult>> {
private List<String> candidateRecordKeys;
private BloomFilter bloomFilter;
private String currentFile;
private String currentParitionPath;
LazyKeyCheckIterator(Iterator<Tuple2<String, Tuple2<String, HoodieKey>>> fileParitionRecordKeyTripletItr) {
super(fileParitionRecordKeyTripletItr);
currentFile = null;
candidateRecordKeys = new ArrayList<>();
bloomFilter = null;
currentParitionPath = null;
}
    @Override
    protected void start() {
}
private void initState(String fileName, String partitionPath) throws HoodieIndexException {
try {
Path filePath = new Path(basePath + "/" + partitionPath + "/" + fileName);
bloomFilter = ParquetUtils.readBloomFilterFromParquetMetadata(filePath);
candidateRecordKeys = new ArrayList<>();
currentFile = fileName;
currentParitionPath = partitionPath;
} catch (Exception e) {
throw new HoodieIndexException("Error checking candidate keys against file.", e);
}
}
@Override
protected List<IndexLookupResult> computeNext() {
List<IndexLookupResult> ret = new ArrayList<>();
try {
// process one file in each go.
while (inputItr.hasNext()) {
Tuple2<String, Tuple2<String, HoodieKey>> currentTuple = inputItr.next();
String fileName = currentTuple._2._1;
String partitionPath = currentTuple._2._2.getPartitionPath();
String recordKey = currentTuple._2._2.getRecordKey();
// lazily init state
if (currentFile == null) {
initState(fileName, partitionPath);
}
// if continuing on the current file
if (fileName.equals(currentFile)) {
// check record key against bloom filter of current file & add to possible keys if needed
if (bloomFilter.mightContain(recordKey)) {
if (logger.isDebugEnabled()) {
logger.debug("#1 Adding " + recordKey + " as candidate for file " + fileName);
}
candidateRecordKeys.add(recordKey);
}
} else {
// do the actual checking of file & break out
Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile);
logger.info(
"#1 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys
.size() + " for " + filePath);
if (logger.isDebugEnabled()) {
logger
.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys);
}
ret.add(new IndexLookupResult(currentFile,
checkCandidatesAgainstFile(candidateRecordKeys, filePath)));
initState(fileName, partitionPath);
if (bloomFilter.mightContain(recordKey)) {
if (logger.isDebugEnabled()) {
logger.debug("#2 Adding " + recordKey + " as candidate for file " + fileName);
}
candidateRecordKeys.add(recordKey);
}
break;
}
}
// handle case, where we ran out of input, finish pending work, update return val
if (!inputItr.hasNext()) {
Path filePath = new Path(basePath + "/" + currentParitionPath + "/" + currentFile);
logger.info(
"#2 After bloom filter, the candidate row keys is reduced to " + candidateRecordKeys
.size() + " for " + filePath);
if (logger.isDebugEnabled()) {
logger.debug("#The candidate row keys for " + filePath + " => " + candidateRecordKeys);
}
ret.add(new IndexLookupResult(currentFile,
checkCandidatesAgainstFile(candidateRecordKeys, filePath)));
}
} catch (Throwable e) {
if (e instanceof HoodieException) {
throw e;
}
throw new HoodieIndexException("Error checking bloom filter index. ", e);
}
return ret;
}
@Override
protected void end() {
}
}
@Override
public Iterator<List<IndexLookupResult>> call(Integer partition,
Iterator<Tuple2<String, Tuple2<String, HoodieKey>>> fileParitionRecordKeyTripletItr)
throws Exception {
return new LazyKeyCheckIterator(fileParitionRecordKeyTripletItr);
}
}
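A self-contained sketch of the two-phase check this class performs: a bloom filter prunes candidate keys cheaply, then the survivors are double-confirmed against the real key set (the role played by ParquetUtils.readRowKeysFromParquet above). It assumes Guava's BloomFilter is on the classpath; the keys are made up for illustration.

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class BloomCandidateCheckSketch {
  public static void main(String[] args) {
    // Keys actually stored in one (hypothetical) data file.
    Set<String> fileRowKeys = new HashSet<>(Arrays.asList("k1", "k2", "k3"));

    // Phase 1: a bloom filter built over the file's keys prunes incoming keys cheaply.
    BloomFilter<CharSequence> filter =
        BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), fileRowKeys.size(), 0.01);
    fileRowKeys.forEach(filter::put);

    List<String> candidates = new ArrayList<>();
    for (String incoming : Arrays.asList("k1", "k3", "k9")) {
      if (filter.mightContain(incoming)) {  // may include false positives
        candidates.add(incoming);
      }
    }

    // Phase 2: double-confirm candidates against the real key set read from the file.
    List<String> found = new ArrayList<>();
    for (String candidate : candidates) {
      if (fileRowKeys.contains(candidate)) {
        found.add(candidate);
      }
    }
    System.out.println(found); // [k1, k3]
  }
}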

View File

@@ -25,21 +25,21 @@ import java.util.List;
*/
public class IndexLookupResult {
  private String fileName;

  private List<String> matchingRecordKeys;

  public IndexLookupResult(String fileName, List<String> matchingRecordKeys) {
    this.fileName = fileName;
    this.matchingRecordKeys = matchingRecordKeys;
  }

  public String getFileName() {
    return fileName;
  }

  public List<String> getMatchingRecordKeys() {
    return matchingRecordKeys;
  }
}

View File

@@ -19,7 +19,6 @@
package com.uber.hoodie.index.bucketed;
import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
@@ -29,96 +28,86 @@ import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieIndexException;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.table.HoodieTable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;
/**
 * A stateless index implementation that uses a deterministic mapping function to determine the
 * fileID for a given record.
 *
 * Pros:
 * - Fast
 *
 * Cons:
 * - Need to tune the number of buckets per partition path manually (FIXME: Need to autotune this)
 * - Could increase write amplification on copy-on-write storage since inserts always rewrite files
 * - Not global.
 */
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
  private static Logger logger = LogManager.getLogger(BucketedIndex.class);

  public BucketedIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
    super(config, jsc);
  }

  private String getBucket(String recordKey) {
    return String.valueOf(recordKey.hashCode() % config.getNumBucketsPerPartition());
  }

  @Override
  public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys,
      HoodieTable<T> table) {
    return hoodieKeys.mapToPair(hk -> new Tuple2<>(hk, Optional.of(getBucket(hk.getRecordKey()))));
  }

  @Override
  public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
      HoodieTable<T> hoodieTable) throws HoodieIndexException {
    return recordRDD.map(record -> {
      String bucket = getBucket(record.getRecordKey());
      // HACK(vc) a non-existent commit is provided here.
      record.setCurrentLocation(new HoodieRecordLocation("000", bucket));
      return record;
    });
  }

  @Override
  public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
      HoodieTable<T> hoodieTable) throws HoodieIndexException {
    return writeStatusRDD;
  }

  @Override
  public boolean rollbackCommit(String commitTime) {
    // nothing to rollback in the index.
    return true;
  }

  /**
   * Bucketing is still done within each partition.
   */
  @Override
  public boolean isGlobal() {
    return false;
  }

  /**
   * Since indexing is just a deterministic hash, we can identify the file group correctly even
   * without an index on the actual log file.
   */
  @Override
  public boolean canIndexLogFiles() {
    return true;
  }

  /**
   * Indexing is just a hash function.
   */
  @Override
  public boolean isImplicitWithStorage() {
    return true;
  }
}
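A minimal sketch of the deterministic bucketing idea documented above. The class and parameter names below are illustrative, not part of the Hoodie API, and the sketch uses Math.floorMod so the bucket id stays non-negative, a small deviation from the plain % in getBucket.

public final class BucketMappingExample {

  // Assumed/hypothetical parameter: number of buckets configured per partition path.
  private final int numBucketsPerPartition;

  public BucketMappingExample(int numBucketsPerPartition) {
    this.numBucketsPerPartition = numBucketsPerPartition;
  }

  // The same record key always lands in the same bucket; floorMod keeps the result
  // non-negative even when hashCode() is negative.
  public String getBucket(String recordKey) {
    return String.valueOf(Math.floorMod(recordKey.hashCode(), numBucketsPerPartition));
  }

  public static void main(String[] args) {
    BucketMappingExample index = new BucketMappingExample(16);
    System.out.println(index.getBucket("key_0001")); // deterministic bucket id
    System.out.println(index.getBucket("key_0001")); // same bucket on every call
  }
}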

View File

@@ -19,24 +19,33 @@
package com.uber.hoodie.index.hbase;
import com.google.common.base.Optional;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.config.HoodieIndexConfig;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieDependentSystemUnavailableException;
import com.uber.hoodie.exception.HoodieIndexException;
import com.uber.hoodie.index.HoodieIndex;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -45,230 +54,221 @@ import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
/**
* Hoodie Index implementation backed by HBase
*/
public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
  private final static byte[] SYSTEM_COLUMN_FAMILY = Bytes.toBytes("_s");
  private final static byte[] COMMIT_TS_COLUMN = Bytes.toBytes("commit_ts");
  private final static byte[] FILE_NAME_COLUMN = Bytes.toBytes("file_name");
  private final static byte[] PARTITION_PATH_COLUMN = Bytes.toBytes("partition_path");

  private static Logger logger = LogManager.getLogger(HBaseIndex.class);

  private final String tableName;

  public HBaseIndex(HoodieWriteConfig config, JavaSparkContext jsc) {
    super(config, jsc);
    this.tableName = config.getProps().getProperty(HoodieIndexConfig.HBASE_TABLENAME_PROP);
  }
@Override
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, HoodieTable<T> table) {
throw new UnsupportedOperationException("HBase index does not implement check exist yet");
}
private static Connection hbaseConnection = null;
private Connection getHBaseConnection() {
Configuration hbaseConfig = HBaseConfiguration.create();
String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
hbaseConfig.set("hbase.zookeeper.quorum", quorum);
String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP);
hbaseConfig.set("hbase.zookeeper.property.clientPort", port);
try {
return ConnectionFactory.createConnection(hbaseConfig);
} catch (IOException e) {
throw new HoodieDependentSystemUnavailableException(
HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
}
}
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
class LocationTagFunction
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
private final HoodieTable<T> hoodieTable;
LocationTagFunction(HoodieTable<T> hoodieTable) {
this.hoodieTable = hoodieTable;
}
@Override
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(
JavaRDD<HoodieKey> hoodieKeys, HoodieTable<T> table) {
throw new UnsupportedOperationException("HBase index does not implement check exist yet");
}
private static Connection hbaseConnection = null;
private Connection getHBaseConnection() {
Configuration hbaseConfig = HBaseConfiguration.create();
String quorum = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKQUORUM_PROP);
hbaseConfig.set("hbase.zookeeper.quorum", quorum);
String port = config.getProps().getProperty(HoodieIndexConfig.HBASE_ZKPORT_PROP);
hbaseConfig.set("hbase.zookeeper.property.clientPort", port);
try {
return ConnectionFactory.createConnection(hbaseConfig);
} catch (IOException e) {
throw new HoodieDependentSystemUnavailableException(
HoodieDependentSystemUnavailableException.HBASE, quorum + ":" + port);
public Iterator<HoodieRecord<T>> call(Integer partitionNum,
Iterator<HoodieRecord<T>> hoodieRecordIterator) {
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
}
}
List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
// Do the tagging.
while (hoodieRecordIterator.hasNext()) {
HoodieRecord rec = hoodieRecordIterator.next();
// TODO(vc): This may need to be a multi get.
Result result = hTable.get(
new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1)
.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
/**
* Function that tags each HoodieRecord with an existing location, if known.
*/
class LocationTagFunction
implements Function2<Integer, Iterator<HoodieRecord<T>>, Iterator<HoodieRecord<T>>> {
// first, attempt to grab location from HBase
if (result.getRow() != null) {
String commitTs =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
String fileId =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
private final HoodieTable<T> hoodieTable;
LocationTagFunction(HoodieTable<T> hoodieTable) {
this.hoodieTable = hoodieTable;
HoodieTimeline commitTimeline = hoodieTable.getCompletedCommitTimeline();
// if the last commit ts for this row is less than the system commit ts
if (!commitTimeline.empty() && commitTimeline.containsInstant(
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) {
rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
}
}
taggedRecords.add(rec);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Tag indexed locations because of exception with HBase Client", e);
} finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
@Override
public Iterator<HoodieRecord<T>> call(Integer partitionNum,
Iterator<HoodieRecord<T>> hoodieRecordIterator) {
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
}
List<HoodieRecord<T>> taggedRecords = new ArrayList<>();
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
// Do the tagging.
while (hoodieRecordIterator.hasNext()) {
HoodieRecord rec = hoodieRecordIterator.next();
// TODO(vc): This may need to be a multi get.
Result result = hTable.get(
new Get(Bytes.toBytes(rec.getRecordKey())).setMaxVersions(1)
.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN)
.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN));
}
return taggedRecords.iterator();
}
}
// first, attempt to grab location from HBase
if (result.getRow() != null) {
String commitTs =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN));
String fileId =
Bytes.toString(result.getValue(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN));
@Override
public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD,
HoodieTable<T> hoodieTable) {
return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(hoodieTable), true);
}
HoodieTimeline commitTimeline = hoodieTable.getCompletedCommitTimeline();
// if the last commit ts for this row is less than the system commit ts
if (!commitTimeline.empty() && commitTimeline.containsInstant(
new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTs))) {
rec.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId));
}
}
taggedRecords.add(rec);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Tag indexed locations because of exception with HBase Client", e);
}
class UpdateLocationTask implements
Function2<Integer, Iterator<WriteStatus>, Iterator<WriteStatus>> {
finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
@Override
public Iterator<WriteStatus> call(Integer partition, Iterator<WriteStatus> statusIterator) {
}
return taggedRecords.iterator();
List<WriteStatus> writeStatusList = new ArrayList<>();
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
}
@Override
public JavaRDD<HoodieRecord<T>> tagLocation(JavaRDD<HoodieRecord<T>> recordRDD, HoodieTable<T> hoodieTable) {
return recordRDD.mapPartitionsWithIndex(this.new LocationTagFunction(hoodieTable), true);
}
class UpdateLocationTask implements Function2<Integer, Iterator<WriteStatus>, Iterator<WriteStatus>> {
@Override
public Iterator<WriteStatus> call(Integer partition, Iterator<WriteStatus> statusIterator) {
List<WriteStatus> writeStatusList = new ArrayList<>();
// Grab the global HBase connection
synchronized (HBaseIndex.class) {
if (hbaseConnection == null) {
hbaseConnection = getHBaseConnection();
}
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
while (statusIterator.hasNext()) {
WriteStatus writeStatus = statusIterator.next();
List<Put> puts = new ArrayList<>();
List<Delete> deletes = new ArrayList<>();
try {
for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
if (!writeStatus.isErrored(rec.getKey())) {
java.util.Optional<HoodieRecordLocation> loc = rec.getNewLocation();
if (loc.isPresent()) {
Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
Bytes.toBytes(loc.get().getCommitTime()));
put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
Bytes.toBytes(loc.get().getFileId()));
put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
Bytes.toBytes(rec.getPartitionPath()));
puts.add(put);
} else {
//Delete existing index for a deleted record
Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey()));
deletes.add(delete);
}
}
}
HTable hTable = null;
try {
hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName));
while (statusIterator.hasNext()) {
WriteStatus writeStatus = statusIterator.next();
List<Put> puts = new ArrayList<>();
List<Delete> deletes = new ArrayList<>();
try {
for (HoodieRecord rec : writeStatus.getWrittenRecords()) {
if (!writeStatus.isErrored(rec.getKey())) {
java.util.Optional<HoodieRecordLocation> loc = rec.getNewLocation();
if(loc.isPresent()) {
Put put = new Put(Bytes.toBytes(rec.getRecordKey()));
put.addColumn(SYSTEM_COLUMN_FAMILY, COMMIT_TS_COLUMN,
Bytes.toBytes(loc.get().getCommitTime()));
put.addColumn(SYSTEM_COLUMN_FAMILY, FILE_NAME_COLUMN,
Bytes.toBytes(loc.get().getFileId()));
put.addColumn(SYSTEM_COLUMN_FAMILY, PARTITION_PATH_COLUMN,
Bytes.toBytes(rec.getPartitionPath()));
puts.add(put);
} else {
//Delete existing index for a deleted record
Delete delete = new Delete(Bytes.toBytes(rec.getRecordKey()));
deletes.add(delete);
}
}
}
hTable.put(puts);
hTable.delete(deletes);
hTable.flushCommits();
} catch (Exception e) {
Exception we = new Exception("Error updating index for " + writeStatus, e);
logger.error(we);
writeStatus.setGlobalError(we);
}
writeStatusList.add(writeStatus);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Update Index locations because of exception with HBase Client", e);
} finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
}
return writeStatusList.iterator();
hTable.put(puts);
hTable.delete(deletes);
hTable.flushCommits();
} catch (Exception e) {
Exception we = new Exception("Error updating index for " + writeStatus, e);
logger.error(we);
writeStatus.setGlobalError(we);
}
writeStatusList.add(writeStatus);
}
} catch (IOException e) {
throw new HoodieIndexException(
"Failed to Update Index locations because of exception with HBase Client", e);
} finally {
if (hTable != null) {
try {
hTable.close();
} catch (IOException e) {
// Ignore
}
}
}
return writeStatusList.iterator();
}
}
  @Override
  public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD,
      HoodieTable<T> hoodieTable) {
    return writeStatusRDD.mapPartitionsWithIndex(new UpdateLocationTask(), true);
  }

  @Override
  public boolean rollbackCommit(String commitTime) {
    // Can't really rollback here. HBase can only let you go from recordKey to fileID,
    // not the other way around
    return true;
  }

  /**
   * Only looks up by recordKey.
   */
  @Override
  public boolean isGlobal() {
    return true;
  }

  /**
   * Mapping is available in HBase already.
   */
  @Override
  public boolean canIndexLogFiles() {
    return true;
  }

  /**
   * Index needs to be explicitly updated after storage write.
   */
  @Override
  public boolean isImplicitWithStorage() {
    return false;
  }
}
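A hedged, standalone sketch of the recordKey-to-location round trip this index relies on, written against the plain HBase 1.x+ client API rather than Hoodie classes; the table name, quorum, and row values are assumptions for illustration only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseKeyLocationSketch {
  private static final byte[] CF = Bytes.toBytes("_s");            // same column family as above
  private static final byte[] FILE_COL = Bytes.toBytes("file_name");

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "localhost");                // assumed quorum
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(TableName.valueOf("demo_index"))) { // hypothetical table
      // Write: record key -> file id, as UpdateLocationTask does after a commit.
      Put put = new Put(Bytes.toBytes("record-001"));
      put.addColumn(CF, FILE_COL, Bytes.toBytes("file-abc"));
      table.put(put);

      // Read it back during tagging, as LocationTagFunction does.
      Result result = table.get(new Get(Bytes.toBytes("record-001")).addColumn(CF, FILE_COL));
      if (result.getRow() != null) {
        System.out.println(Bytes.toString(result.getValue(CF, FILE_COL)));
      }
    }
  }
}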

View File

@@ -36,13 +36,6 @@ import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieAppendException;
import com.uber.hoodie.exception.HoodieUpsertException;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@@ -50,155 +43,161 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;
/**
* IO Operation to append data onto an existing file.
*
* @param <T>
*/
public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOHandle<T> {
private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class);
private static AtomicLong recordIndex = new AtomicLong(1);
private final WriteStatus writeStatus;
private final String fileId;
private String partitionPath;
private List<HoodieRecord<T>> records;
private long recordsWritten = 0;
private long recordsDeleted = 0;
private HoodieLogFile currentLogFile;
private Writer writer;
public HoodieAppendHandle(HoodieWriteConfig config,
String commitTime,
HoodieTable<T> hoodieTable,
String fileId,
Iterator<HoodieRecord<T>> recordItr) {
super(config, commitTime, hoodieTable);
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
writeStatus.setStat(new HoodieDeltaWriteStat());
this.writeStatus = writeStatus;
this.fileId = fileId;
init(recordItr);
}
private void init(Iterator<HoodieRecord<T>> recordItr) {
List<HoodieRecord<T>> records = Lists.newArrayList();
recordItr.forEachRemaining(record -> {
records.add(record);
// extract some information from the first record
if (partitionPath == null) {
partitionPath = record.getPartitionPath();
// HACK(vc) This also assumes a base file. It will break, if appending without one.
String latestValidFilePath =
fileSystemView.getLatestDataFiles(record.getPartitionPath())
.filter(dataFile -> dataFile.getFileId().equals(fileId))
.findFirst().get().getFileName();
String baseCommitTime = FSUtils.getCommitTime(latestValidFilePath);
writeStatus.getStat().setPrevCommit(baseCommitTime);
writeStatus.setFileId(fileId);
writeStatus.setPartitionPath(record.getPartitionPath());
writeStatus.getStat().setFileId(fileId);
try {
this.writer = HoodieLogFormat.newWriterBuilder()
.onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
.withFileId(fileId).overBaseCommit(baseCommitTime)
.withFs(fs).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
this.currentLogFile = writer.getLogFile();
((HoodieDeltaWriteStat) writeStatus.getStat())
.setLogVersion(currentLogFile.getLogVersion());
((HoodieDeltaWriteStat) writeStatus.getStat())
.setLogOffset(writer.getCurrentSize());
} catch (Exception e) {
logger.error("Error in update task at commit " + commitTime, e);
writeStatus.setGlobalError(e);
throw new HoodieUpsertException(
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId
+ " on commit " + commitTime + " on HDFS path " + hoodieTable
.getMetaClient().getBasePath() + partitionPath, e);
}
Path path = new Path(record.getPartitionPath(),
FSUtils.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId));
writeStatus.getStat().setPath(path.toString());
}
// update the new location of the record, so we know where to find it next
record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
});
this.records = records;
}
private Optional<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
Optional recordMetadata = hoodieRecord.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = hoodieRecord.getData().getInsertValue(schema);
if(avroRecord.isPresent()) {
String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(),
recordIndex.getAndIncrement());
HoodieAvroUtils
.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
hoodieRecord.getPartitionPath(), fileId);
HoodieAvroUtils
.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), commitTime, seqId);
recordsWritten++;
} else {
recordsDeleted++;
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord, recordMetadata);
return avroRecord;
} catch (Exception e) {
logger.error("Error writing record " + hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata);
logger.error("Error in update task at commit " + commitTime, e);
writeStatus.setGlobalError(e);
throw new HoodieUpsertException(
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId
+ " on commit " + commitTime + " on HDFS path " + hoodieTable
.getMetaClient().getBasePath() + partitionPath, e);
}
return Optional.empty();
}
public void doAppend() {
List<IndexedRecord> recordList = new ArrayList<>();
List<String> keysToDelete = new ArrayList<>();
Map<HoodieLogBlock.LogMetadataType, String> metadata = Maps.newHashMap();
metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, commitTime);
records.stream().forEach(record -> {
Optional<IndexedRecord> indexedRecord = getIndexedRecord(record);
if(indexedRecord.isPresent()) {
recordList.add(indexedRecord.get());
} else {
keysToDelete.add(record.getRecordKey());
}
});
try {
if(recordList.size() > 0) {
writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, schema, metadata));
}
if(keysToDelete.size() > 0) {
writer = writer.appendBlock(new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), metadata));
}
} catch (Exception e) {
throw new HoodieAppendException(
"Failed while appeding records to " + currentLogFile.getPath(), e);
}
}
public void close() {
try {
if (writer != null) {
writer.close();
}
writeStatus.getStat().setNumWrites(recordsWritten);
writeStatus.getStat().setNumDeletes(recordsDeleted);
writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size());
} catch (IOException e) {
throw new HoodieUpsertException("Failed to close UpdateHandle", e);
}
  }
public WriteStatus getWriteStatus() {
return writeStatus;
  }
}
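A small sketch of the split doAppend performs: records whose payload resolves to a value go into a data block, records whose payload resolves to empty become delete keys. The Rec holder below is a hypothetical stand-in for HoodieRecord, not Hoodie API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

public class AppendBlockSplitSketch {

  // Stand-in for a record: a key plus an optional payload (empty payload == delete).
  static class Rec {
    final String key;
    final Optional<String> payload;

    Rec(String key, Optional<String> payload) {
      this.key = key;
      this.payload = payload;
    }
  }

  public static void main(String[] args) {
    List<Rec> records = Arrays.asList(
        new Rec("k1", Optional.of("v1")),
        new Rec("k2", Optional.empty()),   // a delete
        new Rec("k3", Optional.of("v3")));

    List<String> recordList = new ArrayList<>();   // would become an Avro data block
    List<String> keysToDelete = new ArrayList<>(); // would become a delete block

    for (Rec r : records) {
      if (r.payload.isPresent()) {
        recordList.add(r.payload.get());
      } else {
        keysToDelete.add(r.key);
      }
    }
    System.out.println("data block: " + recordList + ", delete block keys: " + keysToDelete);
  }
}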

View File

@@ -27,226 +27,212 @@ import com.uber.hoodie.common.table.TableFileSystemView;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
 * Cleaner is responsible for garbage collecting older files in a given partition path, such that
 * <p>
 * 1) It provides sufficient time for existing queries running on older versions, to finish
 * <p>
 * 2) It bounds the growth of the files in the file system
 * <p>
 * TODO: Should all cleaning be done based on {@link com.uber.hoodie.common.model.HoodieCommitMetadata}
 */
public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
  private static Logger logger = LogManager.getLogger(HoodieCleanHelper.class);

  private final TableFileSystemView fileSystemView;
  private final HoodieTimeline commitTimeline;
  private HoodieTable<T> hoodieTable;
  private HoodieWriteConfig config;
  private FileSystem fs;

  public HoodieCleanHelper(HoodieTable<T> hoodieTable, HoodieWriteConfig config) {
    this.hoodieTable = hoodieTable;
    this.fileSystemView = hoodieTable.getCompletedFileSystemView();
    this.commitTimeline = hoodieTable.getCompletedCommitTimeline();
    this.config = config;
    this.fs = hoodieTable.getFs();
  }
/**
* Selects the older versions of files for cleaning, such that it bounds the number of versions of
* each file. This policy is useful, if you are simply interested in querying the table, and you
* don't want too many versions for a single file (i.e run it with versionsRetained = 1)
*/
private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath)
throws IOException {
logger.info("Cleaning " + partitionPath + ", retaining latest " + config
.getCleanerFileVersionsRetained() + " file versions. ");
List<HoodieFileGroup> fileGroups =
fileSystemView.getAllFileGroups(partitionPath)
.collect(Collectors.toList());
List<String> deletePaths = new ArrayList<>();
// Collect all the datafiles savepointed by all the savepoints
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
.flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList());
for (HoodieFileGroup fileGroup : fileGroups) {
int keepVersions = config.getCleanerFileVersionsRetained();
Iterator<FileSlice> fileSliceIterator = fileGroup.getAllFileSlices().iterator();
while (fileSliceIterator.hasNext() && keepVersions > 0) {
// Skip this most recent version
FileSlice nextSlice = fileSliceIterator.next();
HoodieDataFile dataFile = nextSlice.getDataFile().get();
if (savepointedFiles.contains(dataFile.getFileName())) {
// do not clean up a savepoint data file
continue;
}
keepVersions--;
}
// Delete the remaining files
while (fileSliceIterator.hasNext()) {
FileSlice nextSlice = fileSliceIterator.next();
HoodieDataFile dataFile = nextSlice.getDataFile().get();
deletePaths.add(dataFile.getFileStatus().getPath().toString());
if (hoodieTable.getMetaClient().getTableType()
== HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well
deletePaths.addAll(nextSlice.getLogFiles()
.map(file -> file.getPath().toString())
.collect(Collectors.toList()));
}
}
}
return deletePaths;
}
/**
* Selects the older versions of files for cleaning, such that it bounds the number of versions of each file.
* This policy is useful, if you are simply interested in querying the table, and you don't want too many
* versions for a single file (i.e run it with versionsRetained = 1)
*
* @param partitionPath
* @return
* @throws IOException
*/
private List<String> getFilesToCleanKeepingLatestVersions(String partitionPath)
throws IOException {
logger.info("Cleaning " + partitionPath + ", retaining latest " + config
.getCleanerFileVersionsRetained() + " file versions. ");
List<HoodieFileGroup> fileGroups =
fileSystemView.getAllFileGroups(partitionPath)
.collect(Collectors.toList());
List<String> deletePaths = new ArrayList<>();
// Collect all the datafiles savepointed by all the savepoints
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
.flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList());
/**
* Selects the versions for file for cleaning, such that it <p> - Leaves the latest version of the
* file untouched - For older versions, - It leaves all the commits untouched which has occured in
* last <code>config.getCleanerCommitsRetained()</code> commits - It leaves ONE commit before this
* window. We assume that the max(query execution time) == commit_batch_time *
* config.getCleanerCommitsRetained(). This is 12 hours by default. This is essential to leave the
* file used by the query thats running for the max time. <p> This provides the effect of having
* lookback into all changes that happened in the last X commits. (eg: if you retain 24 commits,
* and commit batch time is 30 mins, then you have 12 hrs of lookback) <p> This policy is the
* default.
*/
private List<String> getFilesToCleanKeepingLatestCommits(String partitionPath)
throws IOException {
int commitsRetained = config.getCleanerCommitsRetained();
logger.info(
"Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
List<String> deletePaths = new ArrayList<>();
for (HoodieFileGroup fileGroup : fileGroups) {
int keepVersions = config.getCleanerFileVersionsRetained();
Iterator<FileSlice> fileSliceIterator = fileGroup.getAllFileSlices().iterator();
while (fileSliceIterator.hasNext() && keepVersions > 0) {
// Skip this most recent version
FileSlice nextSlice = fileSliceIterator.next();
HoodieDataFile dataFile = nextSlice.getDataFile().get();
if(savepointedFiles.contains(dataFile.getFileName())) {
// do not clean up a savepoint data file
continue;
}
keepVersions--;
}
// Delete the remaining files
while (fileSliceIterator.hasNext()) {
FileSlice nextSlice = fileSliceIterator.next();
HoodieDataFile dataFile = nextSlice.getDataFile().get();
deletePaths.add(dataFile.getFileStatus().getPath().toString());
if (hoodieTable.getMetaClient().getTableType()
== HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well
deletePaths.addAll(nextSlice.getLogFiles()
.map(file -> file.getPath().toString())
.collect(Collectors.toList()));
}
// Collect all the datafiles savepointed by all the savepoints
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
.flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList());
// determine if we have enough commits, to start cleaning.
if (commitTimeline.countInstants() > commitsRetained) {
HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get();
List<HoodieFileGroup> fileGroups =
fileSystemView.getAllFileGroups(partitionPath)
.collect(Collectors.toList());
for (HoodieFileGroup fileGroup : fileGroups) {
List<FileSlice> fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList());
HoodieDataFile dataFile = fileSliceList.get(0).getDataFile().get();
String lastVersion = dataFile.getCommitTime();
String lastVersionBeforeEarliestCommitToRetain =
getLatestVersionBeforeCommit(fileSliceList, earliestCommitToRetain);
// Ensure there are more than 1 version of the file (we only clean old files from updates)
// i.e always spare the last commit.
for (FileSlice aSlice : fileSliceList) {
HoodieDataFile aFile = aSlice.getDataFile().get();
String fileCommitTime = aFile.getCommitTime();
if (savepointedFiles.contains(aFile.getFileName())) {
// do not clean up a savepoint data file
continue;
}
// Dont delete the latest commit and also the last commit before the earliest commit we are retaining
// The window of commit retain == max query run time. So a query could be running which still
// uses this file.
if (fileCommitTime.equals(lastVersion) || (
lastVersionBeforeEarliestCommitToRetain != null && fileCommitTime
.equals(lastVersionBeforeEarliestCommitToRetain))) {
// move on to the next file
continue;
}
// Always keep the last commit
if (HoodieTimeline.compareTimestamps(
earliestCommitToRetain.getTimestamp(),
fileCommitTime,
HoodieTimeline.GREATER)) {
// this is a commit, that should be cleaned.
deletePaths.add(aFile.getFileStatus().getPath().toString());
if (hoodieTable.getMetaClient().getTableType()
== HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well
deletePaths.addAll(aSlice.getLogFiles()
.map(file -> file.getPath().toString())
.collect(Collectors.toList()));
}
}
}
return deletePaths;
}
}
return deletePaths;
}
/**
* Selects the versions for file for cleaning, such that it
* <p>
* - Leaves the latest version of the file untouched
* - For older versions,
* - It leaves all the commits untouched which has occured in last <code>config.getCleanerCommitsRetained()</code> commits
* - It leaves ONE commit before this window. We assume that the max(query execution time) == commit_batch_time * config.getCleanerCommitsRetained(). This is 12 hours by default.
* This is essential to leave the file used by the query thats running for the max time.
* <p>
* This provides the effect of having lookback into all changes that happened in the last X
* commits. (eg: if you retain 24 commits, and commit batch time is 30 mins, then you have 12 hrs of lookback)
* <p>
* This policy is the default.
*
* @param partitionPath
* @return
* @throws IOException
*/
private List<String> getFilesToCleanKeepingLatestCommits(String partitionPath)
throws IOException {
int commitsRetained = config.getCleanerCommitsRetained();
logger.info(
"Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. ");
List<String> deletePaths = new ArrayList<>();
// Collect all the datafiles savepointed by all the savepoints
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
.flatMap(s -> hoodieTable.getSavepointedDataFiles(s)).collect(Collectors.toList());
// determine if we have enough commits, to start cleaning.
if (commitTimeline.countInstants() > commitsRetained) {
HoodieInstant earliestCommitToRetain = getEarliestCommitToRetain().get();
List<HoodieFileGroup> fileGroups =
fileSystemView.getAllFileGroups(partitionPath)
.collect(Collectors.toList());
for (HoodieFileGroup fileGroup : fileGroups) {
List<FileSlice> fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList());
HoodieDataFile dataFile = fileSliceList.get(0).getDataFile().get();
String lastVersion = dataFile.getCommitTime();
String lastVersionBeforeEarliestCommitToRetain =
getLatestVersionBeforeCommit(fileSliceList, earliestCommitToRetain);
// Ensure there are more than 1 version of the file (we only clean old files from updates)
// i.e always spare the last commit.
for (FileSlice aSlice : fileSliceList) {
HoodieDataFile aFile = aSlice.getDataFile().get();
String fileCommitTime = aFile.getCommitTime();
if(savepointedFiles.contains(aFile.getFileName())) {
// do not clean up a savepoint data file
continue;
}
// Dont delete the latest commit and also the last commit before the earliest commit we are retaining
// The window of commit retain == max query run time. So a query could be running which still
// uses this file.
if (fileCommitTime.equals(lastVersion) || (
lastVersionBeforeEarliestCommitToRetain != null && fileCommitTime
.equals(lastVersionBeforeEarliestCommitToRetain))) {
// move on to the next file
continue;
}
// Always keep the last commit
if (HoodieTimeline.compareTimestamps(
earliestCommitToRetain.getTimestamp(),
fileCommitTime,
HoodieTimeline.GREATER)) {
// this is a commit, that should be cleaned.
deletePaths.add(aFile.getFileStatus().getPath().toString());
if (hoodieTable.getMetaClient().getTableType()
== HoodieTableType.MERGE_ON_READ) {
// If merge on read, then clean the log files for the commits as well
deletePaths.addAll(aSlice.getLogFiles()
.map(file -> file.getPath().toString())
.collect(Collectors.toList()));
}
}
}
}
}
return deletePaths;
/**
* Gets the latest version < commitTime. This version file could still be used by queries.
*/
private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList,
HoodieInstant commitTime) {
for (FileSlice file : fileSliceList) {
String fileCommitTime = file.getDataFile().get().getCommitTime();
if (HoodieTimeline.compareTimestamps(commitTime.getTimestamp(), fileCommitTime,
HoodieTimeline.GREATER)) {
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the one we want
return fileCommitTime;
}
}
// There is no version of this file which is <= commitTime
return null;
}
/**
* Returns files to be cleaned for the given partitionPath based on cleaning policy.
*/
public List<String> getDeletePaths(String partitionPath) throws IOException {
HoodieCleaningPolicy policy = config.getCleanerPolicy();
List<String> deletePaths;
if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) {
deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath);
} else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath);
} else {
throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name());
}
logger.info(
deletePaths.size() + " patterns used to delete in partition path:" + partitionPath);
return deletePaths;
}
/**
* Returns earliest commit to retain based on cleaning policy.
*/
public Optional<HoodieInstant> getEarliestCommitToRetain() {
Optional<HoodieInstant> earliestCommitToRetain = Optional.empty();
int commitsRetained = config.getCleanerCommitsRetained();
if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS
&& commitTimeline.countInstants() > commitsRetained) {
earliestCommitToRetain =
commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained);
}
return earliestCommitToRetain;
}
}
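A tiny worked example of the retention arithmetic mentioned in the KEEP_LATEST_COMMITS javadoc: with 24 retained commits and a 30-minute commit batch time, the lookback window comes to 12 hours. The figures below are the illustrative defaults from that comment, not values read from HoodieWriteConfig.

public class CleanerRetentionExample {
  public static void main(String[] args) {
    // Assumed figures from the policy description above.
    int cleanerCommitsRetained = 24;   // commits kept by KEEP_LATEST_COMMITS
    int commitBatchMinutes = 30;       // average time between commits
    int lookbackMinutes = cleanerCommitsRetained * commitBatchMinutes;
    System.out.println("Lookback window = " + (lookbackMinutes / 60.0) + " hours"); // 12.0 hours
  }
}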

View File

@@ -39,6 +39,12 @@ import com.uber.hoodie.exception.HoodieCommitException;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem;
@@ -46,225 +52,233 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Archiver to bound the growth of <action>.commit files
*/
public class HoodieCommitArchiveLog {
  private static Logger log = LogManager.getLogger(HoodieCommitArchiveLog.class);
  private final Path archiveFilePath;
  private final FileSystem fs;
  private final HoodieWriteConfig config;
  private HoodieLogFormat.Writer writer;
  public HoodieCommitArchiveLog(HoodieWriteConfig config, FileSystem fs) {
    this.fs = fs;
    this.config = config;
    this.archiveFilePath = HoodieArchivedTimeline
        .getArchiveLogPath(config.getBasePath() + "/" + HoodieTableMetaClient.METAFOLDER_NAME);
  }
private HoodieLogFormat.Writer openWriter() {
try {
if (this.writer == null) {
return HoodieLogFormat.newWriterBuilder()
.onParentPath(archiveFilePath.getParent())
.withFileId(archiveFilePath.getName())
.withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION)
.withFs(fs)
.overBaseCommit("").build();
} else {
return this.writer;
}
} catch (InterruptedException | IOException e) {
throw new HoodieException("Unable to initialize HoodieLogFormat writer", e);
}
}
private void close() {
try {
if (this.writer != null) {
this.writer.close();
}
} catch (IOException e) {
throw new HoodieException("Unable to close HoodieLogFormat writer", e);
}
}
/**
* Check if commits need to be archived. If yes, archive commits.
*/
public boolean archiveIfRequired() {
try {
List<HoodieInstant> instantsToArchive = getInstantsToArchive().collect(Collectors.toList());
boolean success = true;
if (instantsToArchive.iterator().hasNext()) {
this.writer = openWriter();
log.info("Archiving instants " + instantsToArchive);
archive(instantsToArchive);
success = deleteArchivedInstants(instantsToArchive);
} else {
log.info("No Instants to archive");
}
return success;
} finally {
close();
}
}
private Stream<HoodieInstant> getInstantsToArchive() {
// TODO : rename to max/minInstantsToKeep
int maxCommitsToKeep = config.getMaxCommitsToKeep();
int minCommitsToKeep = config.getMinCommitsToKeep();
HoodieTable table = HoodieTable
.getHoodieTable(new HoodieTableMetaClient(fs, config.getBasePath(), true), config);
// GroupBy each action and limit each action timeline to maxCommitsToKeep
HoodieTimeline cleanAndRollbackTimeline = table.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieTimeline.CLEAN_ACTION,
HoodieTimeline.ROLLBACK_ACTION));
Stream<HoodieInstant> instants = cleanAndRollbackTimeline.getInstants()
.collect(Collectors.groupingBy(s -> s.getAction()))
.entrySet()
.stream()
.map(i -> {
if (i.getValue().size() > maxCommitsToKeep) {
return i.getValue().subList(0, i.getValue().size() - minCommitsToKeep);
} else {
return new ArrayList<HoodieInstant>();
}
})
.flatMap(i -> i.stream());
//TODO (na) : Add a way to return actions associated with a timeline and then merge/unify with logic above to avoid Stream.concats
HoodieTimeline commitTimeline = table.getCompletedCommitTimeline();
// We cannot have any holes in the commit timeline. We cannot archive any commits which are made after the first savepoint present.
Optional<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) {
// Actually do the commits
instants = Stream.concat(instants, commitTimeline.getInstants().filter(s -> {
// if no savepoint present, then dont filter
return !(firstSavepoint.isPresent() && HoodieTimeline
.compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(),
HoodieTimeline.LESSER_OR_EQUAL));
}).limit(commitTimeline.countInstants() - minCommitsToKeep));
}
return instants;
}
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) {
log.info("Deleting instants " + archivedInstants);
HoodieTableMetaClient metaClient =
new HoodieTableMetaClient(fs, config.getBasePath(), true);
boolean success = true;
for (HoodieInstant archivedInstant : archivedInstants) {
Path commitFile =
new Path(metaClient.getMetaPath(), archivedInstant.getFileName());
try {
if (fs.exists(commitFile)) {
success &= fs.delete(commitFile, false);
log.info("Archived and deleted instant file " + commitFile);
}
} catch (IOException e) {
throw new HoodieIOException("Failed to delete archived instant " + archivedInstant,
e);
}
}
return success;
}
public void archive(List<HoodieInstant> instants) throws HoodieCommitException {
try {
HoodieTableMetaClient metaClient =
new HoodieTableMetaClient(fs, config.getBasePath(), true);
HoodieTimeline commitTimeline =
metaClient.getActiveTimeline().getAllCommitsTimeline().filterCompletedInstants();
Schema wrapperSchema = HoodieArchivedMetaEntry.getClassSchema();
log.info("Wrapper schema " + wrapperSchema.toString());
List<IndexedRecord> records = new ArrayList<>();
for (HoodieInstant hoodieInstant : instants) {
records.add(convertToAvroRecord(commitTimeline, hoodieInstant));
}
HoodieAvroDataBlock block = new HoodieAvroDataBlock(records, wrapperSchema);
this.writer = writer.appendBlock(block);
} catch (Exception e) {
throw new HoodieCommitException("Failed to archive commits", e);
}
}
public Path getArchiveFilePath() {
return archiveFilePath;
}
private IndexedRecord convertToAvroRecord(HoodieTimeline commitTimeline,
HoodieInstant hoodieInstant) throws IOException {
HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
switch (hoodieInstant.getAction()) {
case HoodieTimeline.CLEAN_ACTION: {
archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(),
HoodieCleanMetadata.class));
archivedMetaWrapper.setActionType(ActionType.clean.name());
break;
}
case HoodieTimeline.COMMIT_ACTION: {
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get());
archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata));
archivedMetaWrapper.setActionType(ActionType.commit.name());
break;
}
case HoodieTimeline.COMPACTION_ACTION: {
com.uber.hoodie.common.model.HoodieCompactionMetadata compactionMetadata = com.uber.hoodie.common.model.HoodieCompactionMetadata
.fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get());
archivedMetaWrapper
.setHoodieCompactionMetadata(compactionMetadataConverter(compactionMetadata));
archivedMetaWrapper.setActionType(ActionType.compaction.name());
break;
}
case HoodieTimeline.ROLLBACK_ACTION: {
archivedMetaWrapper.setHoodieRollbackMetadata(AvroUtils
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(),
HoodieRollbackMetadata.class));
archivedMetaWrapper.setActionType(ActionType.rollback.name());
break;
}
case HoodieTimeline.SAVEPOINT_ACTION: {
archivedMetaWrapper.setHoodieSavePointMetadata(AvroUtils
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(),
HoodieSavepointMetadata.class));
archivedMetaWrapper.setActionType(ActionType.savepoint.name());
break;
}
case HoodieTimeline.DELTA_COMMIT_ACTION: {
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get());
archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata));
archivedMetaWrapper.setActionType(ActionType.commit.name());
break;
}
}
return archivedMetaWrapper;
}
private com.uber.hoodie.avro.model.HoodieCommitMetadata commitMetadataConverter(
HoodieCommitMetadata hoodieCommitMetadata) {
ObjectMapper mapper = new ObjectMapper();
//Need this to ignore other public get() methods
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
com.uber.hoodie.avro.model.HoodieCommitMetadata avroMetaData =
mapper.convertValue(hoodieCommitMetadata,
com.uber.hoodie.avro.model.HoodieCommitMetadata.class);
return avroMetaData;
}
private com.uber.hoodie.avro.model.HoodieCompactionMetadata compactionMetadataConverter(
HoodieCompactionMetadata hoodieCompactionMetadata) {
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
com.uber.hoodie.avro.model.HoodieCompactionMetadata avroMetaData = mapper
.convertValue(hoodieCompactionMetadata,
com.uber.hoodie.avro.model.HoodieCompactionMetadata.class);
return avroMetaData;
}
}
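For context, a minimal usage sketch of the archiver above; the helper name and the surrounding wiring are assumptions, not part of this commit:

// Hypothetical helper: archive old commit instants for the dataset described by 'config'.
static boolean archiveOldCommits(HoodieWriteConfig config) {
  FileSystem fs = FSUtils.getFs();
  HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(config, fs);
  // Writes eligible instants into the archive log and deletes them from the active timeline.
  return archiveLog.archiveIfRequired();
}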

View File

@@ -29,116 +29,111 @@ import com.uber.hoodie.exception.HoodieInsertException;
import com.uber.hoodie.io.storage.HoodieStorageWriter;
import com.uber.hoodie.io.storage.HoodieStorageWriterFactory;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.Optional;
import java.util.UUID;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;
public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOHandle<T> {
  private static Logger logger = LogManager.getLogger(HoodieCreateHandle.class);
  private final WriteStatus status;
  private final HoodieStorageWriter<IndexedRecord> storageWriter;
  private final Path path;
  private long recordsWritten = 0;
  private long recordsDeleted = 0;
public HoodieCreateHandle(HoodieWriteConfig config, String commitTime,
HoodieTable<T> hoodieTable, String partitionPath) {
super(config, commitTime, hoodieTable);
this.status = ReflectionUtils.loadClass(config.getWriteStatusClassName());
status.setFileId(UUID.randomUUID().toString());
status.setPartitionPath(partitionPath);
this.path = makeNewPath(partitionPath, TaskContext.getPartitionId(), status.getFileId());
try {
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs,
commitTime,
new Path(config.getBasePath()),
new Path(config.getBasePath(), partitionPath));
partitionMetadata.trySave(TaskContext.getPartitionId());
this.storageWriter =
HoodieStorageWriterFactory
.getStorageWriter(commitTime, path, hoodieTable, config, schema);
} catch (IOException e) {
throw new HoodieInsertException(
"Failed to initialize HoodieStorageWriter for path " + path, e);
}
logger.info("New InsertHandle for partition :" + partitionPath);
}
/**
   * Determines whether we can accept the incoming records into the current file, depending on
   *
   * - Whether it belongs to the same partitionPath as existing records
   * - Whether the current file's written bytes are less than the max file size
*/
public boolean canWrite(HoodieRecord record) {
return storageWriter.canWrite() && record.getPartitionPath()
.equals(status.getPartitionPath());
}
/**
* Perform the actual writing of the given record into the backing file.
*/
public void write(HoodieRecord record) {
Optional recordMetadata = record.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
if (avroRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
// update the new location of record, so we know where to find it next
record.setNewLocation(new HoodieRecordLocation(commitTime, status.getFileId()));
recordsWritten++;
} else {
recordsDeleted++;
}
record.deflate();
status.markSuccess(record, recordMetadata);
} catch (Throwable t) {
// Not throwing exception from here, since we don't want to fail the entire job
// for a single record
status.markFailure(record, t, recordMetadata);
logger.error("Error writing record " + record, t);
}
}
/**
   * Performs actions to durably persist the current changes and returns a WriteStatus object
*/
public WriteStatus close() {
logger.info(
"Closing the file " + status.getFileId() + " as we are done with all the records "
+ recordsWritten);
try {
storageWriter.close();
HoodieWriteStat stat = new HoodieWriteStat();
stat.setNumWrites(recordsWritten);
stat.setNumDeletes(recordsDeleted);
stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
stat.setFileId(status.getFileId());
String relativePath = path.toString().replace(new Path(config.getBasePath()) + "/", "");
stat.setPath(relativePath);
stat.setTotalWriteBytes(FSUtils.getFileSize(fs, path));
stat.setTotalWriteErrors(status.getFailedRecords().size());
status.setStat(stat);
return status;
} catch (IOException e) {
throw new HoodieInsertException("Failed to close the Insert Handle for path " + path,
e);
}
}
}
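A hedged sketch of how a create handle like the one above is typically driven; the loop and variable names are assumptions, and 'records' stands for an iterator of inserts for a single partition within a method generic over T extends HoodieRecordPayload:

// Hypothetical insert loop for one partitionPath within a Spark task.
HoodieCreateHandle<T> handle =
    new HoodieCreateHandle<>(config, commitTime, hoodieTable, partitionPath);
while (records.hasNext()) {
  HoodieRecord<T> record = records.next();
  if (!handle.canWrite(record)) {
    break; // file is full or the record targets another partition; a new handle would be opened
  }
  handle.write(record);
}
WriteStatus writeStatus = handle.close(); // flushes the file and fills in HoodieWriteStat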

View File

@@ -24,6 +24,7 @@ import com.uber.hoodie.common.util.HoodieAvroUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -31,68 +32,67 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
private static Logger logger = LogManager.getLogger(HoodieIOHandle.class);
protected final String commitTime;
protected final HoodieWriteConfig config;
protected final FileSystem fs;
protected final HoodieTable<T> hoodieTable;
protected HoodieTimeline hoodieTimeline;
protected TableFileSystemView.ReadOptimizedView fileSystemView;
protected final Schema schema;
public HoodieIOHandle(HoodieWriteConfig config, String commitTime,
HoodieTable<T> hoodieTable) {
this.commitTime = commitTime;
this.config = config;
this.fs = FSUtils.getFs();
this.hoodieTable = hoodieTable;
this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline();
this.fileSystemView = hoodieTable.getROFileSystemView();
this.schema =
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
}
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) {
Path path = new Path(config.getBasePath(), partitionPath);
try {
fs.mkdirs(path); // create a new partition as needed.
} catch (IOException e) {
throw new HoodieIOException("Failed to make dir " + path, e);
}
return new Path(path.toString(),
FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName));
}
/**
   * Deletes any new tmp files written during the current commit into the partition.
*/
public static void cleanupTmpFilesFromCurrentCommit(HoodieWriteConfig config,
String commitTime,
String partitionPath,
int taskPartitionId) {
FileSystem fs = FSUtils.getFs();
try {
FileStatus[] prevFailedFiles = fs.globStatus(new Path(String
.format("%s/%s/%s", config.getBasePath(), partitionPath,
FSUtils.maskWithoutFileId(commitTime, taskPartitionId))));
if (prevFailedFiles != null) {
logger.info("Deleting " + prevFailedFiles.length
+ " files generated by previous failed attempts.");
for (FileStatus status : prevFailedFiles) {
fs.delete(status.getPath(), false);
}
}
} catch (IOException e) {
throw new HoodieIOException("Failed to cleanup Temp files from commit " + commitTime,
e);
}
}
public Schema getSchema() {
return schema;
}
}
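For illustration, a hedged sketch of the tmp-file cleanup hook above, as it might be called before retrying a failed write task (the parameter values are assumptions):

// Hypothetical retry preparation: remove files left behind by earlier failed attempts of this
// commit in the given partition, keyed off the Spark task partition id.
HoodieIOHandle.cleanupTmpFilesFromCurrentCommit(config, commitTime, partitionPath,
    TaskContext.getPartitionId());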

View File

@@ -16,19 +16,23 @@
package com.uber.hoodie.io;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.HoodiePartitionMetadata;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieWriteStat;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieUpsertException;
import com.uber.hoodie.io.storage.HoodieStorageWriter;
import com.uber.hoodie.io.storage.HoodieStorageWriterFactory;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Optional;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
@@ -36,197 +40,197 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.TaskContext;
@SuppressWarnings("Duplicates")
public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHandle<T> {
private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class);
private WriteStatus writeStatus;
private HashMap<String, HoodieRecord<T>> keyToNewRecords;
private HoodieStorageWriter<IndexedRecord> storageWriter;
private Path newFilePath;
private Path oldFilePath;
private long recordsWritten = 0;
private long recordsDeleted = 0;
private long updatedRecordsWritten = 0;
public HoodieMergeHandle(HoodieWriteConfig config,
String commitTime,
HoodieTable<T> hoodieTable,
Iterator<HoodieRecord<T>> recordItr,
String fileId) {
super(config, commitTime, hoodieTable);
init(fileId, recordItr);
}
/**
* Load the new incoming records in a map, and extract the old file path.
*/
private void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
writeStatus.setStat(new HoodieWriteStat());
this.writeStatus = writeStatus;
this.keyToNewRecords = new HashMap<>();
try {
// Load the new records in a map
while (newRecordsItr.hasNext()) {
HoodieRecord<T> record = newRecordsItr.next();
// If the first record, we need to extract some info out
if (oldFilePath == null) {
String latestValidFilePath = fileSystemView
.getLatestDataFiles(record.getPartitionPath())
.filter(dataFile -> dataFile.getFileId().equals(fileId))
.findFirst()
.get().getFileName();
writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs,
commitTime,
new Path(config.getBasePath()),
new Path(config.getBasePath(), record.getPartitionPath()));
partitionMetadata.trySave(TaskContext.getPartitionId());
oldFilePath = new Path(
config.getBasePath() + "/" + record.getPartitionPath() + "/"
+ latestValidFilePath);
String relativePath = new Path( record.getPartitionPath() + "/" + FSUtils
.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString();
newFilePath = new Path(config.getBasePath(), relativePath);
// handle cases of partial failures, for update task
if (fs.exists(newFilePath)) {
fs.delete(newFilePath, false);
}
logger.info(String.format("Merging new data into oldPath %s, as newPath %s",
oldFilePath.toString(), newFilePath.toString()));
// file name is same for all records, in this bunch
writeStatus.setFileId(fileId);
writeStatus.setPartitionPath(record.getPartitionPath());
writeStatus.getStat().setFileId(fileId);
writeStatus.getStat().setPath(relativePath);
}
keyToNewRecords.put(record.getRecordKey(), record);
// update the new location of the record, so we know where to find it next
record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
}
// Create the writer for writing the new version file
storageWriter = HoodieStorageWriterFactory
.getStorageWriter(commitTime, newFilePath, hoodieTable, config, schema);
} catch (Exception e) {
logger.error("Error in update task at commit " + commitTime, e);
writeStatus.setGlobalError(e);
throw new HoodieUpsertException(
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
+ commitTime + " on path " + hoodieTable.getMetaClient().getBasePath(), e);
}
}
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord,
Optional<IndexedRecord> indexedRecord) {
Optional recordMetadata = hoodieRecord.getData().getMetadata();
try {
if (indexedRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(indexedRecord.get(), hoodieRecord);
recordsWritten++;
updatedRecordsWritten++;
} else {
recordsDeleted++;
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord, recordMetadata);
return true;
} catch (Exception e) {
logger.error("Error writing record " + hoodieRecord, e);
writeStatus.markFailure(hoodieRecord, e, recordMetadata);
}
return false;
}
/**
   * Go through an old record. If a newer version of it shows up here, we write the new one to
   * the file.
*/
public void write(GenericRecord oldRecord) {
String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key);
boolean copyOldRecord = true;
if (keyToNewRecords.containsKey(key)) {
try {
Optional<IndexedRecord> combinedAvroRecord = hoodieRecord.getData()
.combineAndGetUpdateValue(oldRecord, schema);
if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) {
/* ONLY WHEN
* 1) we have an update for this key AND
         * 2) We are able to successfully write the combined new value
*
* We no longer need to copy the old record over.
*/
copyOldRecord = false;
}
keyToNewRecords.remove(key);
} catch (Exception e) {
throw new HoodieUpsertException(
"Failed to combine/merge new record with old value in storage, for new record {"
+ keyToNewRecords.get(key) + "}, old value {" + oldRecord + "}", e);
}
}
if (copyOldRecord) {
// this should work as it is, since this is an existing record
String errMsg = "Failed to merge old record into new file for key " + key + " from old file "
+ getOldFilePath() + " to new file " + newFilePath;
try {
storageWriter.writeAvro(key, oldRecord);
} catch (ClassCastException e) {
logger.error(
"Schema mismatch when rewriting old record " + oldRecord + " from file "
+ getOldFilePath() + " to file " + newFilePath + " with schema " + schema
.toString(true));
throw new HoodieUpsertException(errMsg, e);
} catch (IOException e) {
logger.error("Failed to merge old record into new file for key " + key + " from old file "
+ getOldFilePath() + " to new file " + newFilePath, e);
throw new HoodieUpsertException(errMsg, e);
}
recordsWritten++;
}
}
public void close() {
try {
// write out any pending records (this can happen when inserts are turned into updates)
Iterator<String> pendingRecordsItr = keyToNewRecords.keySet().iterator();
while (pendingRecordsItr.hasNext()) {
String key = pendingRecordsItr.next();
HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key);
writeUpdateRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(schema));
}
keyToNewRecords.clear();
if (storageWriter != null) {
storageWriter.close();
}
writeStatus.getStat().setTotalWriteBytes(FSUtils.getFileSize(fs, newFilePath));
writeStatus.getStat().setNumWrites(recordsWritten);
writeStatus.getStat().setNumDeletes(recordsDeleted);
writeStatus.getStat().setNumUpdateWrites(updatedRecordsWritten);
writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size());
} catch (IOException e) {
throw new HoodieUpsertException("Failed to close UpdateHandle", e);
}
}
public Path getOldFilePath() {
return oldFilePath;
}
public WriteStatus getWriteStatus() {
return writeStatus;
}
}
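A hedged usage sketch of the merge handle above (the iterator names are assumptions): new records for one file id are loaded up front, every existing record is streamed through write(), and close() flushes anything left over:

// Hypothetical merge loop for a single fileId, within a method generic over T extends HoodieRecordPayload.
HoodieMergeHandle<T> mergeHandle =
    new HoodieMergeHandle<>(config, commitTime, hoodieTable, newRecordsItr, fileId);
while (oldRecordsItr.hasNext()) {
  // Each old record is either copied over unchanged or replaced with its combined update.
  mergeHandle.write(oldRecordsItr.next());
}
mergeHandle.close(); // also writes out inserts that were routed to this file as updates
WriteStatus writeStatus = mergeHandle.getWriteStatus();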

View File

@@ -18,7 +18,6 @@ package com.uber.hoodie.io.compact;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.io.compact.strategy.CompactionStrategy;
import java.io.Serializable;
@@ -27,8 +26,8 @@ import java.util.Map;
import java.util.stream.Collectors;
/**
 * Encapsulates all the needed information about a compaction and makes a decision whether this
 * compaction is effective or not.
*
* @see CompactionStrategy
*/

View File

@@ -22,29 +22,28 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import java.io.Serializable;
import java.util.Date;
import org.apache.spark.api.java.JavaSparkContext;
/**
* A HoodieCompactor runs compaction on a hoodie table
*/
public interface HoodieCompactor extends Serializable {
/**
* Compact the delta files with the data files
*/
HoodieCompactionMetadata compact(JavaSparkContext jsc, final HoodieWriteConfig config,
HoodieTable hoodieTable) throws Exception;
// Helper methods
default String startCompactionCommit(HoodieTable hoodieTable) {
String commitTime = HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
HoodieActiveTimeline activeTimeline = hoodieTable.getActiveTimeline();
activeTimeline
.createInflight(new HoodieInstant(true, HoodieTimeline.COMPACTION_ACTION, commitTime));
return commitTime;
}
}
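For orientation, a hedged driver sketch of this interface; HoodieRealtimeTableCompactor (shown later in this commit) is assumed to have a no-arg constructor, and jsc, config and hoodieTable are assumed to exist in scope:

// Hypothetical compaction driver.
HoodieCompactor compactor = new HoodieRealtimeTableCompactor();
// startCompactionCommit(...) is invoked inside compact(...) implementations to create the
// inflight compaction instant before any files are rewritten.
HoodieCompactionMetadata compactionMetadata = compactor.compact(jsc, config, hoodieTable);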

View File

@@ -16,14 +16,14 @@
package com.uber.hoodie.io.compact;
import static java.util.stream.Collectors.toList;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.uber.hoodie.WriteStatus;
import com.uber.hoodie.common.model.CompactionWriteStat;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieCompactionMetadata;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.model.HoodieTableType;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
@@ -36,7 +36,12 @@ import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieCompactionException;
import com.uber.hoodie.table.HoodieCopyOnWriteTable;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.avro.Schema;
@@ -46,18 +51,10 @@ import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
/**
 * HoodieRealtimeTableCompactor compacts a hoodie table with merge on read storage. Computes all
 * possible compactions, passes them through a CompactionFilter, executes all the compactions and
 * writes a new version of base files and makes a normal commit.
*
* @see HoodieCompactor
*/
@@ -80,7 +77,8 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
String compactionCommit = startCompactionCommit(hoodieTable);
log.info("Compacting " + metaClient.getBasePath() + " with commit " + compactionCommit);
List<String> partitionPaths =
FSUtils.getAllPartitionPaths(metaClient.getFs(), metaClient.getBasePath(),
config.shouldAssumeDatePartitioning());
log.info("Compaction looking for files to compact in " + partitionPaths + " partitions");
List<CompactionOperation> operations =
@@ -89,7 +87,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
.getRTFileSystemView()
.getLatestFileSlices(partitionPath)
.map(s -> new CompactionOperation(s.getDataFile().get(),
partitionPath, s.getLogFiles().collect(Collectors.toList()), config))
.collect(toList()).iterator()).collect();
log.info("Total of " + operations.size() + " compactions are retrieved");
@@ -150,14 +148,15 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
// Since a DeltaCommit is not defined yet, reading all the records. revisit this soon.
String maxInstantTime = metaClient.getActiveTimeline()
.getTimelineOfActions(
Sets.newHashSet(HoodieTimeline.COMMIT_ACTION,
HoodieTimeline.COMPACTION_ACTION,
HoodieTimeline.DELTA_COMMIT_ACTION))
.filterCompletedInstants().lastInstant().get().getTimestamp();
HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs,
metaClient.getBasePath(),
operation.getDeltaFilePaths(), readerSchema, maxInstantTime);
if (!scanner.iterator().hasNext()) {
return Lists.newArrayList();
}

View File

@@ -28,8 +28,8 @@ import java.util.Map;
import java.util.Optional;
/**
* CompactionStrategy which looks at total IO to be done for the compaction (read + write) and
* limits the list of compactions to be under a configured limit on the IO
*
* @see CompactionStrategy
*/
@@ -46,7 +46,7 @@ public class BoundedIOCompactionStrategy implements CompactionStrategy {
// Total size of all the log files
Long totalLogFileSize = logFiles.stream().map(HoodieLogFile::getFileSize).filter(
Optional::isPresent).map(Optional::get).reduce(
(size1, size2) -> size1 + size2).orElse(0L);
// Total read will be the base file + all the log files
Long totalIORead = FSUtils.getSizeInMB(dataFile.getFileSize() + totalLogFileSize);
// Total write will be similar to the size of the base file
@@ -62,7 +62,8 @@ public class BoundedIOCompactionStrategy implements CompactionStrategy {
}
@Override
public List<CompactionOperation> orderAndFilter(HoodieWriteConfig writeConfig,
List<CompactionOperation> operations) {
// Iterate through the operations in order and accept operations as long as we are within the IO limit
// Preserves the original ordering of compactions
List<CompactionOperation> finalOperations = Lists.newArrayList();
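To make the IO accounting above concrete, a small illustration with made-up sizes (not taken from this commit):

// Illustrative arithmetic only: how a single CompactionOperation contributes to the IO budget.
long baseFileBytes = 120L * 1024 * 1024;  // 120 MB base (data) file
long totalLogBytes = 40L * 1024 * 1024;   // 40 MB of delta log files to merge
long totalIoReadMb = (baseFileBytes + totalLogBytes) / (1024 * 1024);  // read = base + logs = 160
long totalIoWriteMb = baseFileBytes / (1024 * 1024);                   // write ~= rewritten base = 120
long totalIoMb = totalIoReadMb + totalIoWriteMb;                       // 280 MB counted against the limit
// orderAndFilter(...) then walks the operations in order, accepting them while the running sum
// of these per-operation totals stays under the configured IO bound.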

View File

@@ -25,12 +25,12 @@ import java.util.List;
import java.util.Map;
/**
 * Strategy for compaction. Pluggable implementation to define how compaction should be done. The
 * implementations of this interface can capture the relevant metrics to order and filter the final
 * list of compaction operations to run in a single compaction.
 *
 * Implementations of CompactionStrategy cannot hold any state. Different instantiations can be
 * passed in every time.
*
* @see com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor
* @see CompactionOperation
@@ -38,8 +38,8 @@ import java.util.Map;
public interface CompactionStrategy extends Serializable {
/**
* Callback hook when a CompactionOperation is created. Individual strategies can capture the
* metrics they need to decide on the priority.
*
* @param dataFile - Base file to compact
* @param partitionPath - Partition path
@@ -50,8 +50,8 @@ public interface CompactionStrategy extends Serializable {
List<HoodieLogFile> logFiles);
/**
* Order and Filter the list of compactions. Use the metrics captured with the captureMetrics to
* order and filter out compactions
*
* @param writeConfig - HoodieWriteConfig - config for this compaction is passed in
* @param operations - list of compactions collected

View File

@@ -27,8 +27,8 @@ import java.util.Optional;
import java.util.stream.Collectors;
/**
* LogFileSizeBasedCompactionStrategy orders the compactions based on the total log files size and
* limits the compactions within a configured IO bound
*
* @see BoundedIOCompactionStrategy
* @see CompactionStrategy

View File

@@ -25,9 +25,9 @@ import java.util.List;
import java.util.Map;
/**
 * UnBoundedCompactionStrategy will not change ordering or filter any compaction. It is a
 * pass-through and will compact all the base files which have a log file. This usually means
 * no intelligence is applied when selecting compactions.
*
* @see CompactionStrategy
*/

View File

@@ -17,50 +17,50 @@
package com.uber.hoodie.io.storage;
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
public class HoodieParquetConfig {
private HoodieAvroWriteSupport writeSupport;
private CompressionCodecName compressionCodecName;
private int blockSize;
private int pageSize;
private int maxFileSize;
private Configuration hadoopConf;
public HoodieParquetConfig(HoodieAvroWriteSupport writeSupport,
CompressionCodecName compressionCodecName, int blockSize, int pageSize, int maxFileSize,
Configuration hadoopConf) {
this.writeSupport = writeSupport;
this.compressionCodecName = compressionCodecName;
this.blockSize = blockSize;
this.pageSize = pageSize;
this.maxFileSize = maxFileSize;
this.hadoopConf = hadoopConf;
}
  public HoodieAvroWriteSupport getWriteSupport() {
    return writeSupport;
  }
  public CompressionCodecName getCompressionCodecName() {
    return compressionCodecName;
  }
  public int getBlockSize() {
    return blockSize;
  }
  public int getPageSize() {
    return pageSize;
  }
  public int getMaxFileSize() {
    return maxFileSize;
  }
  public Configuration getHadoopConf() {
    return hadoopConf;
  }
}
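A hedged construction example for the config above; the size values are illustrative and the write support instance is assumed to be built elsewhere (its constructor is not shown in this hunk):

// Hypothetical factory method wiring up a HoodieParquetConfig.
static HoodieParquetConfig exampleParquetConfig(HoodieAvroWriteSupport writeSupport,
    Configuration hadoopConf) {
  int blockSize = 120 * 1024 * 1024;    // parquet row group size (~120 MB), illustrative
  int pageSize = 1024 * 1024;           // parquet page size (1 MB), illustrative
  int maxFileSize = 120 * 1024 * 1024;  // cap enforced by the Hoodie writer, illustrative
  return new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP, blockSize, pageSize,
      maxFileSize, hadoopConf);
}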

View File

@@ -20,6 +20,8 @@ import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.util.HoodieAvroUtils;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
@@ -30,79 +32,76 @@ import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.spark.TaskContext;
/**
* HoodieParquetWriter extends the ParquetWriter to help limit the size of underlying file. Provides
 * a way to check if the current file can take more records with the <code>canWrite()</code> method.
*/
public class HoodieParquetWriter<T extends HoodieRecordPayload, R extends IndexedRecord>
extends ParquetWriter<IndexedRecord> implements HoodieStorageWriter<R> {
private static double STREAM_COMPRESSION_RATIO = 0.1;
private static AtomicLong recordIndex = new AtomicLong(1);
private static double STREAM_COMPRESSION_RATIO = 0.1;
private static AtomicLong recordIndex = new AtomicLong(1);
private final Path file;
private final HoodieWrapperFileSystem fs;
private final long maxFileSize;
private final HoodieAvroWriteSupport writeSupport;
private final String commitTime;
private final Schema schema;
private final Path file;
private final HoodieWrapperFileSystem fs;
private final long maxFileSize;
private final HoodieAvroWriteSupport writeSupport;
private final String commitTime;
private final Schema schema;
private static Configuration registerFileSystem(Configuration conf) {
Configuration returnConf = new Configuration(conf);
String scheme = FileSystem.getDefaultUri(conf).getScheme();
returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl",
HoodieWrapperFileSystem.class.getName());
return returnConf;
}
private static Configuration registerFileSystem(Configuration conf) {
Configuration returnConf = new Configuration(conf);
String scheme = FileSystem.getDefaultUri(conf).getScheme();
returnConf.set("fs." + HoodieWrapperFileSystem.getHoodieScheme(scheme) + ".impl",
HoodieWrapperFileSystem.class.getName());
return returnConf;
}
public HoodieParquetWriter(String commitTime, Path file,
HoodieParquetConfig parquetConfig, Schema schema) throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(),
parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(),
parquetConfig.getPageSize(), parquetConfig.getPageSize(),
ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION,
registerFileSystem(parquetConfig.getHadoopConf()));
this.file =
HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
this.fs = (HoodieWrapperFileSystem) this.file
.getFileSystem(registerFileSystem(parquetConfig.getHadoopConf()));
// We cannot accurately measure the snappy compressed output file size. We are choosing a conservative 10%
// TODO - compute this compression ratio dynamically by looking at the bytes written to the stream and the actual file size reported by HDFS
this.maxFileSize = parquetConfig.getMaxFileSize() + Math
.round(parquetConfig.getMaxFileSize() * STREAM_COMPRESSION_RATIO);
this.writeSupport = parquetConfig.getWriteSupport();
this.commitTime = commitTime;
this.schema = schema;
}
public HoodieParquetWriter(String commitTime, Path file,
HoodieParquetConfig parquetConfig, Schema schema) throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(),
parquetConfig.getCompressionCodecName(), parquetConfig.getBlockSize(),
parquetConfig.getPageSize(), parquetConfig.getPageSize(),
ParquetWriter.DEFAULT_IS_DICTIONARY_ENABLED,
ParquetWriter.DEFAULT_IS_VALIDATING_ENABLED, ParquetWriter.DEFAULT_WRITER_VERSION,
registerFileSystem(parquetConfig.getHadoopConf()));
this.file =
HoodieWrapperFileSystem.convertToHoodiePath(file, parquetConfig.getHadoopConf());
this.fs = (HoodieWrapperFileSystem) this.file
.getFileSystem(registerFileSystem(parquetConfig.getHadoopConf()));
// We cannot accurately measure the snappy compressed output file size. We are choosing a conservative 10%
// TODO - compute this compression ratio dynamically by looking at the bytes written to the stream and the actual file size reported by HDFS
this.maxFileSize = parquetConfig.getMaxFileSize() + Math
.round(parquetConfig.getMaxFileSize() * STREAM_COMPRESSION_RATIO);
this.writeSupport = parquetConfig.getWriteSupport();
this.commitTime = commitTime;
this.schema = schema;
}
@Override
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(),
recordIndex.getAndIncrement());
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord,
record.getRecordKey(),
record.getPartitionPath(),
file.getName());
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId);
super.write(avroRecord);
writeSupport.add(record.getRecordKey());
}
@Override
public void writeAvroWithMetadata(R avroRecord, HoodieRecord record) throws IOException {
String seqId = HoodieRecord.generateSequenceId(commitTime, TaskContext.getPartitionId(),
recordIndex.getAndIncrement());
HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord,
record.getRecordKey(),
record.getPartitionPath(),
file.getName());
HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord, commitTime, seqId);
super.write(avroRecord);
writeSupport.add(record.getRecordKey());
}
public boolean canWrite() {
return fs.getBytesWritten(file) < maxFileSize;
}
public boolean canWrite() {
return fs.getBytesWritten(file) < maxFileSize;
}
@Override public void writeAvro(String key, IndexedRecord object) throws IOException {
super.write(object);
writeSupport.add(key);
}
@Override
public void writeAvro(String key, IndexedRecord object) throws IOException {
super.write(object);
writeSupport.add(key);
}
}
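The canWrite() contract implies the usual consumption pattern: keep writing while the writer reports room, then close. A minimal hedged sketch, assuming the records are already converted to avro and keyed by record key (hoodie-internal imports elided):

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;

class ParquetWriterExample {
  // Writes pre-converted avro records into a single parquet file until the
  // compression-padded size limit is reached, then closes the writer.
  static void writeUntilFull(String commitTime, Path file, HoodieParquetConfig config,
      Schema schema, Iterator<Map.Entry<String, IndexedRecord>> records) throws IOException {
    HoodieParquetWriter<HoodieRecordPayload, IndexedRecord> writer =
        new HoodieParquetWriter<>(commitTime, file, config, schema);
    try {
      while (records.hasNext() && writer.canWrite()) {
        Map.Entry<String, IndexedRecord> entry = records.next();
        // writeAvro also records the key with the write support, feeding the bloom filter
        writer.writeAvro(entry.getKey(), entry.getValue());
      }
    } finally {
      writer.close();
    }
  }
}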

View File

@@ -17,13 +17,16 @@
package com.uber.hoodie.io.storage;
import com.uber.hoodie.common.model.HoodieRecord;
import java.io.IOException;
import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
public interface HoodieStorageWriter<R extends IndexedRecord> {
void writeAvroWithMetadata(R newRecord, HoodieRecord record) throws IOException;
boolean canWrite();
void close() throws IOException;
void writeAvro(String key, R oldRecord) throws IOException;
void writeAvroWithMetadata(R newRecord, HoodieRecord record) throws IOException;
boolean canWrite();
void close() throws IOException;
void writeAvro(String key, R oldRecord) throws IOException;
}

View File

@@ -16,42 +16,42 @@
package com.uber.hoodie.io.storage;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import com.uber.hoodie.common.BloomFilter;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import java.io.IOException;
public class HoodieStorageWriterFactory {
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
String commitTime, Path path, HoodieTable<T> hoodieTable, HoodieWriteConfig config, Schema schema)
throws IOException {
//TODO - based on the metadata choose the implementation of HoodieStorageWriter
// Currently only parquet is supported
return newParquetStorageWriter(commitTime, path, config, schema);
}
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(
String commitTime, Path path, HoodieWriteConfig config, Schema schema) throws IOException {
BloomFilter filter =
new BloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP());
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
public static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> getStorageWriter(
String commitTime, Path path, HoodieTable<T> hoodieTable, HoodieWriteConfig config,
Schema schema)
throws IOException {
//TODO - based on the metadata choose the implementation of HoodieStorageWriter
// Currently only parquet is supported
return newParquetStorageWriter(commitTime, path, config, schema);
}
HoodieParquetConfig parquetConfig =
new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
config.getParquetBlockSize(), config.getParquetPageSize(),
config.getParquetMaxFileSize(), FSUtils.getFs().getConf());
private static <T extends HoodieRecordPayload, R extends IndexedRecord> HoodieStorageWriter<R> newParquetStorageWriter(
String commitTime, Path path, HoodieWriteConfig config, Schema schema) throws IOException {
BloomFilter filter =
new BloomFilter(config.getBloomFilterNumEntries(), config.getBloomFilterFPP());
HoodieAvroWriteSupport writeSupport =
new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);
return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema);
}
HoodieParquetConfig parquetConfig =
new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
config.getParquetBlockSize(), config.getParquetPageSize(),
config.getParquetMaxFileSize(), FSUtils.getFs().getConf());
return new HoodieParquetWriter<>(commitTime, path, parquetConfig, schema);
}
}
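Callers are expected to go through the factory rather than constructing the parquet writer directly; a hedged sketch of that call, with the table, config and schema assumed to be in hand (hoodie-internal imports elided):

import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;

class StorageWriterExample {
  // Resolves a writer for a new file at `path`; today this always yields a
  // HoodieParquetWriter, as the TODO in the factory notes.
  static <T extends HoodieRecordPayload> HoodieStorageWriter<IndexedRecord> open(
      String commitTime, Path path, HoodieTable<T> table, HoodieWriteConfig config,
      Schema schema) throws IOException {
    return HoodieStorageWriterFactory.getStorageWriter(commitTime, path, table, config, schema);
  }
}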

View File

@@ -16,44 +16,47 @@
package com.uber.hoodie.io.storage;
import org.apache.hadoop.fs.FSDataOutputStream;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.fs.FSDataOutputStream;
/**
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes.
* This gives a cheap way to check on the underlying file size.
* Wrapper over <code>FSDataOutputStream</code> to keep track of the size of the written bytes. This
* gives a cheap way to check on the underlying file size.
*/
public class SizeAwareFSDataOutputStream extends FSDataOutputStream {
// A callback to call when the output stream is closed.
private final Runnable closeCallback;
// Keep track of the bytes written
private final AtomicLong bytesWritten = new AtomicLong(0L);
public SizeAwareFSDataOutputStream(FSDataOutputStream out, Runnable closeCallback)
throws IOException {
super(out);
this.closeCallback = closeCallback;
}
// A callback to call when the output stream is closed.
private final Runnable closeCallback;
// Keep track of the bytes written
private final AtomicLong bytesWritten = new AtomicLong(0L);
@Override public synchronized void write(byte[] b, int off, int len) throws IOException {
bytesWritten.addAndGet(len);
super.write(b, off, len);
}
public SizeAwareFSDataOutputStream(FSDataOutputStream out, Runnable closeCallback)
throws IOException {
super(out);
this.closeCallback = closeCallback;
}
@Override public void write(byte[] b) throws IOException {
bytesWritten.addAndGet(b.length);
super.write(b);
}
@Override
public synchronized void write(byte[] b, int off, int len) throws IOException {
bytesWritten.addAndGet(len);
super.write(b, off, len);
}
@Override public void close() throws IOException {
super.close();
closeCallback.run();
}
@Override
public void write(byte[] b) throws IOException {
bytesWritten.addAndGet(b.length);
super.write(b);
}
public long getBytesWritten() {
return bytesWritten.get();
}
@Override
public void close() throws IOException {
super.close();
closeCallback.run();
}
public long getBytesWritten() {
return bytesWritten.get();
}
}
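A small hedged usage sketch: wrap the raw stream once, write through the wrapper, and read getBytesWritten() instead of re-stat-ing the file.

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class SizeAwareStreamExample {
  // The close callback is whatever bookkeeping the caller wants; here it only logs.
  static long writeAndMeasure(FileSystem fs, Path path, byte[] payload) throws IOException {
    FSDataOutputStream raw = fs.create(path);
    SizeAwareFSDataOutputStream out =
        new SizeAwareFSDataOutputStream(raw, () -> System.out.println("closed " + path));
    try {
      out.write(payload);
    } finally {
      out.close();
    }
    return out.getBytesWritten(); // cheap in-memory counter, no extra filesystem call
  }
}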

View File

@@ -22,7 +22,6 @@ import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import com.uber.hoodie.common.model.HoodieCommitMetadata;
import com.uber.hoodie.config.HoodieWriteConfig;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -30,119 +29,122 @@ import org.apache.log4j.Logger;
* Wrapper for metrics-related operations.
*/
public class HoodieMetrics {
private HoodieWriteConfig config = null;
private String tableName = null;
private static Logger logger = LogManager.getLogger(HoodieMetrics.class);
// Some timers
public String rollbackTimerName = null;
public String cleanTimerName = null;
public String commitTimerName = null;
private Timer rollbackTimer = null;
private Timer cleanTimer = null;
private Timer commitTimer = null;
public HoodieMetrics(HoodieWriteConfig config, String tableName) {
this.config = config;
this.tableName = tableName;
if (config.isMetricsOn()) {
Metrics.init(config);
this.rollbackTimerName = getMetricsName("timer", "rollback");
this.cleanTimerName = getMetricsName("timer", "clean");
this.commitTimerName = getMetricsName("timer", "commit");
private HoodieWriteConfig config = null;
private String tableName = null;
private static Logger logger = LogManager.getLogger(HoodieMetrics.class);
// Some timers
public String rollbackTimerName = null;
public String cleanTimerName = null;
public String commitTimerName = null;
private Timer rollbackTimer = null;
private Timer cleanTimer = null;
private Timer commitTimer = null;
public HoodieMetrics(HoodieWriteConfig config, String tableName) {
this.config = config;
this.tableName = tableName;
if (config.isMetricsOn()) {
Metrics.init(config);
this.rollbackTimerName = getMetricsName("timer", "rollback");
this.cleanTimerName = getMetricsName("timer", "clean");
this.commitTimerName = getMetricsName("timer", "commit");
}
}
private Timer createTimer(String name) {
return config.isMetricsOn() ? Metrics.getInstance().getRegistry().timer(name) : null;
}
public Timer.Context getRollbackCtx() {
if (config.isMetricsOn() && rollbackTimer == null) {
rollbackTimer = createTimer(rollbackTimerName);
}
return rollbackTimer == null ? null : rollbackTimer.time();
}
public Timer.Context getCleanCtx() {
if (config.isMetricsOn() && cleanTimer == null) {
cleanTimer = createTimer(cleanTimerName);
}
return cleanTimer == null ? null : cleanTimer.time();
}
public Timer.Context getCommitCtx() {
if (config.isMetricsOn() && commitTimer == null) {
commitTimer = createTimer(commitTimerName);
}
return commitTimer == null ? null : commitTimer.time();
}
public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs,
HoodieCommitMetadata metadata) {
if (config.isMetricsOn()) {
long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten();
long totalFilesInsert = metadata.fetchTotalFilesInsert();
long totalFilesUpdate = metadata.fetchTotalFilesUpdated();
long totalRecordsWritten = metadata.fetchTotalRecordsWritten();
long totalUpdateRecordsWritten = metadata.fetchTotalUpdateRecordsWritten();
long totalInsertRecordsWritten = metadata.fetchTotalInsertRecordsWritten();
long totalBytesWritten = metadata.fetchTotalBytesWritten();
registerGauge(getMetricsName("commit", "duration"), durationInMs);
registerGauge(getMetricsName("commit", "totalPartitionsWritten"), totalPartitionsWritten);
registerGauge(getMetricsName("commit", "totalFilesInsert"), totalFilesInsert);
registerGauge(getMetricsName("commit", "totalFilesUpdate"), totalFilesUpdate);
registerGauge(getMetricsName("commit", "totalRecordsWritten"), totalRecordsWritten);
registerGauge(getMetricsName("commit", "totalUpdateRecordsWritten"),
totalUpdateRecordsWritten);
registerGauge(getMetricsName("commit", "totalInsertRecordsWritten"),
totalInsertRecordsWritten);
registerGauge(getMetricsName("commit", "totalBytesWritten"), totalBytesWritten);
registerGauge(getMetricsName("commit", "commitTime"), commitEpochTimeInMs);
}
}
public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) {
if (config.isMetricsOn()) {
logger.info(String.format("Sending rollback metrics (duration=%d, numFilesDeleted=$d)",
durationInMs, numFilesDeleted));
registerGauge(getMetricsName("rollback", "duration"), durationInMs);
registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted);
}
}
public void updateCleanMetrics(long durationInMs, int numFilesDeleted) {
if (config.isMetricsOn()) {
logger.info(String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)",
durationInMs, numFilesDeleted));
registerGauge(getMetricsName("clean", "duration"), durationInMs);
registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted);
}
}
@VisibleForTesting
String getMetricsName(String action, String metric) {
return config == null ? null :
String.format("%s.%s.%s", tableName, action, metric);
}
void registerGauge(String metricName, final long value) {
try {
MetricRegistry registry = Metrics.getInstance().getRegistry();
registry.register(metricName, new Gauge<Long>() {
@Override
public Long getValue() {
return value;
}
});
} catch (Exception e) {
// Here we catch all exceptions, so the major upsert pipeline will not be affected if the metrics system
// has some issues.
logger.error("Failed to send metrics: ", e);
}
}
private Timer createTimer(String name) {
return config.isMetricsOn() ? Metrics.getInstance().getRegistry().timer(name) : null;
}
public Timer.Context getRollbackCtx() {
if (config.isMetricsOn() && rollbackTimer == null) {
rollbackTimer = createTimer(rollbackTimerName);
}
return rollbackTimer == null ? null : rollbackTimer.time();
}
public Timer.Context getCleanCtx() {
if (config.isMetricsOn() && cleanTimer == null) {
cleanTimer = createTimer(cleanTimerName);
}
return cleanTimer == null ? null : cleanTimer.time();
}
public Timer.Context getCommitCtx() {
if (config.isMetricsOn() && commitTimer == null) {
commitTimer = createTimer(commitTimerName);
}
return commitTimer == null ? null : commitTimer.time();
}
public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, HoodieCommitMetadata metadata) {
if (config.isMetricsOn()) {
long totalPartitionsWritten = metadata.fetchTotalPartitionsWritten();
long totalFilesInsert = metadata.fetchTotalFilesInsert();
long totalFilesUpdate = metadata.fetchTotalFilesUpdated();
long totalRecordsWritten = metadata.fetchTotalRecordsWritten();
long totalUpdateRecordsWritten = metadata.fetchTotalUpdateRecordsWritten();
long totalInsertRecordsWritten = metadata.fetchTotalInsertRecordsWritten();
long totalBytesWritten = metadata.fetchTotalBytesWritten();
registerGauge(getMetricsName("commit", "duration"), durationInMs);
registerGauge(getMetricsName("commit", "totalPartitionsWritten"), totalPartitionsWritten);
registerGauge(getMetricsName("commit", "totalFilesInsert"), totalFilesInsert);
registerGauge(getMetricsName("commit", "totalFilesUpdate"), totalFilesUpdate);
registerGauge(getMetricsName("commit", "totalRecordsWritten"), totalRecordsWritten);
registerGauge(getMetricsName("commit", "totalUpdateRecordsWritten"), totalUpdateRecordsWritten);
registerGauge(getMetricsName("commit", "totalInsertRecordsWritten"), totalInsertRecordsWritten);
registerGauge(getMetricsName("commit", "totalBytesWritten"), totalBytesWritten);
registerGauge(getMetricsName("commit", "commitTime"), commitEpochTimeInMs);
}
}
public void updateRollbackMetrics(long durationInMs, long numFilesDeleted) {
if (config.isMetricsOn()) {
logger.info(String.format("Sending rollback metrics (duration=%d, numFilesDeleted=$d)",
durationInMs, numFilesDeleted));
registerGauge(getMetricsName("rollback", "duration"), durationInMs);
registerGauge(getMetricsName("rollback", "numFilesDeleted"), numFilesDeleted);
}
}
public void updateCleanMetrics(long durationInMs, int numFilesDeleted) {
if (config.isMetricsOn()) {
logger.info(String.format("Sending clean metrics (duration=%d, numFilesDeleted=%d)",
durationInMs, numFilesDeleted));
registerGauge(getMetricsName("clean", "duration"), durationInMs);
registerGauge(getMetricsName("clean", "numFilesDeleted"), numFilesDeleted);
}
}
@VisibleForTesting
String getMetricsName(String action, String metric) {
return config == null ? null :
String.format("%s.%s.%s", tableName, action, metric);
}
void registerGauge(String metricName, final long value) {
try {
MetricRegistry registry = Metrics.getInstance().getRegistry();
registry.register(metricName, new Gauge<Long>() {
@Override
public Long getValue() {
return value;
}
});
} catch (Exception e) {
// Here we catch all exceptions, so the major upsert pipeline will not be affected if the metrics system
// has some issues.
logger.error("Failed to send metrics: ", e);
}
}
/**
 * By default, the timer context returns duration in nanoseconds.
 * Convert it to milliseconds.
*/
public long getDurationInMs(long ctxDuration) {
return ctxDuration / 1000000;
}
/**
 * By default, the timer context returns duration in nanoseconds. Convert it to milliseconds.
*/
public long getDurationInMs(long ctxDuration) {
return ctxDuration / 1000000;
}
}
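In practice the write path pairs a timer context with the matching update call once the action finishes. A hedged sketch of the commit case, assuming the metrics object and commit metadata already exist (hoodie-internal imports elided):

import com.codahale.metrics.Timer;

class HoodieMetricsExample {
  // Times a commit and publishes the commit gauges defined above; the timer
  // context is null when metrics are disabled, so guard the update.
  static void reportCommit(HoodieMetrics metrics, HoodieCommitMetadata metadata,
      long commitEpochTimeInMs) {
    Timer.Context ctx = metrics.getCommitCtx();
    // ... perform the actual commit work here ...
    if (ctx != null) {
      long durationInMs = metrics.getDurationInMs(ctx.stop());
      metrics.updateCommitMetrics(commitEpochTimeInMs, durationInMs, metadata);
    }
  }
}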

View File

@@ -22,16 +22,17 @@ import java.io.Closeable;
* Used for testing.
*/
public class InMemoryMetricsReporter extends MetricsReporter {
@Override
public void start() {
}
@Override
public void report() {
}
@Override
public void start() {
}
@Override
public Closeable getReporter() {
return null;
}
@Override
public void report() {
}
@Override
public Closeable getReporter() {
return null;
}
}

View File

@@ -19,65 +19,64 @@ package com.uber.hoodie.metrics;
import com.codahale.metrics.MetricRegistry;
import com.google.common.io.Closeables;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.config.HoodieMetricsConfig;
import com.uber.hoodie.exception.HoodieException;
import org.apache.commons.configuration.ConfigurationException;
import java.io.Closeable;
import org.apache.commons.configuration.ConfigurationException;
/**
* This is the main class of the metrics system.
*/
public class Metrics {
private static volatile boolean initialized = false;
private static Metrics metrics = null;
private final MetricRegistry registry;
private MetricsReporter reporter = null;
private Metrics(HoodieWriteConfig metricConfig) throws ConfigurationException {
registry = new MetricRegistry();
private static volatile boolean initialized = false;
private static Metrics metrics = null;
private final MetricRegistry registry;
private MetricsReporter reporter = null;
reporter = MetricsReporterFactory.createReporter(metricConfig, registry);
if (reporter == null) {
throw new RuntimeException("Cannot initialize Reporter.");
}
private Metrics(HoodieWriteConfig metricConfig) throws ConfigurationException {
registry = new MetricRegistry();
reporter = MetricsReporterFactory.createReporter(metricConfig, registry);
if (reporter == null) {
throw new RuntimeException("Cannot initialize Reporter.");
}
// reporter.start();
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
try {
reporter.report();
Closeables.close(reporter.getReporter(), true);
} catch (Exception e) {
e.printStackTrace();
}
}
});
}
public static Metrics getInstance() {
assert initialized;
return metrics;
}
public static synchronized void init(HoodieWriteConfig metricConfig) {
if (initialized) {
return;
}
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
try {
metrics = new Metrics(metricConfig);
} catch (ConfigurationException e) {
throw new HoodieException(e);
reporter.report();
Closeables.close(reporter.getReporter(), true);
} catch (Exception e) {
e.printStackTrace();
}
initialized = true;
}
}
});
}
public MetricRegistry getRegistry() {
return registry;
}
public static Metrics getInstance() {
assert initialized;
return metrics;
}
public Closeable getReporter() {
return reporter.getReporter();
public static synchronized void init(HoodieWriteConfig metricConfig) {
if (initialized) {
return;
}
try {
metrics = new Metrics(metricConfig);
} catch (ConfigurationException e) {
throw new HoodieException(e);
}
initialized = true;
}
public MetricRegistry getRegistry() {
return registry;
}
public Closeable getReporter() {
return reporter.getReporter();
}
}
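The singleton is initialized once from the write config and then accessed statically; a minimal sketch:

import com.codahale.metrics.MetricRegistry;

class MetricsInitExample {
  // init() is guarded by the `initialized` flag, so repeated calls are harmless;
  // getInstance() asserts that init() ran first.
  static MetricRegistry registryFor(HoodieWriteConfig config) {
    Metrics.init(config);
    return Metrics.getInstance().getRegistry();
  }
}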

View File

@@ -21,75 +21,74 @@ import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.graphite.Graphite;
import com.codahale.metrics.graphite.GraphiteReporter;
import com.uber.hoodie.config.HoodieWriteConfig;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.Closeable;
import java.net.InetSocketAddress;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Implementation of Graphite reporter, which connects to the Graphite server,
 * and sends metrics to that server.
 * Implementation of Graphite reporter, which connects to the Graphite server, and sends metrics to
* that server.
*/
public class MetricsGraphiteReporter extends MetricsReporter {
private final MetricRegistry registry;
private final GraphiteReporter graphiteReporter;
private final HoodieWriteConfig config;
private String serverHost;
private int serverPort;
private static Logger logger = LogManager.getLogger(MetricsGraphiteReporter.class);
private final MetricRegistry registry;
private final GraphiteReporter graphiteReporter;
private final HoodieWriteConfig config;
private String serverHost;
private int serverPort;
public MetricsGraphiteReporter(HoodieWriteConfig config, MetricRegistry registry) {
this.registry = registry;
this.config = config;
private static Logger logger = LogManager.getLogger(MetricsGraphiteReporter.class);
// Check the serverHost and serverPort here
this.serverHost = config.getGraphiteServerHost();
this.serverPort = config.getGraphiteServerPort();
if (serverHost == null || serverPort == 0) {
throw new RuntimeException(
String.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].",
serverHost, serverPort));
}
public MetricsGraphiteReporter(HoodieWriteConfig config, MetricRegistry registry) {
this.registry = registry;
this.config = config;
this.graphiteReporter = createGraphiteReport();
// Check the serverHost and serverPort here
this.serverHost = config.getGraphiteServerHost();
this.serverPort = config.getGraphiteServerPort();
if (serverHost == null || serverPort == 0) {
throw new RuntimeException(
String.format("Graphite cannot be initialized with serverHost[%s] and serverPort[%s].",
serverHost, serverPort));
}
@Override
public void start() {
if (graphiteReporter != null) {
graphiteReporter.start(30, TimeUnit.SECONDS);
} else {
logger.error("Cannot start as the graphiteReporter is null.");
}
}
this.graphiteReporter = createGraphiteReport();
}
@Override
public void report() {
if (graphiteReporter != null) {
graphiteReporter.report();
} else {
logger.error("Cannot report metrics as the graphiteReporter is null.");
}
@Override
public void start() {
if (graphiteReporter != null) {
graphiteReporter.start(30, TimeUnit.SECONDS);
} else {
logger.error("Cannot start as the graphiteReporter is null.");
}
}
@Override
public Closeable getReporter() {
return graphiteReporter;
@Override
public void report() {
if (graphiteReporter != null) {
graphiteReporter.report();
} else {
logger.error("Cannot report metrics as the graphiteReporter is null.");
}
}
private GraphiteReporter createGraphiteReport() {
Graphite graphite = new Graphite(
new InetSocketAddress(serverHost, serverPort));
String reporterPrefix = config.getGraphiteMetricPrefix();
return GraphiteReporter.forRegistry(registry)
.prefixedWith(reporterPrefix)
.convertRatesTo(TimeUnit.SECONDS)
.convertDurationsTo(TimeUnit.MILLISECONDS)
.filter(MetricFilter.ALL)
.build(graphite);
}
@Override
public Closeable getReporter() {
return graphiteReporter;
}
private GraphiteReporter createGraphiteReport() {
Graphite graphite = new Graphite(
new InetSocketAddress(serverHost, serverPort));
String reporterPrefix = config.getGraphiteMetricPrefix();
return GraphiteReporter.forRegistry(registry)
.prefixedWith(reporterPrefix)
.convertRatesTo(TimeUnit.SECONDS)
.convertDurationsTo(TimeUnit.MILLISECONDS)
.filter(MetricFilter.ALL)
.build(graphite);
}
}
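Constructing the reporter directly requires a config whose Graphite host and port are set, otherwise the constructor above throws. A hedged sketch that leaves the config construction to the caller, since the relevant builder options are not shown in this diff:

import com.codahale.metrics.MetricRegistry;

class GraphiteReporterExample {
  // Builds the reporter and starts the 30-second reporting loop defined above.
  static MetricsGraphiteReporter startReporter(HoodieWriteConfig config) {
    MetricRegistry registry = new MetricRegistry();
    MetricsGraphiteReporter reporter = new MetricsGraphiteReporter(config, registry);
    reporter.start();
    return reporter;
  }
}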

View File

@@ -22,15 +22,16 @@ import java.io.Closeable;
* Interface for implementing a Reporter.
*/
public abstract class MetricsReporter {
/**
* Push out metrics at scheduled intervals
*/
public abstract void start();
/**
* Deterministically push out metrics
*/
public abstract void report();
/**
* Push out metrics at scheduled intervals
*/
public abstract void start();
public abstract Closeable getReporter();
/**
* Deterministically push out metrics
*/
public abstract void report();
public abstract Closeable getReporter();
}
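Any new sink only needs these three hooks. A hypothetical console reporter, shown purely to illustrate the minimal surface (it is not part of this commit):

import java.io.Closeable;

public class ConsoleMetricsReporter extends MetricsReporter {

  @Override
  public void start() {
    // nothing to schedule for a console sink
  }

  @Override
  public void report() {
    System.out.println("metrics snapshot requested");
  }

  @Override
  public Closeable getReporter() {
    return () -> System.out.println("console reporter closed");
  }
}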

View File

@@ -18,7 +18,6 @@ package com.uber.hoodie.metrics;
import com.codahale.metrics.MetricRegistry;
import com.uber.hoodie.config.HoodieWriteConfig;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -26,23 +25,24 @@ import org.apache.log4j.Logger;
* Factory class for creating MetricsReporter.
*/
public class MetricsReporterFactory {
private static Logger logger = LogManager.getLogger(MetricsReporterFactory.class);
public static MetricsReporter createReporter(HoodieWriteConfig config,
MetricRegistry registry) {
MetricsReporterType type = config.getMetricsReporterType();
MetricsReporter reporter = null;
switch (type) {
case GRAPHITE:
reporter = new MetricsGraphiteReporter(config, registry);
break;
case INMEMORY:
reporter = new InMemoryMetricsReporter();
break;
default:
logger.error("Reporter type[" + type + "] is not supported.");
break;
}
return reporter;
private static Logger logger = LogManager.getLogger(MetricsReporterFactory.class);
public static MetricsReporter createReporter(HoodieWriteConfig config,
MetricRegistry registry) {
MetricsReporterType type = config.getMetricsReporterType();
MetricsReporter reporter = null;
switch (type) {
case GRAPHITE:
reporter = new MetricsGraphiteReporter(config, registry);
break;
case INMEMORY:
reporter = new InMemoryMetricsReporter();
break;
default:
logger.error("Reporter type[" + type + "] is not supported.");
break;
}
return reporter;
}
}
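This factory is what the Metrics constructor above calls; for completeness, a one-method hedged usage sketch:

import com.codahale.metrics.MetricRegistry;

class ReporterFactoryExample {
  // Returns a Graphite or in-memory reporter depending on the configured type,
  // or null (after an error log) for unsupported types.
  static MetricsReporter build(HoodieWriteConfig config) {
    return MetricsReporterFactory.createReporter(config, new MetricRegistry());
  }
}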

View File

@@ -17,10 +17,10 @@
package com.uber.hoodie.metrics;
/**
* Types of the reporter. Right now we only support Graphite.
* We can include JMX and CSV in the future.
* Types of the reporter. Right now we only support Graphite. We can include JMX and CSV in the
* future.
*/
public enum MetricsReporterType {
GRAPHITE,
INMEMORY
GRAPHITE,
INMEMORY
}

View File

@@ -39,13 +39,6 @@ import com.uber.hoodie.exception.HoodieCompactionException;
import com.uber.hoodie.exception.HoodieRollbackException;
import com.uber.hoodie.io.HoodieAppendHandle;
import com.uber.hoodie.io.compact.HoodieRealtimeTableCompactor;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
@@ -56,179 +49,209 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
/**
* Implementation of a more real-time read-optimized Hoodie Table where
*
* INSERTS - Same as HoodieCopyOnWriteTable - Produce new files, block aligned to desired size (or)
* Merge with the smallest existing file, to expand it
* Merge with the smallest existing file, to expand it
*
* UPDATES - Appends the changes to a rolling log file maintained per file Id.
* Compaction merges the log file into the base file.
* UPDATES - Appends the changes to a rolling log file maintained per file Id. Compaction merges the
* log file into the base file.
*
* WARNING - MOR table type does not support nested rollbacks, every rollback
* must be followed by an attempted commit action
* WARNING - MOR table type does not support nested rollbacks, every rollback must be followed by an
* attempted commit action
*/
public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends HoodieCopyOnWriteTable<T> {
private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class);
public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
HoodieCopyOnWriteTable<T> {
public HoodieMergeOnReadTable(HoodieWriteConfig config,
HoodieTableMetaClient metaClient) {
super(config, metaClient);
private static Logger logger = LogManager.getLogger(HoodieMergeOnReadTable.class);
public HoodieMergeOnReadTable(HoodieWriteConfig config,
HoodieTableMetaClient metaClient) {
super(config, metaClient);
}
@Override
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId,
Iterator<HoodieRecord<T>> recordItr) throws IOException {
logger.info("Merging updates for commit " + commitTime + " for file " + fileId);
HoodieAppendHandle<T> appendHandle =
new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr);
appendHandle.doAppend();
appendHandle.close();
return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus()))
.iterator();
}
@Override
public Optional<HoodieCompactionMetadata> compact(JavaSparkContext jsc) {
logger.info("Checking if compaction needs to be run on " + config.getBasePath());
Optional<HoodieInstant> lastCompaction = getActiveTimeline().getCompactionTimeline()
.filterCompletedInstants().lastInstant();
String deltaCommitsSinceTs = "0";
if (lastCompaction.isPresent()) {
deltaCommitsSinceTs = lastCompaction.get().getTimestamp();
}
@Override
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileId,
Iterator<HoodieRecord<T>> recordItr) throws IOException {
logger.info("Merging updates for commit " + commitTime + " for file " + fileId);
HoodieAppendHandle<T> appendHandle =
new HoodieAppendHandle<>(config, commitTime, this, fileId, recordItr);
appendHandle.doAppend();
appendHandle.close();
return Collections.singletonList(Collections.singletonList(appendHandle.getWriteStatus()))
.iterator();
int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline()
.findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants();
if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) {
logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction
+ " delta commits was found since last compaction " + deltaCommitsSinceTs
+ ". Waiting for " + config.getInlineCompactDeltaCommitMax());
return Optional.empty();
}
@Override
public Optional<HoodieCompactionMetadata> compact(JavaSparkContext jsc) {
logger.info("Checking if compaction needs to be run on " + config.getBasePath());
Optional<HoodieInstant> lastCompaction = getActiveTimeline().getCompactionTimeline()
.filterCompletedInstants().lastInstant();
String deltaCommitsSinceTs = "0";
if (lastCompaction.isPresent()) {
deltaCommitsSinceTs = lastCompaction.get().getTimestamp();
}
int deltaCommitsSinceLastCompaction = getActiveTimeline().getDeltaCommitTimeline()
.findInstantsAfter(deltaCommitsSinceTs, Integer.MAX_VALUE).countInstants();
if (config.getInlineCompactDeltaCommitMax() > deltaCommitsSinceLastCompaction) {
logger.info("Not running compaction as only " + deltaCommitsSinceLastCompaction
+ " delta commits was found since last compaction " + deltaCommitsSinceTs
+ ". Waiting for " + config.getInlineCompactDeltaCommitMax());
return Optional.empty();
}
logger.info("Compacting merge on read table " + config.getBasePath());
HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor();
try {
return Optional.of(compactor.compact(jsc, config, this));
} catch (IOException e) {
throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e);
}
logger.info("Compacting merge on read table " + config.getBasePath());
HoodieRealtimeTableCompactor compactor = new HoodieRealtimeTableCompactor();
try {
return Optional.of(compactor.compact(jsc, config, this));
} catch (IOException e) {
throw new HoodieCompactionException("Could not compact " + config.getBasePath(), e);
}
}
@Override
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits) throws IOException {
@Override
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits)
throws IOException {
//At the moment, MOR table type does not support nested rollbacks
if(commits.size() > 1) {
throw new UnsupportedOperationException("Nested Rollbacks are not supported");
}
Map<String, HoodieInstant> commitsAndCompactions =
this.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION))
.getInstants()
.filter(i -> commits.contains(i.getTimestamp()))
.collect(Collectors.toMap(i -> i.getTimestamp(), i -> i));
// Atomically un-publish all non-inflight commits
commitsAndCompactions.entrySet().stream().map(entry -> entry.getValue())
.filter(i -> !i.isInflight()).forEach(this.getActiveTimeline()::revertToInflight);
logger.info("Unpublished " + commits);
Long startTime = System.currentTimeMillis();
List<HoodieRollbackStat> allRollbackStats = commits.stream().map(commit -> {
HoodieInstant instant = commitsAndCompactions.get(commit);
List<HoodieRollbackStat> stats = null;
switch (instant.getAction()) {
case HoodieTimeline.COMMIT_ACTION:
case HoodieTimeline.COMPACTION_ACTION:
try {
logger.info("Starting to rollback Commit/Compaction " + instant);
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(this.getCommitTimeline().getInstantDetails(new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get());
stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream().collect(Collectors.toList()))
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
Map<FileStatus, Boolean> results = super.deleteCleanedFiles(partitionPath, Arrays.asList(commit));
return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withDeletedFileResults(results).build();
}).collect();
logger.info("Finished rollback of Commit/Compaction " + instant);
break;
} catch (IOException io) {
throw new UncheckedIOException("Failed to rollback for commit " + commit, io);
}
case HoodieTimeline.DELTA_COMMIT_ACTION:
try {
logger.info("Starting to rollback delta commit " + instant);
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(this.getCommitTimeline().getInstantDetails(new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get());
stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream().collect(Collectors.toList()))
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
// read commit file and (either append delete blocks or delete file)
Map<FileStatus, Boolean> filesToDeletedStatus = new HashMap<>();
Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>();
// we do not know fileIds for inserts (first inserts are parquet files), delete all parquet files for the corresponding failed commit, if present (same as COW)
filesToDeletedStatus = super.deleteCleanedFiles(partitionPath, Arrays.asList(commit));
// append rollback blocks for updates
commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT).forEach(wStat -> {
HoodieLogFormat.Writer writer = null;
try {
writer = HoodieLogFormat.newWriterBuilder()
.onParentPath(new Path(this.getMetaClient().getBasePath(), partitionPath))
.withFileId(wStat.getFileId()).overBaseCommit(wStat.getPrevCommit())
.withFs(FSUtils.getFs()).withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
Long numRollbackBlocks = 0L;
// generate metadata
Map<HoodieLogBlock.LogMetadataType, String> metadata = Maps.newHashMap();
metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME, metaClient.getActiveTimeline().lastInstant().get().getTimestamp());
metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, commit);
// if update belongs to an existing log file
writer.appendBlock(new HoodieCommandBlock(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata));
numRollbackBlocks++;
if(wStat.getNumDeletes() > 0) {
writer.appendBlock(new HoodieCommandBlock(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK, metadata));
numRollbackBlocks++;
}
filesToNumBlocksRollback.put(FSUtils.getFs().getFileStatus(writer.getLogFile().getPath()), numRollbackBlocks);
} catch (IOException | InterruptedException io) {
throw new HoodieRollbackException("Failed to rollback for commit " + commit, io);
} finally {
try {
writer.close();
} catch (IOException io) {
throw new UncheckedIOException(io);
}
}
});
return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withDeletedFileResults(filesToDeletedStatus)
.withRollbackBlockAppendResults(filesToNumBlocksRollback).build();
}).collect();
logger.info("Fnished rollback of delta commit " + instant);
break;
} catch (IOException io) {
throw new UncheckedIOException("Failed to rollback for commit " + commit, io);
}
}
return stats;
}).flatMap(x -> x.stream()).collect(Collectors.toList());
commitsAndCompactions.entrySet().stream()
.map(entry -> new HoodieInstant(true, entry.getValue().getAction(), entry.getValue().getTimestamp()))
.forEach(this.getActiveTimeline()::deleteInflight);
logger.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
return allRollbackStats;
//At the moment, MOR table type does not support nested rollbacks
if (commits.size() > 1) {
throw new UnsupportedOperationException("Nested Rollbacks are not supported");
}
Map<String, HoodieInstant> commitsAndCompactions =
this.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
HoodieActiveTimeline.COMPACTION_ACTION, HoodieActiveTimeline.DELTA_COMMIT_ACTION))
.getInstants()
.filter(i -> commits.contains(i.getTimestamp()))
.collect(Collectors.toMap(i -> i.getTimestamp(), i -> i));
// Atomically un-publish all non-inflight commits
commitsAndCompactions.entrySet().stream().map(entry -> entry.getValue())
.filter(i -> !i.isInflight()).forEach(this.getActiveTimeline()::revertToInflight);
logger.info("Unpublished " + commits);
Long startTime = System.currentTimeMillis();
List<HoodieRollbackStat> allRollbackStats = commits.stream().map(commit -> {
HoodieInstant instant = commitsAndCompactions.get(commit);
List<HoodieRollbackStat> stats = null;
switch (instant.getAction()) {
case HoodieTimeline.COMMIT_ACTION:
case HoodieTimeline.COMPACTION_ACTION:
try {
logger.info("Starting to rollback Commit/Compaction " + instant);
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(this.getCommitTimeline().getInstantDetails(
new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get());
stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream()
.collect(Collectors.toList()))
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
Map<FileStatus, Boolean> results = super
.deleteCleanedFiles(partitionPath, Arrays.asList(commit));
return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withDeletedFileResults(results).build();
}).collect();
logger.info("Finished rollback of Commit/Compaction " + instant);
break;
} catch (IOException io) {
throw new UncheckedIOException("Failed to rollback for commit " + commit, io);
}
case HoodieTimeline.DELTA_COMMIT_ACTION:
try {
logger.info("Starting to rollback delta commit " + instant);
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
.fromBytes(this.getCommitTimeline().getInstantDetails(
new HoodieInstant(true, instant.getAction(), instant.getTimestamp())).get());
stats = jsc.parallelize(commitMetadata.getPartitionToWriteStats().keySet().stream()
.collect(Collectors.toList()))
.map((Function<String, HoodieRollbackStat>) partitionPath -> {
// read commit file and (either append delete blocks or delete file)
Map<FileStatus, Boolean> filesToDeletedStatus = new HashMap<>();
Map<FileStatus, Long> filesToNumBlocksRollback = new HashMap<>();
// we do not know fileIds for inserts (first inserts are parquet files), delete all parquet files for the corresponding failed commit, if present (same as COW)
filesToDeletedStatus = super
.deleteCleanedFiles(partitionPath, Arrays.asList(commit));
// append rollback blocks for updates
commitMetadata.getPartitionToWriteStats().get(partitionPath).stream()
.filter(wStat -> wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT)
.forEach(wStat -> {
HoodieLogFormat.Writer writer = null;
try {
writer = HoodieLogFormat.newWriterBuilder()
.onParentPath(
new Path(this.getMetaClient().getBasePath(), partitionPath))
.withFileId(wStat.getFileId()).overBaseCommit(wStat.getPrevCommit())
.withFs(FSUtils.getFs())
.withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
Long numRollbackBlocks = 0L;
// generate metadata
Map<HoodieLogBlock.LogMetadataType, String> metadata = Maps.newHashMap();
metadata.put(HoodieLogBlock.LogMetadataType.INSTANT_TIME,
metaClient.getActiveTimeline().lastInstant().get().getTimestamp());
metadata.put(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME, commit);
// if update belongs to an existing log file
writer.appendBlock(new HoodieCommandBlock(
HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK,
metadata));
numRollbackBlocks++;
if (wStat.getNumDeletes() > 0) {
writer.appendBlock(new HoodieCommandBlock(
HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK,
metadata));
numRollbackBlocks++;
}
filesToNumBlocksRollback
.put(FSUtils.getFs().getFileStatus(writer.getLogFile().getPath()),
numRollbackBlocks);
} catch (IOException | InterruptedException io) {
throw new HoodieRollbackException(
"Failed to rollback for commit " + commit, io);
} finally {
try {
writer.close();
} catch (IOException io) {
throw new UncheckedIOException(io);
}
}
});
return HoodieRollbackStat.newBuilder().withPartitionPath(partitionPath)
.withDeletedFileResults(filesToDeletedStatus)
.withRollbackBlockAppendResults(filesToNumBlocksRollback).build();
}).collect();
logger.info("Fnished rollback of delta commit " + instant);
break;
} catch (IOException io) {
throw new UncheckedIOException("Failed to rollback for commit " + commit, io);
}
}
return stats;
}).flatMap(x -> x.stream()).collect(Collectors.toList());
commitsAndCompactions.entrySet().stream()
.map(entry -> new HoodieInstant(true, entry.getValue().getAction(),
entry.getValue().getTimestamp()))
.forEach(this.getActiveTimeline()::deleteInflight);
logger
.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
return allRollbackStats;
}
}
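The nested-rollback restriction called out in the class comment follows directly from the rollback() guard above: only one commit may be rolled back per call. A brief hedged sketch (hoodie-internal imports elided):

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.spark.api.java.JavaSparkContext;

class MorRollbackExample {
  // Rolls back exactly one (delta) commit; passing more than one commit time
  // would trip the UnsupportedOperationException guard shown above.
  static <T extends HoodieRecordPayload> List<HoodieRollbackStat> rollbackOne(
      HoodieMergeOnReadTable<T> table, JavaSparkContext jsc, String commitTime)
      throws IOException {
    return table.rollback(jsc, Collections.singletonList(commitTime));
  }
}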

View File

@@ -34,7 +34,6 @@ import com.uber.hoodie.common.util.AvroUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.exception.HoodieCommitException;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieRollbackException;
import com.uber.hoodie.exception.HoodieSavepointException;
import java.io.IOException;
import java.io.Serializable;
@@ -43,8 +42,6 @@ import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -55,291 +52,245 @@ import org.apache.spark.api.java.JavaSparkContext;
* Abstract implementation of a HoodieTable
*/
public abstract class HoodieTable<T extends HoodieRecordPayload> implements Serializable {
protected final HoodieWriteConfig config;
protected final HoodieTableMetaClient metaClient;
private static Logger logger = LogManager.getLogger(HoodieTable.class);
protected HoodieTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) {
this.config = config;
this.metaClient = metaClient;
protected final HoodieWriteConfig config;
protected final HoodieTableMetaClient metaClient;
private static Logger logger = LogManager.getLogger(HoodieTable.class);
protected HoodieTable(HoodieWriteConfig config, HoodieTableMetaClient metaClient) {
this.config = config;
this.metaClient = metaClient;
}
/**
* Provides a partitioner to perform the upsert operation, based on the workload profile
*/
public abstract Partitioner getUpsertPartitioner(WorkloadProfile profile);
/**
* Provides a partitioner to perform the insert operation, based on the workload profile
*/
public abstract Partitioner getInsertPartitioner(WorkloadProfile profile);
/**
* Return whether this HoodieTable implementation can benefit from workload profiling
*/
public abstract boolean isWorkloadProfileNeeded();
public HoodieWriteConfig getConfig() {
return config;
}
public HoodieTableMetaClient getMetaClient() {
return metaClient;
}
public FileSystem getFs() {
return metaClient.getFs();
}
/**
* Get the view of the file system for this table
*/
public TableFileSystemView getFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
}
/**
* Get the read optimized view of the file system for this table
*/
public TableFileSystemView.ReadOptimizedView getROFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
}
/**
* Get the real time view of the file system for this table
*/
public TableFileSystemView.RealtimeView getRTFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
}
/**
* Get the completed (commit + compaction) view of the file system for this table
*/
public TableFileSystemView getCompletedFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCommitTimeline());
}
/**
* Get only the completed (no-inflights) commit timeline
*/
public HoodieTimeline getCompletedCommitTimeline() {
return getCommitTimeline().filterCompletedInstants();
}
/**
* Get only the inflights (no-completed) commit timeline
*/
public HoodieTimeline getInflightCommitTimeline() {
return getCommitTimeline().filterInflights();
}
/**
* Get only the completed (no-inflights) clean timeline
*/
public HoodieTimeline getCompletedCleanTimeline() {
return getActiveTimeline().getCleanerTimeline().filterCompletedInstants();
}
/**
* Get only the completed (no-inflights) savepoint timeline
*/
public HoodieTimeline getCompletedSavepointTimeline() {
return getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
}
/**
* Get the list of savepoints in this table
*/
public List<String> getSavepoints() {
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
}
/**
* Get the list of data file names savepointed
*/
public Stream<String> getSavepointedDataFiles(String savepointTime) {
if (!getSavepoints().contains(savepointTime)) {
throw new HoodieSavepointException(
"Could not get data files for savepoint " + savepointTime + ". No such savepoint.");
}
/**
* Provides a partitioner to perform the upsert operation, based on the
* workload profile
*
* @return
*/
public abstract Partitioner getUpsertPartitioner(WorkloadProfile profile);
/**
* Provides a partitioner to perform the insert operation, based on the workload profile
*
* @return
*/
public abstract Partitioner getInsertPartitioner(WorkloadProfile profile);
/**
* Return whether this HoodieTable implementation can benefit from workload
* profiling
*
* @return
*/
public abstract boolean isWorkloadProfileNeeded();
public HoodieWriteConfig getConfig() {
return config;
HoodieInstant instant =
new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
HoodieSavepointMetadata metadata = null;
try {
metadata = AvroUtils.deserializeHoodieSavepointMetadata(
getActiveTimeline().getInstantDetails(instant).get());
} catch (IOException e) {
throw new HoodieSavepointException(
"Could not get savepointed data files for savepoint " + savepointTime, e);
}
return metadata.getPartitionMetadata().values().stream()
.flatMap(s -> s.getSavepointDataFile().stream());
}
public HoodieTableMetaClient getMetaClient() {
return metaClient;
public HoodieActiveTimeline getActiveTimeline() {
return metaClient.getActiveTimeline();
}
/**
* Get the commit timeline visible for this table
*/
public HoodieTimeline getCommitTimeline() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return getActiveTimeline().getCommitTimeline();
case MERGE_ON_READ:
// We need to include the parquet files written out in delta commits
// Include commit action to be able to start doing a MOR over a COW dataset - no migration required
return getActiveTimeline().getCommitsAndCompactionsTimeline();
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
}
public FileSystem getFs() {
return metaClient.getFs();
/**
* Get only the completed (no-inflights) compaction commit timeline
*/
public HoodieTimeline getCompletedCompactionCommitTimeline() {
return getCompactionCommitTimeline().filterCompletedInstants();
}
/**
* Get the compacted commit timeline visible for this table
*/
public HoodieTimeline getCompactionCommitTimeline() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return getActiveTimeline().getCommitsAndCompactionsTimeline();
case MERGE_ON_READ:
// We need to include the parquet files written out in delta commits in tagging
return getActiveTimeline().getTimelineOfActions(
Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION));
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
}
/**
* Get the view of the file system for this table
*
* @return
*/
public TableFileSystemView getFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
/**
* Gets the commit action type
*/
public String getCommitActionType() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return HoodieActiveTimeline.COMMIT_ACTION;
case MERGE_ON_READ:
return HoodieActiveTimeline.DELTA_COMMIT_ACTION;
}
throw new HoodieCommitException(
"Could not commit on unknown storage type " + metaClient.getTableType());
}
/**
* Get the read optimized view of the file system for this table
*
* @return
*/
public TableFileSystemView.ReadOptimizedView getROFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
/**
* Gets the action type for a compaction commit
*/
public String getCompactedCommitActionType() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return HoodieTimeline.COMMIT_ACTION;
case MERGE_ON_READ:
return HoodieTimeline.COMPACTION_ACTION;
}
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
/**
* Get the real time view of the file system for this table
*
* @return
*/
public TableFileSystemView.RealtimeView getRTFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
/**
* Perform the ultimate IO for a given upserted (RDD) partition
*/
public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime,
Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
/**
* Perform the ultimate IO for a given inserted (RDD) partition
*/
public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime,
Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
public static <T extends HoodieRecordPayload> HoodieTable<T> getHoodieTable(
HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return new HoodieCopyOnWriteTable<>(config, metaClient);
case MERGE_ON_READ:
return new HoodieMergeOnReadTable<>(config, metaClient);
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
}
/**
* Get the completed (commit + compaction) view of the file system for this table
*
* @return
*/
public TableFileSystemView getCompletedFileSystemView() {
return new HoodieTableFileSystemView(metaClient, getCommitTimeline());
}
/**
* Run Compaction on the table. Compaction arranges the data so that it is optimized for data
* access
*/
public abstract Optional<HoodieCompactionMetadata> compact(JavaSparkContext jsc);
/**
* Get only the completed (no-inflights) commit timeline
* @return
*/
public HoodieTimeline getCompletedCommitTimeline() {
return getCommitTimeline().filterCompletedInstants();
}
/**
* Clean partition paths according to cleaning policy and returns the number of files cleaned.
*/
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc);
/**
* Get only the inflights (no-completed) commit timeline
* @return
*/
public HoodieTimeline getInflightCommitTimeline() {
return getCommitTimeline().filterInflights();
}
/**
* Get only the completed (no-inflights) clean timeline
* @return
*/
public HoodieTimeline getCompletedCleanTimeline() {
return getActiveTimeline().getCleanerTimeline().filterCompletedInstants();
}
/**
* Get only the completed (no-inflights) savepoint timeline
* @return
*/
public HoodieTimeline getCompletedSavepointTimeline() {
return getActiveTimeline().getSavePointTimeline().filterCompletedInstants();
}
/**
* Get the list of savepoints in this table
* @return
*/
public List<String> getSavepoints() {
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
}
/**
* Get the list of data file names savepointed
*
* @param savepointTime timestamp of the savepoint
* @return stream of data file names retained by that savepoint
* @throws HoodieSavepointException if no such savepoint exists or its metadata cannot be read
*/
public Stream<String> getSavepointedDataFiles(String savepointTime) {
if (!getSavepoints().contains(savepointTime)) {
throw new HoodieSavepointException(
"Could not get data files for savepoint " + savepointTime + ". No such savepoint.");
}
HoodieInstant instant =
new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION, savepointTime);
HoodieSavepointMetadata metadata = null;
try {
metadata = AvroUtils.deserializeHoodieSavepointMetadata(
getActiveTimeline().getInstantDetails(instant).get());
} catch (IOException e) {
throw new HoodieSavepointException(
"Could not get savepointed data files for savepoint " + savepointTime, e);
}
return metadata.getPartitionMetadata().values().stream()
.flatMap(s -> s.getSavepointDataFile().stream());
}
public HoodieActiveTimeline getActiveTimeline() {
return metaClient.getActiveTimeline();
}
/**
* Get the commit timeline visible for this table
*
* @return
*/
public HoodieTimeline getCommitTimeline() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return getActiveTimeline().getCommitTimeline();
case MERGE_ON_READ:
// We need to include the parquet files written out in delta commits
// Include commit action to be able to start doing a MOR over a COW dataset - no migration required
return getActiveTimeline().getCommitsAndCompactionsTimeline();
default:
throw new HoodieException("Unsupported table type :"+ metaClient.getTableType());
}
}
/**
* Get only the completed (no-inflights) compaction commit timeline
* @return
*/
public HoodieTimeline getCompletedCompactionCommitTimeline() {
return getCompactionCommitTimeline().filterCompletedInstants();
}
/**
* Get the compacted commit timeline visible for this table
*
* @return
*/
public HoodieTimeline getCompactionCommitTimeline() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return getActiveTimeline().getCommitsAndCompactionsTimeline();
case MERGE_ON_READ:
// We need to include the parquet files written out in delta commits in tagging
return getActiveTimeline().getTimelineOfActions(
Sets.newHashSet(HoodieActiveTimeline.COMPACTION_ACTION));
default:
throw new HoodieException("Unsupported table type :"+ metaClient.getTableType());
}
}
/**
* Gets the commit action type
* @return
*/
public String getCommitActionType() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return HoodieActiveTimeline.COMMIT_ACTION;
case MERGE_ON_READ:
return HoodieActiveTimeline.DELTA_COMMIT_ACTION;
}
throw new HoodieCommitException(
"Could not commit on unknown storage type " + metaClient.getTableType());
}
/**
* Gets the action type for a compaction commit
* @return
*/
public String getCompactedCommitActionType() {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return HoodieTimeline.COMMIT_ACTION;
case MERGE_ON_READ:
return HoodieTimeline.COMPACTION_ACTION;
}
throw new HoodieException("Unsupported table type :"+ metaClient.getTableType());
}
/**
* Perform the ultimate IO for a given upserted (RDD) partition
*
* @param partition
* @param recordIterator
* @param partitioner
*/
public abstract Iterator<List<WriteStatus>> handleUpsertPartition(String commitTime,
Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
/**
* Perform the ultimate IO for a given inserted (RDD) partition
*
* @param partition
* @param recordIterator
* @param partitioner
*/
public abstract Iterator<List<WriteStatus>> handleInsertPartition(String commitTime,
Integer partition, Iterator<HoodieRecord<T>> recordIterator, Partitioner partitioner);
public static <T extends HoodieRecordPayload> HoodieTable<T> getHoodieTable(
HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
switch (metaClient.getTableType()) {
case COPY_ON_WRITE:
return new HoodieCopyOnWriteTable<>(config, metaClient);
case MERGE_ON_READ:
return new HoodieMergeOnReadTable<>(config, metaClient);
default:
throw new HoodieException("Unsupported table type :" + metaClient.getTableType());
}
}
/**
* Run Compaction on the table.
* Compaction arranges the data so that it is optimized for data access
*/
public abstract Optional<HoodieCompactionMetadata> compact(JavaSparkContext jsc);
/**
* Cleans partition paths according to the cleaning policy and returns the number
* of files cleaned.
*/
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc);
/**
* Rollback the (inflight/committed) record changes with the given commit time.
* Four steps:
* (1) Atomically unpublish this commit
* (2) clean indexing data
* (3) clean new generated parquet files / log blocks
* (4) Finally, delete .<action>.commit or .<action>.inflight file
* @param commits
* @return
* @throws HoodieRollbackException
*/
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits) throws IOException;
/**
* Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1)
* Atomically unpublish this commit (2) clean indexing data (3) clean new generated parquet files
* / log blocks (4) Finally, delete .<action>.commit or .<action>.inflight file
*/
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits)
throws IOException;
}
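For orientation, a minimal end-to-end sketch of the table API above. This is not part of this commit: the sketch class name, the sample base path argument, and the illustrative commit time passed to rollback are assumptions, while the constructor and builder calls mirror the ones already used elsewhere in this change.

import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.config.HoodieWriteConfig;
import com.uber.hoodie.table.HoodieTable;
import java.util.Collections;
import org.apache.hadoop.fs.FileSystem;
import org.apache.spark.api.java.JavaSparkContext;

public class HoodieTableUsageSketch {

  public static void main(String[] args) throws Exception {
    String basePath = args[0]; // path to an existing hoodie dataset
    JavaSparkContext jsc = new JavaSparkContext(); // spark settings assumed to come from the environment

    // Same bootstrap calls as the rest of this change: meta client + write config + table factory.
    FileSystem fs = FSUtils.getFs();
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath, true);
    HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build();
    HoodieTable table = HoodieTable.getHoodieTable(metaClient, config);

    // Completed commits only (inflight instants filtered out).
    HoodieTimeline completed = table.getCompletedCommitTimeline();
    System.out.println("Completed instants: " + completed.getInstants().count());

    // Data files pinned by each savepoint.
    for (String savepointTime : table.getSavepoints()) {
      table.getSavepointedDataFiles(savepointTime)
          .forEach(file -> System.out.println(savepointTime + " retains " + file));
    }

    // Roll back a single, purely illustrative commit time (unpublish, clean index and new files, delete meta file).
    table.rollback(jsc, Collections.singletonList("20171112000000"));
  }
}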

View File

@@ -20,13 +20,13 @@ import com.uber.hoodie.common.model.HoodieRecordPayload;
import org.apache.spark.api.java.JavaRDD;
/**
* Repartition input records into at least expected number of output spark partitions. It should give
* below guarantees
* - Output spark partition will have records from only one hoodie partition.
* - Average records per output spark partitions should be almost equal to (#inputRecords / #outputSparkPartitions)
* to avoid possible skews.
* Repartition input records into at least the expected number of output spark partitions. It should
* give the following guarantees: - Output spark partition will have records from only one hoodie
* partition. - Average records per output spark partition should be almost equal to (#inputRecords /
* #outputSparkPartitions) to avoid possible skews.
*/
public interface UserDefinedBulkInsertPartitioner<T extends HoodieRecordPayload> {
JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records, int outputSparkPartitions);
JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records,
int outputSparkPartitions);
}
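For illustration only, a minimal sketch of one way this contract can be implemented; the class name and the sort-based strategy are assumptions, not part of this commit. Range-partitioning on the partition path clusters records of the same hoodie partition together, which approximates the first guarantee, though a production partitioner would still have to enforce it strictly and watch for skew.

import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import org.apache.spark.api.java.JavaRDD;

// Assumes this sketch sits alongside UserDefinedBulkInsertPartitioner (same package).
public class SortByPartitionPathBulkInsertPartitioner<T extends HoodieRecordPayload>
    implements UserDefinedBulkInsertPartitioner<T> {

  @Override
  public JavaRDD<HoodieRecord<T>> repartitionRecords(JavaRDD<HoodieRecord<T>> records,
      int outputSparkPartitions) {
    // Sorting (and hence range-partitioning) by partition path keeps records of the same
    // hoodie partition in the same or adjacent spark partitions.
    return records.sortBy(HoodieRecord::getPartitionPath, true, outputSparkPartitions);
  }
}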

View File

@@ -20,15 +20,11 @@ package com.uber.hoodie.table;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.PairFunction;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.spark.api.java.JavaRDD;
import scala.Option;
import scala.Tuple2;
@@ -40,73 +36,76 @@ import scala.Tuple2;
*/
public class WorkloadProfile<T extends HoodieRecordPayload> implements Serializable {
/**
* Input workload
*/
private final JavaRDD<HoodieRecord<T>> taggedRecords;
/**
* Computed workload profile
*/
private final HashMap<String, WorkloadStat> partitionPathStatMap;
private final WorkloadStat globalStat;
public WorkloadProfile(JavaRDD<HoodieRecord<T>> taggedRecords) {
this.taggedRecords = taggedRecords;
this.partitionPathStatMap = new HashMap<>();
this.globalStat = new WorkloadStat();
buildProfile();
}
private void buildProfile() {
Map<Tuple2<String, Option<HoodieRecordLocation>>, Long> partitionLocationCounts = taggedRecords
.mapToPair(record ->
new Tuple2<>(
new Tuple2<>(record.getPartitionPath(), Option.apply(record.getCurrentLocation())),
record))
.countByKey();
for (Map.Entry<Tuple2<String, Option<HoodieRecordLocation>>, Long> e : partitionLocationCounts
.entrySet()) {
String partitionPath = e.getKey()._1();
Long count = e.getValue();
Option<HoodieRecordLocation> locOption = e.getKey()._2();
if (!partitionPathStatMap.containsKey(partitionPath)) {
partitionPathStatMap.put(partitionPath, new WorkloadStat());
}
if (locOption.isDefined()) {
// update
partitionPathStatMap.get(partitionPath).addUpdates(locOption.get(), count);
globalStat.addUpdates(locOption.get(), count);
} else {
// insert
partitionPathStatMap.get(partitionPath).addInserts(count);
globalStat.addInserts(count);
}
}
}
public WorkloadStat getGlobalStat() {
return globalStat;
}
public Set<String> getPartitionPaths() {
return partitionPathStatMap.keySet();
}
public WorkloadStat getWorkloadStat(String partitionPath) {
return partitionPathStatMap.get(partitionPath);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WorkloadProfile {");
sb.append("globalStat=").append(globalStat).append(", ");
sb.append("partitionStat=").append(partitionPathStatMap);
sb.append('}');
return sb.toString();
}
}
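A small usage sketch for the profile above, not part of this commit: the helper class and method names are made up, and the tagged-record RDD is assumed to come from an index lookup.

import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.table.WorkloadProfile;
import com.uber.hoodie.table.WorkloadStat;
import org.apache.spark.api.java.JavaRDD;

public class WorkloadProfileSketch {

  // Summarize an already-tagged input RDD: records without a current location count as
  // inserts, records tagged with an existing file location count as updates.
  public static <T extends HoodieRecordPayload> void summarize(
      JavaRDD<HoodieRecord<T>> taggedRecords) {
    WorkloadProfile<T> profile = new WorkloadProfile<>(taggedRecords);

    System.out.println("Total inserts: " + profile.getGlobalStat().getNumInserts());
    System.out.println("Total updates: " + profile.getGlobalStat().getNumUpdates());

    for (String partitionPath : profile.getPartitionPaths()) {
      WorkloadStat stat = profile.getWorkloadStat(partitionPath);
      System.out.println(partitionPath + " -> inserts=" + stat.getNumInserts()
          + ", updates=" + stat.getNumUpdates()
          + ", filesTouched=" + stat.getUpdateLocationToCount().keySet());
    }
  }
}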

View File

@@ -17,7 +17,6 @@
package com.uber.hoodie.table;
import com.uber.hoodie.common.model.HoodieRecordLocation;
import java.io.Serializable;
import java.util.HashMap;
@@ -25,43 +24,44 @@ import java.util.HashMap;
* Wraps stats about a single partition path.
*/
public class WorkloadStat implements Serializable {
private long numInserts = 0L;
private long numUpdates = 0L;
private HashMap<String, Long> updateLocationToCount;
public WorkloadStat() {
updateLocationToCount = new HashMap<>();
}
long addInserts(long numInserts) {
return this.numInserts += numInserts;
}
long addUpdates(HoodieRecordLocation location, long numUpdates) {
updateLocationToCount.put(location.getFileId(), numUpdates);
return this.numUpdates += numUpdates;
}
public long getNumUpdates() {
return numUpdates;
}
public long getNumInserts() {
return numInserts;
}
public HashMap<String, Long> getUpdateLocationToCount() {
return updateLocationToCount;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WorkloadStat {");
sb.append("numInserts=").append(numInserts).append(", ");
sb.append("numUpdates=").append(numUpdates);
sb.append('}');
return sb.toString();
}
}
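Finally, a tiny illustration of how the stat accumulates; the commit time and file id are made up, addInserts/addUpdates are package-private so the sketch assumes it lives in com.uber.hoodie.table, and HoodieRecordLocation's (commitTime, fileId) constructor is an assumption here, not something this commit establishes.

package com.uber.hoodie.table;

import com.uber.hoodie.common.model.HoodieRecordLocation;

public class WorkloadStatSketch {

  public static void main(String[] args) {
    WorkloadStat stat = new WorkloadStat();

    // 100 brand-new records headed for this partition.
    stat.addInserts(100L);

    // 40 more records update an existing file (hypothetical commit time and file id).
    stat.addUpdates(new HoodieRecordLocation("20171112000000", "file-0001"), 40L);

    System.out.println(stat); // WorkloadStat {numInserts=100, numUpdates=40}
  }
}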