Support union mode in HoodieRealtimeRecordReader for pure insert workloads

Also replace BufferedIteratorPayload abstraction with function passing
2018-04-26 10:18:05 -07:00
parent 93f345a032
commit dfc0c61eb7
44 changed files with 2545 additions and 1179 deletions
--- a/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieLogFileCommand.java
+++ b/hoodie-cli/src/main/java/com/uber/hoodie/cli/commands/HoodieLogFileCommand.java
@@ -24,8 +24,8 @@ import com.uber.hoodie.cli.TableHeader;
 import com.uber.hoodie.common.model.HoodieLogFile;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
-import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner;
 import com.uber.hoodie.common.table.log.HoodieLogFormat;
+import com.uber.hoodie.common.table.log.HoodieMergedLogRecordScanner;
 import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
 import com.uber.hoodie.common.table.log.block.HoodieCorruptBlock;
 import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
@@ -187,7 +187,7 @@ public class HoodieLogFileCommand implements CommandMarker {

    if (shouldMerge) {
      System.out.println("===========================> MERGING RECORDS <===================");
-      HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs,
+      HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs,
          HoodieCLI.tableMetadata.getBasePath(), logFilePaths, readerSchema,
          HoodieCLI.tableMetadata.getActiveTimeline().getCommitTimeline().lastInstant().get()
              .getTimestamp(),
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/BufferedIterator.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/BufferedIterator.java
@@ -1,209 +0,0 @@
-/*
- *  Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
- *
- *  Licensed under the Apache License, Version 2.0 (the "License");
- *  you may not use this file except in compliance with the License.
- *  You may obtain a copy of the License at
- *
- *           http://www.apache.org/licenses/LICENSE-2.0
- *
- *  Unless required by applicable law or agreed to in writing, software
- *  distributed under the License is distributed on an "AS IS" BASIS,
- *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- *  See the License for the specific language governing permissions and
- *  limitations under the License.
- */
-
-package com.uber.hoodie.func;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.uber.hoodie.exception.HoodieException;
-import java.util.Iterator;
-import java.util.Optional;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.Semaphore;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Function;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.apache.spark.util.SizeEstimator;
-
-/**
- * Used for buffering input records. Buffer limit is controlled by {@link #bufferMemoryLimit}. It
- * internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts number of records in
- * buffer accordingly. This is done to ensure that we don't OOM.
- *
- * @param <I> input payload data type
- * @param <O> output payload data type
- */
-public class BufferedIterator<I, O> implements Iterator<O> {
-
-  // interval used for polling records in the queue.
-  public static final int RECORD_POLL_INTERVAL_SEC = 5;
-  // rate used for sampling records to determine avg record size in bytes.
-  public static final int RECORD_SAMPLING_RATE = 64;
-  // maximum records that will be cached
-  private static final int RECORD_CACHING_LIMIT = 128 * 1024;
-  private static Logger logger = LogManager.getLogger(BufferedIterator.class);
-  // It indicates number of records to cache. We will be using sampled record's average size to
-  // determine how many
-  // records we should cache and will change (increase/decrease) permits accordingly.
-  @VisibleForTesting
-  public final Semaphore rateLimiter = new Semaphore(1);
-  // used for sampling records with "RECORD_SAMPLING_RATE" frequency.
-  public final AtomicLong samplingRecordCounter = new AtomicLong(-1);
-  // internal buffer to cache buffered records.
-  private final LinkedBlockingQueue<Optional<O>> buffer = new
-      LinkedBlockingQueue<>();
-  // maximum amount of memory to be used for buffering records.
-  private final long bufferMemoryLimit;
-  // original iterator from where records are read for buffering.
-  private final Iterator<I> inputIterator;
-  // it holds the root cause of the exception in case either buffering records (reading from
-  // inputIterator) fails or
-  // thread reading records from buffer fails.
-  private final AtomicReference<Exception> hasFailed = new AtomicReference(null);
-  // used for indicating that all the records from buffer are read successfully.
-  private final AtomicBoolean isDone = new AtomicBoolean(false);
-  // indicates rate limit (number of records to cache). it is updated whenever there is a change
-  // in avg record size.
-  @VisibleForTesting
-  public int currentRateLimit = 1;
-  // indicates avg record size in bytes. It is updated whenever a new record is sampled.
-  @VisibleForTesting
-  public long avgRecordSizeInBytes = 0;
-  // indicates number of samples collected so far.
-  private long numSamples = 0;
-  // next record to be read from buffer.
-  private O nextRecord;
-  // Function to transform the input payload to the expected output payload
-  private Function<I, O> bufferedIteratorTransform;
-
-  public BufferedIterator(final Iterator<I> iterator, final long bufferMemoryLimit,
-      final Function<I, O> bufferedIteratorTransform) {
-    this.inputIterator = iterator;
-    this.bufferMemoryLimit = bufferMemoryLimit;
-    this.bufferedIteratorTransform = bufferedIteratorTransform;
-  }
-
-  @VisibleForTesting
-  public int size() {
-    return this.buffer.size();
-  }
-
-  // It samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in
-  // bytes. It is used
-  // for determining how many maximum records to buffer. Based on change in avg size it may
-  // increase or decrease
-  // available permits.
-  private void adjustBufferSizeIfNeeded(final I record) throws InterruptedException {
-    if (this.samplingRecordCounter.incrementAndGet() % RECORD_SAMPLING_RATE != 0) {
-      return;
-    }
-    final long recordSizeInBytes = SizeEstimator.estimate(record);
-    final long newAvgRecordSizeInBytes = Math
-        .max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
-    final int newRateLimit = (int) Math
-        .min(RECORD_CACHING_LIMIT, Math.max(1, this.bufferMemoryLimit / newAvgRecordSizeInBytes));
-
-    // If there is any change in number of records to cache then we will either release (if it increased) or acquire
-    // (if it decreased) to adjust rate limiting to newly computed value.
-    if (newRateLimit > currentRateLimit) {
-      rateLimiter.release(newRateLimit - currentRateLimit);
-    } else if (newRateLimit < currentRateLimit) {
-      rateLimiter.acquire(currentRateLimit - newRateLimit);
-    }
-    currentRateLimit = newRateLimit;
-    avgRecordSizeInBytes = newAvgRecordSizeInBytes;
-    numSamples++;
-  }
-
-  // inserts record into internal buffer. It also fetches insert value from the record to offload
-  // computation work on to
-  // buffering thread.
-  private void insertRecord(I t) throws Exception {
-    rateLimiter.acquire();
-    adjustBufferSizeIfNeeded(t);
-    // We are retrieving insert value in the record buffering thread to offload computation
-    // around schema validation
-    // and record creation to it.
-    final O payload = bufferedIteratorTransform.apply(t);
-    buffer.put(Optional.of(payload));
-  }
-
-  private void readNextRecord() {
-    rateLimiter.release();
-    Optional<O> newRecord;
-    while (true) {
-      try {
-        throwExceptionIfFailed();
-        newRecord = buffer.poll(RECORD_POLL_INTERVAL_SEC, TimeUnit.SECONDS);
-        if (newRecord != null) {
-          break;
-        }
-      } catch (InterruptedException e) {
-        logger.error("error reading records from BufferedIterator", e);
-        throw new HoodieException(e);
-      }
-    }
-    if (newRecord.isPresent()) {
-      this.nextRecord = newRecord.get();
-    } else {
-      // We are done reading all the records from internal iterator.
-      this.isDone.set(true);
-      this.nextRecord = null;
-    }
-  }
-
-  public void startBuffering() throws Exception {
-    logger.info("starting to buffer records");
-    try {
-      while (inputIterator.hasNext()) {
-        // We need to stop buffering if buffer-reader has failed and exited.
-        throwExceptionIfFailed();
-        insertRecord(inputIterator.next());
-      }
-      // done buffering records notifying buffer-reader.
-      buffer.put(Optional.empty());
-    } catch (Exception e) {
-      logger.error("error buffering records", e);
-      // Used for notifying buffer-reader thread of the failed operation.
-      markAsFailed(e);
-      throw e;
-    }
-    logger.info("finished buffering records");
-  }
-
-  @Override
-  public boolean hasNext() {
-    if (this.nextRecord == null && !this.isDone.get()) {
-      readNextRecord();
-    }
-    return !this.isDone.get();
-  }
-
-  @Override
-  public O next() {
-    Preconditions.checkState(hasNext() && this.nextRecord != null);
-    final O ret = this.nextRecord;
-    this.nextRecord = null;
-    return ret;
-  }
-
-  private void throwExceptionIfFailed() {
-    if (this.hasFailed.get() != null) {
-      throw new HoodieException("operation has failed", this.hasFailed.get());
-    }
-  }
-
-  public void markAsFailed(Exception e) {
-    this.hasFailed.set(e);
-    // release the permits so that if the buffering thread is waiting for permits then it will
-    // get it.
-    this.rateLimiter.release(RECORD_CACHING_LIMIT + 1);
-  }
-}
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/BufferedIteratorExecutor.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/BufferedIteratorExecutor.java
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *          http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package com.uber.hoodie.func;
-
-import com.uber.hoodie.config.HoodieWriteConfig;
-import com.uber.hoodie.exception.HoodieException;
-import java.util.Iterator;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Future;
-import java.util.function.Function;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
-import org.apache.spark.TaskContext;
-import org.apache.spark.TaskContext$;
-
-/**
- * Executor for a BufferedIterator operation. This class takes as input the input iterator which
- * needs to be buffered, the runnable function that needs to be executed in the reader thread and
- * return the transformed output based on the writer function
- */
-public class BufferedIteratorExecutor<I, O, E> {
-
-  private static Logger logger = LogManager.getLogger(BufferedIteratorExecutor.class);
-
-  // Executor service used for launching writer thread.
-  final ExecutorService writerService;
-  // Used for buffering records which is controlled by HoodieWriteConfig#WRITE_BUFFER_LIMIT_BYTES.
-  final BufferedIterator<I, O> bufferedIterator;
-  // Need to set current spark thread's TaskContext into newly launched thread so that new
-  // thread can access
-  // TaskContext properties.
-  final TaskContext sparkThreadTaskContext;
-
-  public BufferedIteratorExecutor(final HoodieWriteConfig hoodieConfig, final Iterator<I> inputItr,
-      final Function<I, O> bufferedIteratorTransform,
-      final ExecutorService writerService) {
-    this.sparkThreadTaskContext = TaskContext.get();
-    this.writerService = writerService;
-    this.bufferedIterator = new BufferedIterator<>(inputItr, hoodieConfig.getWriteBufferLimitBytes(),
-        bufferedIteratorTransform);
-  }
-
-  /**
-   * Starts buffering and executing the writer function
-   */
-  public Future<E> start(Function<BufferedIterator, E> writerFunction) {
-    try {
-      Future<E> future = writerService.submit(
-          () -> {
-            logger.info("starting hoodie writer thread");
-            // Passing parent thread's TaskContext to newly launched thread for it to access original TaskContext
-            // properties.
-            TaskContext$.MODULE$.setTaskContext(sparkThreadTaskContext);
-            try {
-              E result = writerFunction.apply(bufferedIterator);
-              logger.info("hoodie write is done; notifying reader thread");
-              return result;
-            } catch (Exception e) {
-              logger.error("error writing hoodie records", e);
-              bufferedIterator.markAsFailed(e);
-              throw e;
-            }
-          });
-      bufferedIterator.startBuffering();
-      return future;
-    } catch (Exception e) {
-      throw new HoodieException(e);
-    }
-  }
-
-  public boolean isRemaining() {
-    return bufferedIterator.hasNext();
-  }
-}
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/LazyInsertIterable.java
@@ -19,27 +19,25 @@ package com.uber.hoodie.func;
 import com.uber.hoodie.WriteStatus;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.exception.HoodieException;
-import com.uber.hoodie.func.payload.AbstractBufferedIteratorPayload;
-import com.uber.hoodie.func.payload.HoodieRecordBufferedIteratorPayload;
 import com.uber.hoodie.io.HoodieCreateHandle;
 import com.uber.hoodie.io.HoodieIOHandle;
 import com.uber.hoodie.table.HoodieTable;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
 import java.util.function.Function;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.spark.TaskContext;
+import scala.Tuple2;

 /**
 * Lazy Iterable, that writes a stream of HoodieRecords sorted by the partitionPath, into new
@@ -52,7 +50,6 @@ public class LazyInsertIterable<T extends HoodieRecordPayload> extends
  private final String commitTime;
  private final HoodieTable<T> hoodieTable;
  private Set<String> partitionsCleaned;
-  private HoodieCreateHandle handle;

  public LazyInsertIterable(Iterator<HoodieRecord<T>> sortedRecordItr, HoodieWriteConfig config,
      String commitTime, HoodieTable<T> hoodieTable) {
@@ -63,57 +60,68 @@ public class LazyInsertIterable<T extends HoodieRecordPayload> extends
    this.hoodieTable = hoodieTable;
  }

-  @Override
-  protected void start() {
-  }
-
  /**
   * Transformer function to help transform a HoodieRecord. This transformer is used by BufferedIterator to offload some
   * expensive operations of transformation to the reader thread.
-   * @param schema
-   * @param <T>
-   * @return
   */
-  public static <T extends HoodieRecordPayload> Function<HoodieRecord<T>, AbstractBufferedIteratorPayload>
-      bufferedItrPayloadTransform(Schema schema) {
-    return (hoodieRecord) -> new HoodieRecordBufferedIteratorPayload(hoodieRecord, schema);
+  static <T extends HoodieRecordPayload> Function<HoodieRecord<T>,
+      Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>> getTransformFunction(Schema schema) {
+    return hoodieRecord -> {
+      try {
+        return new Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>(hoodieRecord,
+            hoodieRecord.getData().getInsertValue(schema));
+      } catch (IOException e) {
+        throw new HoodieException(e);
+      }
+    };
+  }
+
+  @Override
+  protected void start() {
  }

  @Override
  protected List<WriteStatus> computeNext() {
    // Executor service used for launching writer thread.
-    final ExecutorService writerService = Executors.newFixedThreadPool(1);
+    BoundedInMemoryExecutor<HoodieRecord<T>,
+        Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>, List<WriteStatus>> bufferedIteratorExecutor = null;
    try {
-      Function<BufferedIterator, List<WriteStatus>> function = (bufferedIterator) -> {
-        List<WriteStatus> statuses = new LinkedList<>();
-        statuses.addAll(handleWrite(bufferedIterator));
-        return statuses;
-      };
-      BufferedIteratorExecutor<HoodieRecord<T>, AbstractBufferedIteratorPayload, List<WriteStatus>>
-          bufferedIteratorExecutor = new BufferedIteratorExecutor(hoodieConfig, inputItr,
-          bufferedItrPayloadTransform(HoodieIOHandle.createHoodieWriteSchema(hoodieConfig)),
-              writerService);
-      Future<List<WriteStatus>> writerResult = bufferedIteratorExecutor.start(function);
-      final List<WriteStatus> result = writerResult.get();
+      final Schema schema = HoodieIOHandle.createHoodieWriteSchema(hoodieConfig);
+      bufferedIteratorExecutor =
+          new SparkBoundedInMemoryExecutor<>(hoodieConfig, inputItr,
+              new InsertHandler(), getTransformFunction(schema));
+      final List<WriteStatus> result = bufferedIteratorExecutor.execute();
      assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining();
      return result;
    } catch (Exception e) {
      throw new HoodieException(e);
    } finally {
-      writerService.shutdownNow();
+      if (null != bufferedIteratorExecutor) {
+        bufferedIteratorExecutor.shutdownNow();
+      }
    }
  }

-  private List<WriteStatus> handleWrite(
-      final BufferedIterator<HoodieRecord<T>, AbstractBufferedIteratorPayload> bufferedIterator) {
-    List<WriteStatus> statuses = new ArrayList<>();
-    while (bufferedIterator.hasNext()) {
-      final HoodieRecordBufferedIteratorPayload payload = (HoodieRecordBufferedIteratorPayload) bufferedIterator
-          .next();
-      final HoodieRecord insertPayload = (HoodieRecord) payload.getInputPayload();
+  @Override
+  protected void end() {
+
+  }
+
+  /**
+   * Consumes stream of hoodie records from in-memory queue and
+   * writes to one or more create-handles
+   */
+  private class InsertHandler extends
+      BoundedInMemoryQueueConsumer<Tuple2<HoodieRecord<T>, Optional<IndexedRecord>>, List<WriteStatus>> {
+
+    private final List<WriteStatus> statuses = new ArrayList<>();
+    private HoodieCreateHandle handle;
+
+    @Override
+    protected void consumeOneRecord(Tuple2<HoodieRecord<T>, Optional<IndexedRecord>> payload) {
+      final HoodieRecord insertPayload = payload._1();
      // clean up any partial failures
-      if (!partitionsCleaned
-          .contains(insertPayload.getPartitionPath())) {
+      if (!partitionsCleaned.contains(insertPayload.getPartitionPath())) {
        // This insert task could fail multiple times, but Spark will faithfully retry with
        // the same data again. Thus, before we open any files under a given partition, we
        // first delete any files in the same partitionPath written by same Spark partition
@@ -127,33 +135,30 @@ public class LazyInsertIterable<T extends HoodieRecordPayload> extends
        handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath());
      }

-      if (handle.canWrite(((HoodieRecord) payload.getInputPayload()))) {
+      if (handle.canWrite(payload._1())) {
        // write the payload, if the handle has capacity
-        handle.write(insertPayload, (Optional<IndexedRecord>) payload.getOutputPayload(), payload.exception);
+        handle.write(insertPayload, payload._2());
      } else {
        // handle is full.
        statuses.add(handle.close());
        // Need to handle the rejected payload & open new handle
        handle = new HoodieCreateHandle(hoodieConfig, commitTime, hoodieTable, insertPayload.getPartitionPath());
-        handle.write(insertPayload,
-            (Optional<IndexedRecord>) payload.getOutputPayload(),
-            payload.exception); // we should be able to write 1 payload.
+        handle.write(insertPayload, payload._2()); // we should be able to write 1 payload.
      }
    }

-    // If we exited out, because we ran out of records, just close the pending handle.
-    if (!bufferedIterator.hasNext()) {
+    @Override
+    protected void finish() {
      if (handle != null) {
        statuses.add(handle.close());
      }
+      handle = null;
+      assert statuses.size() > 0;
    }

-    assert statuses.size() > 0 && !bufferedIterator.hasNext(); // should never return empty statuses
-    return statuses;
-  }
-
-  @Override
-  protected void end() {
-
+    @Override
+    protected List<WriteStatus> getResult() {
+      return statuses;
+    }
  }
 }
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/ParquetReaderIterator.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/ParquetReaderIterator.java
@@ -16,6 +16,7 @@

 package com.uber.hoodie.func;

+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueue;
 import com.uber.hoodie.exception.HoodieIOException;
 import java.io.IOException;
 import java.util.Iterator;
@@ -23,7 +24,7 @@ import org.apache.parquet.hadoop.ParquetReader;

 /**
 * This class wraps a parquet reader and provides an iterator based api to
- * read from a parquet file. This is used in {@link BufferedIterator}
+ * read from a parquet file. This is used in {@link BoundedInMemoryQueue}
 */
 public class ParquetReaderIterator<T> implements Iterator<T> {

--- a/hoodie-client/src/main/java/com/uber/hoodie/func/SparkBoundedInMemoryExecutor.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/SparkBoundedInMemoryExecutor.java
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.func;
+
+import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueProducer;
+import com.uber.hoodie.common.util.queue.IteratorBasedQueueProducer;
+import com.uber.hoodie.config.HoodieWriteConfig;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.function.Function;
+import org.apache.spark.TaskContext;
+import org.apache.spark.TaskContext$;
+
+public class SparkBoundedInMemoryExecutor<I, O, E> extends BoundedInMemoryExecutor<I, O, E> {
+
+  // Need to set current spark thread's TaskContext into newly launched thread so that new thread can access
+  // TaskContext properties.
+  final TaskContext sparkThreadTaskContext;
+
+  public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, final Iterator<I> inputItr,
+      BoundedInMemoryQueueConsumer<O, E> consumer,
+      Function<I, O> bufferedIteratorTransform) {
+    this(hoodieConfig, new IteratorBasedQueueProducer<>(inputItr), consumer, bufferedIteratorTransform);
+  }
+
+  public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig,
+      BoundedInMemoryQueueProducer<I> producer,
+      BoundedInMemoryQueueConsumer<O, E> consumer,
+      Function<I, O> bufferedIteratorTransform) {
+    super(hoodieConfig.getWriteBufferLimitBytes(), producer,
+        Optional.of(consumer), bufferedIteratorTransform);
+    this.sparkThreadTaskContext = TaskContext.get();
+  }
+
+  public void preExecute() {
+    // Passing parent thread's TaskContext to newly launched thread for it to access original TaskContext properties.
+    TaskContext$.MODULE$.setTaskContext(sparkThreadTaskContext);
+  }
+}
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/payload/AbstractBufferedIteratorPayload.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/payload/AbstractBufferedIteratorPayload.java
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *          http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.uber.hoodie.func.payload;
-
-/**
- * @param <I> Input data type for BufferedIterator
- * @param <O> Output data type for BufferedIterator
- */
-public abstract class AbstractBufferedIteratorPayload<I, O> {
-
-  // input payload for iterator
-  protected I inputPayload;
-  // output payload for iterator, this is used in cases where the output payload is computed
-  // from the input payload and most of this computation is off-loaded to the reader
-  protected O outputPayload;
-
-  public AbstractBufferedIteratorPayload(I record) {
-    this.inputPayload = record;
-  }
-
-  public I getInputPayload() {
-    return inputPayload;
-  }
-
-  public O getOutputPayload() {
-    return outputPayload;
-  }
-}
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/payload/HoodieRecordBufferedIteratorPayload.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/payload/HoodieRecordBufferedIteratorPayload.java
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *          http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.uber.hoodie.func.payload;
-
-import com.uber.hoodie.common.model.HoodieRecord;
-import com.uber.hoodie.common.model.HoodieRecordPayload;
-import java.util.Optional;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-
-/**
- * BufferedIteratorPayload that takes HoodieRecord as input and transforms to output Optional<IndexedRecord>
- * @param <T>
- */
-public class HoodieRecordBufferedIteratorPayload<T extends HoodieRecordPayload>
-    extends AbstractBufferedIteratorPayload<HoodieRecord<T>, Optional<IndexedRecord>> {
-
-  // It caches the exception seen while fetching insert value.
-  public Optional<Exception> exception = Optional.empty();
-
-  public HoodieRecordBufferedIteratorPayload(HoodieRecord record, Schema schema) {
-    super(record);
-    try {
-      this.outputPayload = record.getData().getInsertValue(schema);
-    } catch (Exception e) {
-      this.exception = Optional.of(e);
-    }
-  }
-
-  public Optional<Exception> getException() {
-    return exception;
-  }
-}
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java
@@ -90,15 +90,9 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
  /**
   * Perform the actual writing of the given record into the backing file.
   */
-  public void write(HoodieRecord record, Optional<IndexedRecord> insertValue,
-      Optional<Exception> getInsertValueException) {
+  public void write(HoodieRecord record, Optional<IndexedRecord> avroRecord) {
    Optional recordMetadata = record.getData().getMetadata();
    try {
-      // throws exception if there was any exception while fetching insert value
-      if (getInsertValueException.isPresent()) {
-        throw getInsertValueException.get();
-      }
-      Optional<IndexedRecord> avroRecord = insertValue;
      if (avroRecord.isPresent()) {
        storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
        // update the new location of record, so we know where to find it next
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieMergeHandle.java
@@ -24,7 +24,9 @@ import com.uber.hoodie.common.model.HoodieRecordPayload;
 import com.uber.hoodie.common.model.HoodieWriteStat;
 import com.uber.hoodie.common.model.HoodieWriteStat.RuntimeStats;
 import com.uber.hoodie.common.table.TableFileSystemView;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
 import com.uber.hoodie.common.util.FSUtils;
+import com.uber.hoodie.common.util.HoodieRecordSizeEstimator;
 import com.uber.hoodie.common.util.ReflectionUtils;
 import com.uber.hoodie.common.util.collection.ExternalSpillableMap;
 import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
@@ -143,7 +145,8 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
      logger.info("MaxMemoryPerPartitionMerge => " + config.getMaxMemoryPerPartitionMerge());
      this.keyToNewRecords = new ExternalSpillableMap<>(config.getMaxMemoryPerPartitionMerge(),
          config.getSpillableMapBasePath(), new StringConverter(),
-          new HoodieRecordConverter(schema, config.getPayloadClass()));
+          new HoodieRecordConverter(schema, config.getPayloadClass()),
+          new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema));
    } catch (IOException io) {
      throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
    }
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/compact/HoodieRealtimeTableCompactor.java
@@ -28,7 +28,7 @@ import com.uber.hoodie.common.model.HoodieWriteStat.RuntimeStats;
 import com.uber.hoodie.common.table.HoodieTableMetaClient;
 import com.uber.hoodie.common.table.HoodieTimeline;
 import com.uber.hoodie.common.table.TableFileSystemView;
-import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner;
+import com.uber.hoodie.common.table.log.HoodieMergedLogRecordScanner;
 import com.uber.hoodie.common.util.FSUtils;
 import com.uber.hoodie.common.util.HoodieAvroUtils;
 import com.uber.hoodie.config.HoodieWriteConfig;
@@ -115,7 +115,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {

        .filterCompletedInstants().lastInstant().get().getTimestamp();
    log.info("MaxMemoryPerCompaction => " + config.getMaxMemoryPerCompaction());
-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs,
        metaClient.getBasePath(), operation.getDeltaFilePaths(), readerSchema, maxInstantTime,
        config.getMaxMemoryPerCompaction(), config.getCompactionLazyBlockReadEnabled(),
        config.getCompactionReverseLogReadEnabled(), config.getMaxDFSStreamBufferSize(),
@@ -131,7 +131,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
    Iterable<List<WriteStatus>> resultIterable = () -> result;
    return StreamSupport.stream(resultIterable.spliterator(), false).flatMap(Collection::stream)
        .map(s -> {
-          s.getStat().setTotalUpdatedRecordsCompacted(scanner.getTotalRecordsToUpdate());
+          s.getStat().setTotalUpdatedRecordsCompacted(scanner.getNumMergedRecordsInLog());
          s.getStat().setTotalLogFilesCompacted(scanner.getTotalLogFiles());
          s.getStat().setTotalLogRecords(scanner.getTotalLogRecords());
          s.getStat().setPartitionPath(operation.getPartitionPath());
--- a/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/table/HoodieCopyOnWriteTable.java
@@ -33,17 +33,16 @@ import com.uber.hoodie.common.table.HoodieTimeline;
 import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
 import com.uber.hoodie.common.table.timeline.HoodieInstant;
 import com.uber.hoodie.common.util.FSUtils;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import com.uber.hoodie.exception.HoodieException;
 import com.uber.hoodie.exception.HoodieIOException;
 import com.uber.hoodie.exception.HoodieNotSupportedException;
 import com.uber.hoodie.exception.HoodieUpsertException;
-import com.uber.hoodie.func.BufferedIterator;
-import com.uber.hoodie.func.BufferedIteratorExecutor;
 import com.uber.hoodie.func.LazyInsertIterable;
 import com.uber.hoodie.func.ParquetReaderIterator;
-import com.uber.hoodie.func.payload.AbstractBufferedIteratorPayload;
-import com.uber.hoodie.func.payload.GenericRecordBufferedIteratorPayload;
+import com.uber.hoodie.func.SparkBoundedInMemoryExecutor;
 import com.uber.hoodie.io.HoodieCleanHelper;
 import com.uber.hoodie.io.HoodieMergeHandle;
 import java.io.IOException;
@@ -58,9 +57,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
 import java.util.stream.Collectors;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.IndexedRecord;
@@ -182,16 +178,6 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
    return handleUpdateInternal(upsertHandle, commitTime, fileLoc);
  }

-  /**
-   * Transformer function to help transform a GenericRecord. This transformer is used by BufferedIterator to offload
-   * some expensive operations of transformation to the reader thread.
-   *
-   */
-  public static java.util.function.Function<GenericRecord, AbstractBufferedIteratorPayload>
-      bufferedItrPayloadTransform() {
-    return (genericRecord) -> new GenericRecordBufferedIteratorPayload(genericRecord);
-  }
-
  protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle,
      String commitTime, String fileLoc)
      throws IOException {
@@ -202,23 +188,19 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
      AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
      ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath())
          .withConf(getHadoopConf()).build();
-      final ExecutorService writerService = Executors.newFixedThreadPool(1);
+      BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
      try {
-        java.util.function.Function<BufferedIterator, Void> runnableFunction = (bufferedIterator) -> {
-          handleWrite(bufferedIterator, upsertHandle);
-          return null;
-        };
-        BufferedIteratorExecutor<GenericRecord, AbstractBufferedIteratorPayload, Void> wrapper =
-            new BufferedIteratorExecutor(config, new ParquetReaderIterator(reader), bufferedItrPayloadTransform(),
-                writerService);
-        Future writerResult = wrapper.start(runnableFunction);
-        writerResult.get();
+        wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
+            new UpdateHandler(upsertHandle), x -> x);
+        wrapper.execute();
      } catch (Exception e) {
        throw new HoodieException(e);
      } finally {
        reader.close();
        upsertHandle.close();
-        writerService.shutdownNow();
+        if (null != wrapper) {
+          wrapper.shutdownNow();
+        }
      }
    }

@@ -231,15 +213,6 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
        .iterator();
  }

-  private void handleWrite(final BufferedIterator<GenericRecord, GenericRecord> bufferedIterator,
-      final HoodieMergeHandle upsertHandle) {
-    while (bufferedIterator.hasNext()) {
-      final GenericRecordBufferedIteratorPayload payload = (GenericRecordBufferedIteratorPayload) bufferedIterator
-          .next();
-      upsertHandle.write(payload.getOutputPayload());
-    }
-  }
-
  protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileLoc,
      Iterator<HoodieRecord<T>> recordItr) {
    return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileLoc);
@@ -493,6 +466,32 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
    UPDATE, INSERT
  }

+  /**
+   * Queue consumer that hands every record it receives to HoodieMergeHandle#write and produces no result.
+   */
+  private static class UpdateHandler extends BoundedInMemoryQueueConsumer<GenericRecord, Void> {
+
+    private final HoodieMergeHandle upsertHandle;
+
+    private UpdateHandler(HoodieMergeHandle upsertHandle) {
+      this.upsertHandle = upsertHandle;
+    }
+
+    @Override
+    protected void consumeOneRecord(GenericRecord record) {
+      upsertHandle.write(record);
+    }
+
+    @Override
+    protected void finish() { // intentionally a no-op
+    }
+
+    @Override
+    protected Void getResult() {
+      return null; // Void result type: there is nothing to hand back
+    }
+  }
+
  private static class PartitionCleanStat implements Serializable {

    private final String partitionPath;
--- a/hoodie-client/src/test/java/com/uber/hoodie/func/TestBufferedIteratorExecutor.java
+++ b/hoodie-client/src/test/java/com/uber/hoodie/func/TestBufferedIteratorExecutor.java
@@ -16,39 +16,35 @@

 package com.uber.hoodie.func;

+import static com.uber.hoodie.func.LazyInsertIterable.getTransformFunction;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;

 import com.uber.hoodie.common.HoodieTestDataGenerator;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer;
 import com.uber.hoodie.config.HoodieWriteConfig;
 import java.util.List;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.function.Function;
+import java.util.Optional;
+import org.apache.avro.generic.IndexedRecord;
 import org.junit.After;
 import org.junit.Assert;
-import org.junit.Before;
 import org.junit.Test;
+import scala.Tuple2;

-public class TestBufferedIteratorExecutor {
+public class TestBoundedInMemoryExecutor {

  private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
  private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
-  private ExecutorService executorService = null;
-
-  @Before
-  public void beforeTest() {
-    this.executorService = Executors.newFixedThreadPool(1);
-  }
+  private SparkBoundedInMemoryExecutor<HoodieRecord,
+        Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer> executor = null;

  @After
  public void afterTest() {
-    if (this.executorService != null) {
-      this.executorService.shutdownNow();
-      this.executorService = null;
+    if (this.executor != null) {
+      this.executor.shutdownNow();
+      this.executor = null;
    }
  }

@@ -59,21 +55,32 @@ public class TestBufferedIteratorExecutor {

    HoodieWriteConfig hoodieWriteConfig = mock(HoodieWriteConfig.class);
    when(hoodieWriteConfig.getWriteBufferLimitBytes()).thenReturn(1024);
-    BufferedIteratorExecutor bufferedIteratorExecutor = new BufferedIteratorExecutor(hoodieWriteConfig,
-        hoodieRecords.iterator(), LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema),
-        executorService);
-    Function<BufferedIterator, Integer> function = (bufferedIterator) -> {
-      Integer count = 0;
-      while (bufferedIterator.hasNext()) {
-        count++;
-        bufferedIterator.next();
-      }
-      return count;
-    };
-    Future<Integer> future = bufferedIteratorExecutor.start(function);
+    BoundedInMemoryQueueConsumer<Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer> consumer =
+        new BoundedInMemoryQueueConsumer<Tuple2<HoodieRecord, Optional<IndexedRecord>>, Integer>() {
+
+          private int count = 0;
+
+          @Override
+          protected void consumeOneRecord(Tuple2<HoodieRecord, Optional<IndexedRecord>> record) {
+            count++;
+          }
+
+          @Override
+          protected void finish() {
+          }
+
+          @Override
+          protected Integer getResult() {
+            return count;
+          }
+        };
+
+    executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig,
+        hoodieRecords.iterator(), consumer, getTransformFunction(HoodieTestDataGenerator.avroSchema));
+    int result = executor.execute();
    // It should buffer and write 100 records
-    Assert.assertEquals((int) future.get(), 100);
+    Assert.assertEquals(result, 100);
    // There should be no remaining records in the buffer
-    Assert.assertFalse(bufferedIteratorExecutor.isRemaining());
+    Assert.assertFalse(executor.isRemaining());
  }
 }
--- a/hoodie-client/src/test/java/com/uber/hoodie/func/TestBoundedInMemoryQueue.java
+++ b/hoodie-client/src/test/java/com/uber/hoodie/func/TestBoundedInMemoryQueue.java
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.uber.hoodie.func;
+
+import static com.uber.hoodie.func.LazyInsertIterable.getTransformFunction;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.uber.hoodie.common.HoodieTestDataGenerator;
+import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
+import com.uber.hoodie.common.util.SizeEstimator;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueue;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueProducer;
+import com.uber.hoodie.common.util.queue.FunctionBasedQueueProducer;
+import com.uber.hoodie.common.util.queue.IteratorBasedQueueProducer;
+import com.uber.hoodie.exception.HoodieException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.Semaphore;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.commons.io.FileUtils;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import scala.Tuple2;
+
+public class TestBoundedInMemoryQueue {
+
+  private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
+  private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
+  private ExecutorService executorService = null;
+
+  @Before
+  public void beforeTest() {
+    this.executorService = Executors.newFixedThreadPool(2); // fresh 2-thread pool for each test's producer tasks
+  }
+
+  @After
+  public void afterTest() {
+    if (this.executorService != null) {
+      this.executorService.shutdownNow(); // also attempts to stop producer tasks that are still running
+      this.executorService = null;
+    }
+  }
+
+  // Test to ensure that all records are read back from the queue iterator in insertion order
+  // and that the producer task completes without any exception.
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 60000)
+  public void testRecordReading() throws Exception {
+    final int numRecords = 128;
+    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
+    final BoundedInMemoryQueue<HoodieRecord,
+        Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue = new BoundedInMemoryQueue(FileUtils.ONE_KB,
+        getTransformFunction(HoodieTestDataGenerator.avroSchema));
+    // Produce on a pool thread and close the queue once produce() has returned
+    Future<Boolean> resFuture =
+        executorService.submit(() -> {
+          new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
+          queue.close();
+          return true;
+        });
+    final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
+    int recordsRead = 0;
+    while (queue.iterator().hasNext()) {
+      final HoodieRecord originalRecord = originalRecordIterator.next();
+      final Optional<IndexedRecord> originalInsertValue = originalRecord.getData()
+          .getInsertValue(HoodieTestDataGenerator.avroSchema);
+      final Tuple2<HoodieRecord, Optional<IndexedRecord>> payload = queue.iterator().next();
+      // Records must come out of the queue in the order they were inserted.
+      Assert.assertEquals(originalRecord, payload._1());
+      // insert value derived from the dequeued record matches the one derived from the original.
+      Assert.assertEquals(originalInsertValue,
+          payload._1().getData().getInsertValue(HoodieTestDataGenerator.avroSchema));
+      recordsRead++;
+    }
+    Assert.assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext());
+    // all the records should be read successfully.
+    Assert.assertEquals(numRecords, recordsRead);
+    // the producer task should not have thrown any exception.
+    resFuture.get();
+  }
+
+  /**
+   * Test to ensure that every record is read from the queue iterator, in per-producer order, with many producers.
+   */
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 60000)
+  public void testCompositeProducerRecordReading() throws Exception {
+    final int numRecords = 1000;
+    final int numProducers = 40;
+    final List<List<HoodieRecord>> recs = new ArrayList<>();
+
+    final BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue =
+        new BoundedInMemoryQueue(FileUtils.ONE_KB, getTransformFunction(HoodieTestDataGenerator.avroSchema));
+
+    // Record Key to <Producer Index, Rec Index within a producer>
+    Map<String, Tuple2<Integer, Integer>> keyToProducerAndIndexMap = new HashMap<>();
+
+    for (int i = 0; i < numProducers; i++) {
+      List<HoodieRecord> pRecs = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
+      int j = 0;
+      for (HoodieRecord r : pRecs) {
+        Assert.assertTrue(!keyToProducerAndIndexMap.containsKey(r.getRecordKey()));
+        keyToProducerAndIndexMap.put(r.getRecordKey(), new Tuple2<>(i, j));
+        j++;
+      }
+      recs.add(pRecs);
+    }
+
+    List<BoundedInMemoryQueueProducer<HoodieRecord>> producers = new ArrayList<>();
+    for (int i = 0; i < recs.size(); i++) {
+      final List<HoodieRecord> r = recs.get(i);
+      // Alternate between iterator-based (pull) and function-based (push) producers
+      if (i % 2 == 0) {
+        producers.add(new IteratorBasedQueueProducer<>(r.iterator()));
+      } else {
+        producers.add(new FunctionBasedQueueProducer<HoodieRecord>((buf) -> {
+          Iterator<HoodieRecord> itr = r.iterator();
+          while (itr.hasNext()) {
+            try {
+              buf.insertRecord(itr.next());
+            } catch (Exception e) {
+              throw new HoodieException(e);
+            }
+          }
+          return true;
+        }));
+      }
+    }
+
+    final List<Future<Boolean>> futureList = producers.stream().map(producer -> {
+      return executorService.submit(() -> {
+        producer.produce(queue);
+        return true;
+      });
+    }).collect(Collectors.toList());
+
+    // Close the queue once every producer task has completed
+    Future<Boolean> closeFuture = executorService.submit(() -> {
+      try {
+        for (Future f : futureList) {
+          f.get();
+        }
+        queue.close();
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+      return true;
+    });
+
+    // Used to ensure that consumer sees the records generated by a single producer in FIFO order
+    Map<Integer, Integer> lastSeenMap = IntStream.range(0, numProducers).boxed()
+        .collect(Collectors.toMap(Function.identity(), x -> -1));
+    Map<Integer, Integer> countMap = IntStream.range(0, numProducers).boxed()
+        .collect(Collectors.toMap(Function.identity(), x -> 0));
+
+    // Read recs and ensure we have covered all producer recs.
+    while (queue.iterator().hasNext()) {
+      final Tuple2<HoodieRecord, Optional<IndexedRecord>> payload = queue.iterator().next();
+      final HoodieRecord rec = payload._1();
+      Tuple2<Integer, Integer> producerPos = keyToProducerAndIndexMap.get(rec.getRecordKey());
+      Integer lastSeenPos = lastSeenMap.get(producerPos._1());
+      countMap.put(producerPos._1(), countMap.get(producerPos._1()) + 1);
+      lastSeenMap.put(producerPos._1(), lastSeenPos + 1);
+      // Ensure we are seeing the next record generated by that producer
+      Assert.assertEquals(lastSeenPos + 1, producerPos._2().intValue());
+    }
+
+    for (int i = 0; i < numProducers; i++) {
+      // Ensure we have seen all the records for each producer
+      Assert.assertEquals(Integer.valueOf(numRecords), countMap.get(i));
+    }
+
+    // Ensure the close future is done
+    closeFuture.get();
+  }
+
+  // Test to ensure that record queueing is throttled when we hit memory limit.
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 60000)
+  public void testMemoryLimitForBuffering() throws Exception {
+    final int numRecords = 128;
+    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
+    // maximum number of records to keep in memory.
+    final int recordLimit = 5;
+    final SizeEstimator<Tuple2<HoodieRecord, Optional<IndexedRecord>>> sizeEstimator =
+        new DefaultSizeEstimator<>();
+    final long objSize = sizeEstimator.sizeEstimate(
+        getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0)));
+    final long memoryLimitInBytes = recordLimit * objSize;
+    final BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue =
+        new BoundedInMemoryQueue(memoryLimitInBytes,
+            getTransformFunction(HoodieTestDataGenerator.avroSchema));
+
+    // Produce on a pool thread; the queue is never closed in this test
+    Future<Boolean> resFuture = executorService.submit(() -> {
+      new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue);
+      return true;
+    });
+    // wait until no permits are left and a thread is queued for one (see isQueueFull).
+    while (!isQueueFull(queue.rateLimiter)) {
+      Thread.sleep(10);
+    }
+    Assert.assertEquals(0, queue.rateLimiter.availablePermits());
+    Assert.assertEquals(recordLimit, queue.currentRateLimit);
+    Assert.assertEquals(recordLimit, queue.size());
+    Assert.assertEquals(recordLimit - 1, queue.samplingRecordCounter.get());
+
+    // read 2 records; they must be the first two that were queued.
+    Assert.assertEquals(hoodieRecords.get(0), queue.iterator().next()._1());
+    Assert.assertEquals(hoodieRecords.get(1), queue.iterator().next()._1());
+
+    // wait again until no permits are left and a thread is queued for one.
+    while (!isQueueFull(queue.rateLimiter)) {
+      Thread.sleep(10);
+    }
+    // No change is expected in rate limit or number of queued records. We only expect the
+    // queueing thread to have read 2 more records into the queue, which is why the sampling
+    // record counter is expected to have advanced by 2.
+    Assert.assertEquals(0, queue.rateLimiter.availablePermits());
+    Assert.assertEquals(recordLimit, queue.currentRateLimit);
+    Assert.assertEquals(recordLimit, queue.size());
+    Assert.assertEquals(recordLimit - 1 + 2, queue.samplingRecordCounter.get());
+  }
+
+  // Test to ensure that an exception raised in either the queueing (producer) thread or
+  // the thread reading from the queue iterator
+  // is propagated to the other thread.
+  @SuppressWarnings("unchecked")
+  @Test(timeout = 60000)
+  public void testException() throws Exception {
+    final int numRecords = 256;
+    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
+    final SizeEstimator<Tuple2<HoodieRecord, Optional<IndexedRecord>>> sizeEstimator =
+        new DefaultSizeEstimator<>();
+    // queue memory limit: four times the estimated size of the first transformed record
+    final long objSize = sizeEstimator.sizeEstimate(
+        getTransformFunction(HoodieTestDataGenerator.avroSchema).apply(hoodieRecords.get(0)));
+    final long memoryLimitInBytes = 4 * objSize;
+
+    // first let us fail the queue from the reader side (markAsFailed) and test that the
+    // queueing thread stops and throws the failure back, wrapped in a HoodieException, as
+    // asserted below.
+    BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue1 =
+        new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.avroSchema));
+
+    // Produce
+    Future<Boolean> resFuture = executorService.submit(() -> {
+      new IteratorBasedQueueProducer<>(hoodieRecords.iterator()).produce(queue1);
+      return true;
+    });
+
+    // wait until no permits are left and a thread is queued for one (see isQueueFull).
+    while (!isQueueFull(queue1.rateLimiter)) {
+      Thread.sleep(10);
+    }
+    // notify queueing thread of an exception and ensure that it exits.
+    final Exception e = new Exception("Failing it :)");
+    queue1.markAsFailed(e);
+    try {
+      resFuture.get();
+      Assert.fail("exception is expected");
+    } catch (ExecutionException e1) {
+      Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
+      Assert.assertEquals(e, e1.getCause().getCause());
+    }
+
+    // second let us raise an exception while doing record queueing. this exception should
+    // get propagated to the queue iterator reader as the cause of the exception thrown by
+    // hasNext().
+    final RuntimeException expectedException = new RuntimeException("failing record reading");
+    final Iterator<HoodieRecord> mockHoodieRecordsIterator = mock(Iterator.class);
+    when(mockHoodieRecordsIterator.hasNext()).thenReturn(true);
+    when(mockHoodieRecordsIterator.next()).thenThrow(expectedException);
+    BoundedInMemoryQueue<HoodieRecord, Tuple2<HoodieRecord, Optional<IndexedRecord>>> queue2 =
+        new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.avroSchema));
+
+    // Produce; on failure, mark the queue as failed before rethrowing
+    Future<Boolean> res = executorService.submit(() -> {
+      try {
+        new IteratorBasedQueueProducer<>(mockHoodieRecordsIterator).produce(queue2);
+      } catch (Exception ex) {
+        queue2.markAsFailed(ex);
+        throw ex;
+      }
+      return true;
+    });
+
+    try {
+      queue2.iterator().hasNext();
+      Assert.fail("exception is expected");
+    } catch (Exception e1) {
+      Assert.assertEquals(expectedException, e1.getCause());
+    }
+    // queueing thread should also have exited. make sure that it is not running.
+    try {
+      res.get();
+      Assert.fail("exception is expected");
+    } catch (ExecutionException e2) {
+      Assert.assertEquals(expectedException, e2.getCause());
+    }
+  }
+
+  private boolean isQueueFull(Semaphore rateLimiter) { // full = no permits left and a thread is queued for one
+    return (rateLimiter.availablePermits() == 0 && rateLimiter.hasQueuedThreads());
+  }
+}
--- a/hoodie-client/src/test/java/com/uber/hoodie/func/TestBufferedIterator.java
+++ b/hoodie-client/src/test/java/com/uber/hoodie/func/TestBufferedIterator.java
@@ -1,203 +0,0 @@
-/*
- * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *          http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.uber.hoodie.func;
-
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import com.uber.hoodie.common.HoodieTestDataGenerator;
-import com.uber.hoodie.common.model.HoodieRecord;
-import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
-import com.uber.hoodie.exception.HoodieException;
-import com.uber.hoodie.func.payload.AbstractBufferedIteratorPayload;
-import com.uber.hoodie.func.payload.HoodieRecordBufferedIteratorPayload;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Optional;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.Semaphore;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.commons.io.FileUtils;
-import org.apache.spark.util.SizeEstimator;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class TestBufferedIterator {
-
-  private final HoodieTestDataGenerator hoodieTestDataGenerator = new HoodieTestDataGenerator();
-  private final String commitTime = HoodieActiveTimeline.createNewCommitTime();
-  private ExecutorService recordReader = null;
-
-  @Before
-  public void beforeTest() {
-    this.recordReader = Executors.newFixedThreadPool(1);
-  }
-
-  @After
-  public void afterTest() {
-    if (this.recordReader != null) {
-      this.recordReader.shutdownNow();
-      this.recordReader = null;
-    }
-  }
-
-  // Test to ensure that we are reading all records from buffered iterator in the same order
-  // without any exceptions.
-  @Test(timeout = 60000)
-  public void testRecordReading() throws IOException, ExecutionException, InterruptedException {
-    final int numRecords = 128;
-    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
-    final BufferedIterator bufferedIterator = new BufferedIterator(hoodieRecords.iterator(), FileUtils.ONE_KB,
-        LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
-    Future<Boolean> result = recordReader.submit(() -> {
-      bufferedIterator.startBuffering();
-      return true;
-    });
-    final Iterator<HoodieRecord> originalRecordIterator = hoodieRecords.iterator();
-    int recordsRead = 0;
-    while (bufferedIterator.hasNext()) {
-      final HoodieRecord originalRecord = originalRecordIterator.next();
-      final Optional<IndexedRecord> originalInsertValue = originalRecord.getData()
-          .getInsertValue(HoodieTestDataGenerator.avroSchema);
-      final HoodieRecordBufferedIteratorPayload payload = (HoodieRecordBufferedIteratorPayload) bufferedIterator.next();
-      // Ensure that record ordering is guaranteed.
-      Assert.assertEquals(originalRecord, payload.getInputPayload());
-      // cached insert value matches the expected insert value.
-      Assert.assertEquals(originalInsertValue,
-          ((HoodieRecord) payload.getInputPayload()).getData().getInsertValue(HoodieTestDataGenerator.avroSchema));
-      recordsRead++;
-    }
-    Assert.assertFalse(bufferedIterator.hasNext() || originalRecordIterator.hasNext());
-    // all the records should be read successfully.
-    Assert.assertEquals(numRecords, recordsRead);
-    // should not throw any exceptions.
-    Assert.assertTrue(result.get());
-  }
-
-  // Test to ensure that record buffering is throttled when we hit memory limit.
-  @Test(timeout = 60000)
-  public void testMemoryLimitForBuffering() throws IOException, InterruptedException {
-    final int numRecords = 128;
-    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
-    // maximum number of records to keep in memory.
-    final int recordLimit = 5;
-    final long memoryLimitInBytes = recordLimit * SizeEstimator.estimate(hoodieRecords.get(0));
-    final BufferedIterator<HoodieRecord, AbstractBufferedIteratorPayload> bufferedIterator =
-        new BufferedIterator(hoodieRecords.iterator(), memoryLimitInBytes,
-        LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
-    Future<Boolean> result = recordReader.submit(() -> {
-      bufferedIterator.startBuffering();
-      return true;
-    });
-    // waiting for permits to expire.
-    while (!isQueueFull(bufferedIterator.rateLimiter)) {
-      Thread.sleep(10);
-    }
-    Assert.assertEquals(0, bufferedIterator.rateLimiter.availablePermits());
-    Assert.assertEquals(recordLimit, bufferedIterator.currentRateLimit);
-    Assert.assertEquals(recordLimit, bufferedIterator.size());
-    Assert.assertEquals(recordLimit - 1, bufferedIterator.samplingRecordCounter.get());
-
-    // try to read 2 records.
-    Assert.assertEquals(hoodieRecords.get(0), bufferedIterator.next().getInputPayload());
-    Assert.assertEquals(hoodieRecords.get(1), bufferedIterator.next().getInputPayload());
-
-    // waiting for permits to expire.
-    while (!isQueueFull(bufferedIterator.rateLimiter)) {
-      Thread.sleep(10);
-    }
-    // No change is expected in rate limit or number of buffered records. We only expect
-    // buffering thread to read
-    // 2 more records into the buffer.
-    Assert.assertEquals(0, bufferedIterator.rateLimiter.availablePermits());
-    Assert.assertEquals(recordLimit, bufferedIterator.currentRateLimit);
-    Assert.assertEquals(recordLimit, bufferedIterator.size());
-    Assert.assertEquals(recordLimit - 1 + 2, bufferedIterator.samplingRecordCounter.get());
-  }
-
-  // Test to ensure that exception in either buffering thread or BufferedIterator-reader thread
-  // is propagated to
-  // another thread.
-  @Test(timeout = 60000)
-  public void testException() throws IOException, InterruptedException {
-    final int numRecords = 256;
-    final List<HoodieRecord> hoodieRecords = hoodieTestDataGenerator.generateInserts(commitTime, numRecords);
-    // buffer memory limit
-    final long memoryLimitInBytes = 4 * SizeEstimator.estimate(hoodieRecords.get(0));
-
-    // first let us throw exception from bufferIterator reader and test that buffering thread
-    // stops and throws
-    // correct exception back.
-    BufferedIterator bufferedIterator1 = new BufferedIterator(hoodieRecords.iterator(), memoryLimitInBytes,
-        LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
-    Future<Boolean> result = recordReader.submit(() -> {
-      bufferedIterator1.startBuffering();
-      return true;
-    });
-    // waiting for permits to expire.
-    while (!isQueueFull(bufferedIterator1.rateLimiter)) {
-      Thread.sleep(10);
-    }
-    // notify buffering thread of an exception and ensure that it exits.
-    final Exception e = new Exception("Failing it :)");
-    bufferedIterator1.markAsFailed(e);
-    try {
-      result.get();
-      Assert.fail("exception is expected");
-    } catch (ExecutionException e1) {
-      Assert.assertEquals(HoodieException.class, e1.getCause().getClass());
-      Assert.assertEquals(e, e1.getCause().getCause());
-    }
-
-    // second let us raise an exception while doing record buffering. this exception should get
-    // propagated to
-    // buffered iterator reader.
-    final RuntimeException expectedException = new RuntimeException("failing record reading");
-    final Iterator<HoodieRecord> mockHoodieRecordsIterator = mock(Iterator.class);
-    when(mockHoodieRecordsIterator.hasNext()).thenReturn(true);
-    when(mockHoodieRecordsIterator.next()).thenThrow(expectedException);
-    BufferedIterator bufferedIterator2 = new BufferedIterator(mockHoodieRecordsIterator, memoryLimitInBytes,
-        LazyInsertIterable.bufferedItrPayloadTransform(HoodieTestDataGenerator.avroSchema));
-    Future<Boolean> result2 = recordReader.submit(() -> {
-      bufferedIterator2.startBuffering();
-      return true;
-    });
-    try {
-      bufferedIterator2.hasNext();
-      Assert.fail("exception is expected");
-    } catch (Exception e1) {
-      Assert.assertEquals(expectedException, e1.getCause());
-    }
-    // buffering thread should also have exited. make sure that it is not running.
-    try {
-      result2.get();
-      Assert.fail("exception is expected");
-    } catch (ExecutionException e2) {
-      Assert.assertEquals(expectedException, e2.getCause());
-    }
-  }
-
-  private boolean isQueueFull(Semaphore rateLimiter) {
-    return (rateLimiter.availablePermits() == 0 && rateLimiter.hasQueuedThreads());
-  }
-}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieCompactedLogRecordScanner.java
@@ -19,7 +19,6 @@ package com.uber.hoodie.common.table.log;
 import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HeaderMetadataType.INSTANT_TIME;
 import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;

-import com.uber.hoodie.common.model.HoodieKey;
 import com.uber.hoodie.common.model.HoodieLogFile;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
@@ -29,19 +28,14 @@ import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
 import com.uber.hoodie.common.table.log.block.HoodieCommandBlock;
 import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
 import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
-import com.uber.hoodie.common.util.HoodieTimer;
 import com.uber.hoodie.common.util.SpillableMapUtils;
-import com.uber.hoodie.common.util.collection.ExternalSpillableMap;
-import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
-import com.uber.hoodie.common.util.collection.converter.StringConverter;
 import com.uber.hoodie.exception.HoodieIOException;
-import java.io.IOException;
 import java.util.ArrayDeque;
 import java.util.Arrays;
 import java.util.Deque;
-import java.util.Iterator;
+import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
 import org.apache.avro.Schema;
@@ -53,24 +47,38 @@ import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;

 /**
- * Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will
- * be used as a lookup table when merging the base columnar file with the redo log file. NOTE:  If readBlockLazily is
+ * Implements logic to scan log blocks and expose valid and deleted log records to subclass implementation.
+ * Subclass is free to either apply merging or expose raw data back to the caller.
+ *
+ * NOTE:  If readBlockLazily is
 * turned on, does not merge, instead keeps reading log blocks and merges everything at once This is an optimization to
 * avoid seek() back and forth to read new block (forward seek()) and lazily read content of seen block (reverse and
 * forward seek()) during merge |            | Read Block 1 Metadata |            | Read Block 1 Data | | | Read Block 2
 * Metadata |            | Read Block 2 Data | | I/O Pass 1 | ..................... | I/O Pass 2 | ................. | |
 * | Read Block N Metadata | | Read Block N Data | <p> This results in two I/O passes over the log file.
 */
+public abstract class AbstractHoodieLogRecordScanner {

-public class HoodieCompactedLogRecordScanner implements
-    Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
+  private static final Logger log = LogManager.getLogger(AbstractHoodieLogRecordScanner.class);

-  private static final Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class);
-
-  // Final map of compacted/merged records
-  private final ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records;
  // Reader schema for the records
  private final Schema readerSchema;
+  // Latest valid instant time
+  private final String latestInstantTime;
+  private final HoodieTableMetaClient hoodieTableMetaClient;
+  // Merge strategy to use when combining records from log
+  private final String payloadClassFQN;
+  // Log File Paths
+  private final List<String> logFilePaths;
+  // Read Lazily flag
+  private final boolean readBlocksLazily;
+  // Reverse reader - Not implemented yet (NA -> Why do we need ?)
+  // but present here for plumbing for future implementation
+  private final boolean reverseReader;
+  // Buffer Size for log file reader
+  private final int bufferSize;
+  // FileSystem
+  private final FileSystem fs;
  // Total log files read - for metrics
  private AtomicLong totalLogFiles = new AtomicLong(0);
  // Total log blocks read - for metrics
@@ -81,46 +89,47 @@ public class HoodieCompactedLogRecordScanner implements
  private AtomicLong totalRollbacks = new AtomicLong(0);
  // Total number of corrupt blocks written across all log files
  private AtomicLong totalCorruptBlocks = new AtomicLong(0);
-  // Total final list of compacted/merged records
-  private long totalRecordsToUpdate;
-  // Latest valid instant time
-  private String latestInstantTime;
-  private HoodieTableMetaClient hoodieTableMetaClient;
-  // Merge strategy to use when combining records from log
-  private String payloadClassFQN;
  // Store the last instant log blocks (needed to implement rollback)
  private Deque<HoodieLogBlock> currentInstantLogBlocks = new ArrayDeque<>();
-  // Stores the total time taken to perform reading and merging of log blocks
-  private long totalTimeTakenToReadAndMergeBlocks = 0L;
-  // A timer for calculating elapsed time in millis
-  public HoodieTimer timer = new HoodieTimer();

-  public HoodieCompactedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
-      Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes,
-      boolean readBlocksLazily, boolean reverseReader, int bufferSize, String spillableMapBasePath) {
+  // Progress
+  private float progress = 0.0f;
+
+  public AbstractHoodieLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
+      Schema readerSchema, String latestInstantTime,
+      boolean readBlocksLazily, boolean reverseReader, int bufferSize) {
    this.readerSchema = readerSchema;
    this.latestInstantTime = latestInstantTime;
    this.hoodieTableMetaClient = new HoodieTableMetaClient(fs.getConf(), basePath);
    // load class from the payload fully qualified class name
    this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
    this.totalLogFiles.addAndGet(logFilePaths.size());
-    timer.startTimer();
+    this.logFilePaths = logFilePaths;
+    this.readBlocksLazily = readBlocksLazily;
+    this.reverseReader = reverseReader;
+    this.fs = fs;
+    this.bufferSize = bufferSize;
+  }

+  /**
+   * Scan Log files
+   */
+  public void scan() {
    try {
-      // Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
-      this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath,
-          new StringConverter(), new HoodieRecordConverter(readerSchema, payloadClassFQN));
      // iterate over the paths
      HoodieLogFormatReader logFormatReaderWrapper =
          new HoodieLogFormatReader(fs,
              logFilePaths.stream().map(logFile -> new HoodieLogFile(new Path(logFile)))
                  .collect(Collectors.toList()), readerSchema, readBlocksLazily, reverseReader, bufferSize);
-      HoodieLogFile logFile;
+      Set<HoodieLogFile> scannedLogFiles = new HashSet<>();
      while (logFormatReaderWrapper.hasNext()) {
-        logFile = logFormatReaderWrapper.getLogFile();
+        HoodieLogFile logFile = logFormatReaderWrapper.getLogFile();
        log.info("Scanning log file " + logFile);
+        scannedLogFiles.add(logFile);
+        totalLogFiles.set(scannedLogFiles.size());
        // Use the HoodieLogFileReader to iterate through the blocks in the log file
        HoodieLogBlock r = logFormatReaderWrapper.next();
+        totalLogBlocks.incrementAndGet();
        if (r.getBlockType() != CORRUPT_BLOCK
            && !HoodieTimeline.compareTimestamps(r.getLogBlockHeader().get(INSTANT_TIME),
            this.latestInstantTime,
@@ -134,7 +143,7 @@ public class HoodieCompactedLogRecordScanner implements
            if (isNewInstantBlock(r) && !readBlocksLazily) {
              // If this is an avro data block belonging to a different commit/instant,
              // then merge the last blocks and records into the main result
-              merge(records, currentInstantLogBlocks);
+              processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size());
            }
            // store the current block
            currentInstantLogBlocks.push(r);
@@ -144,7 +153,7 @@ public class HoodieCompactedLogRecordScanner implements
            if (isNewInstantBlock(r) && !readBlocksLazily) {
              // If this is a delete data block belonging to a different commit/instant,
              // then merge the last blocks and records into the main result
-              merge(records, currentInstantLogBlocks);
+              processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size());
            }
            // store deletes so can be rolled back
            currentInstantLogBlocks.push(r);
@@ -208,7 +217,6 @@ public class HoodieCompactedLogRecordScanner implements
                break;
              default:
                throw new UnsupportedOperationException("Command type not yet supported.");
-
            }
            break;
          case CORRUPT_BLOCK:
@@ -224,19 +232,14 @@ public class HoodieCompactedLogRecordScanner implements
      // merge the last read block when all the blocks are done reading
      if (!currentInstantLogBlocks.isEmpty()) {
        log.info("Merging the final data blocks");
-        merge(records, currentInstantLogBlocks);
+        processQueuedBlocksForInstant(currentInstantLogBlocks, scannedLogFiles.size());
      }
-    } catch (IOException e) {
+      // Done
+      progress = 1.0f;
+    } catch (Exception e) {
+      log.error("Got exception when reading log file", e);
      throw new HoodieIOException("IOException when reading log file ");
    }
-    this.totalRecordsToUpdate = records.size();
-    this.totalTimeTakenToReadAndMergeBlocks = timer.endTimer();
-    log.info("MaxMemoryInBytes allowed for compaction => " + maxMemorySizeInBytes);
-    log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records.getInMemoryMapNumEntries());
-    log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records.getCurrentInMemoryMapSize());
-    log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records.getDiskBasedMapNumEntries());
-    log.info("Size of file spilled to disk => " + records.getSizeOfFileOnDiskInBytes());
-    log.debug("Total time taken for scanning and compacting log files => " + totalTimeTakenToReadAndMergeBlocks);
  }

  /**
@@ -250,66 +253,69 @@ public class HoodieCompactedLogRecordScanner implements
  }

  /**
-   * Iterate over the GenericRecord in the block, read the hoodie key and partition path and merge with the application
-   * specific payload if the same key was found before. Sufficient to just merge the log records since the base data is
-   * merged on previous compaction. Finally, merge this log block with the accumulated records
+   * Iterate over the GenericRecord in the block, read the hoodie key and partition path and
+   * call subclass processors to handle it.
   */
-  private Map<String, HoodieRecord<? extends HoodieRecordPayload>> merge(
-      HoodieAvroDataBlock dataBlock) throws IOException {
-    // TODO (NA) - Implemnt getRecordItr() in HoodieAvroDataBlock and use that here
+  private void processAvroDataBlock(HoodieAvroDataBlock dataBlock) throws Exception {
+    // TODO (NA) - Implement getRecordItr() in HoodieAvroDataBlock and use that here
    List<IndexedRecord> recs = dataBlock.getRecords();
    totalLogRecords.addAndGet(recs.size());
-    recs.forEach(rec -> {
-      String key = ((GenericRecord) rec).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)
-          .toString();
+    for (IndexedRecord rec : recs) {
      HoodieRecord<? extends HoodieRecordPayload> hoodieRecord =
          SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN);
-      if (records.containsKey(key)) {
-        // Merge and store the merged record
-        HoodieRecordPayload combinedValue = records.get(key).getData()
-            .preCombine(hoodieRecord.getData());
-        records
-            .put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
-                combinedValue));
-      } else {
-        // Put the record as is
-        records.put(key, hoodieRecord);
-      }
-    });
-    return records;
+      processNextRecord(hoodieRecord);
+    }
  }

  /**
-   * Merge the last seen log blocks with the accumulated records
+   * Process next record
+   *
+   * @param hoodieRecord Hoodie Record to process
   */
-  private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
-      Deque<HoodieLogBlock> lastBlocks) throws IOException {
+  protected abstract void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord)
+      throws Exception;
+
+  /**
+   * Process next deleted key
+   *
+   * @param key Deleted record key
+   */
+  protected abstract void processNextDeletedKey(String key);
+
+  /**
+   * Drains the queued log blocks of the last fully read instant (oldest block first) and updates scan progress.
+   */
+  private void processQueuedBlocksForInstant(Deque<HoodieLogBlock> lastBlocks, int numLogFilesSeen)
+      throws Exception {
    while (!lastBlocks.isEmpty()) {
      log.info("Number of remaining logblocks to merge " + lastBlocks.size());
      // poll the element at the bottom of the stack since that's the order it was inserted
      HoodieLogBlock lastBlock = lastBlocks.pollLast();
      switch (lastBlock.getBlockType()) {
        case AVRO_DATA_BLOCK:
-          merge((HoodieAvroDataBlock) lastBlock);
+          processAvroDataBlock((HoodieAvroDataBlock) lastBlock);
          break;
        case DELETE_BLOCK:
          // TODO : If delete is the only block written and/or records are present in parquet file
          // TODO : Mark as tombstone (optional.empty()) for data instead of deleting the entry
-          Arrays.stream(((HoodieDeleteBlock) lastBlock).getKeysToDelete()).forEach(records::remove);
+          Arrays.stream(((HoodieDeleteBlock) lastBlock).getKeysToDelete()).forEach(this::processNextDeletedKey);
          break;
        case CORRUPT_BLOCK:
          log.warn("Found a corrupt block which was not rolled back");
          break;
        default:
-          //TODO <vb> : Need to understand if COMMAND_BLOCK has to be handled?
          break;
      }
    }
+    // Queued blocks are consumed. Approximate progress as the fraction of log files fully scanned (float division)
+    progress = (numLogFilesSeen - 1) / (float) logFilePaths.size();
  }

-  @Override
-  public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
-    return records.iterator();
+  /**
+   * Return progress of scanning as a float between 0.0 to 1.0
+   */
+  public float getProgress() {
+    return progress;
  }

  public long getTotalLogFiles() {
@@ -324,12 +330,8 @@ public class HoodieCompactedLogRecordScanner implements
    return totalLogBlocks.get();
  }

-  public Map<String, HoodieRecord<? extends HoodieRecordPayload>> getRecords() {
-    return records;
-  }
-
-  public long getTotalRecordsToUpdate() {
-    return totalRecordsToUpdate;
+  protected String getPayloadClassFQN() {
+    return payloadClassFQN;
  }

  public long getTotalRollbacks() {
@@ -339,9 +341,4 @@ public class HoodieCompactedLogRecordScanner implements
  public long getTotalCorruptBlocks() {
    return totalCorruptBlocks.get();
  }
-
-  public long getTotalTimeTakenToReadAndMergeBlocks() {
-    return totalTimeTakenToReadAndMergeBlocks;
-  }
 }
-
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieMergedLogRecordScanner.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieMergedLogRecordScanner.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.uber.hoodie.common.table.log;
+
+import com.uber.hoodie.common.model.HoodieKey;
+import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.model.HoodieRecordPayload;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
+import com.uber.hoodie.common.util.HoodieRecordSizeEstimator;
+import com.uber.hoodie.common.util.HoodieTimer;
+import com.uber.hoodie.common.util.collection.ExternalSpillableMap;
+import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
+import com.uber.hoodie.common.util.collection.converter.StringConverter;
+import com.uber.hoodie.exception.HoodieIOException;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.avro.Schema;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of records which will
+ * be used as a lookup table when merging the base columnar file with the redo log file.
+ *
+ * NOTE: If readBlockLazily is turned on, does not merge while reading; instead keeps reading log blocks and merges
+ * everything at once. This is an optimization to avoid seek() back and forth to read a new block (forward seek())
+ * and lazily read the content of an already seen block (reverse and forward seek()) during merge.
+ * I/O pass 1 reads the metadata of blocks 1..N; I/O pass 2 reads the data of blocks 1..N.
+ * <p>
+ * This results in two I/O passes over the log file.
+ */
+
+public class HoodieMergedLogRecordScanner extends AbstractHoodieLogRecordScanner
+    implements Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
+
+  private static final Logger log = LogManager.getLogger(HoodieMergedLogRecordScanner.class);
+
+  // Final map of compacted/merged records, keyed by record key
+  private final ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records;
+
+  // Number of entries left in the merged map once the scan has finished
+  private long numMergedRecordsInLog;
+
+  // Stores the total time taken to perform reading and merging of log blocks
+  private final long totalTimeTakenToReadAndMergeBlocks;
+  // A timer for calculating elapsed time in millis
+  public final HoodieTimer timer = new HoodieTimer();
+
+  @SuppressWarnings("unchecked")
+  public HoodieMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
+      Schema readerSchema, String latestInstantTime, Long maxMemorySizeInBytes,
+      boolean readBlocksLazily, boolean reverseReader, int bufferSize, String spillableMapBasePath) {
+    super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
+    try {
+      // Store merged records for all versions for this log file, set the in-memory footprint to maxInMemoryMapSize
+      this.records = new ExternalSpillableMap<>(maxMemorySizeInBytes, spillableMapBasePath,
+          new StringConverter(), new HoodieRecordConverter(readerSchema, getPayloadClassFQN()),
+          new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(readerSchema));
+      // Do the scan and merge. records must already be set: scan() calls back into processNextRecord()
+      timer.startTimer();
+      scan();
+      this.totalTimeTakenToReadAndMergeBlocks = timer.endTimer();
+      this.numMergedRecordsInLog = records.size();
+      log.info("MaxMemoryInBytes allowed for compaction => " + maxMemorySizeInBytes);
+      log.info("Number of entries in MemoryBasedMap in ExternalSpillableMap => " + records
+          .getInMemoryMapNumEntries());
+      log.info("Total size in bytes of MemoryBasedMap in ExternalSpillableMap => " + records
+          .getCurrentInMemoryMapSize());
+      log.info("Number of entries in DiskBasedMap in ExternalSpillableMap => " + records
+          .getDiskBasedMapNumEntries());
+      log.info("Size of file spilled to disk => " + records.getSizeOfFileOnDiskInBytes());
+    } catch (IOException e) {
+      throw new HoodieIOException("IOException when reading log file ", e);
+    }
+  }
+
+  @Override
+  public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
+    return records.iterator();
+  }
+
+  public Map<String, HoodieRecord<? extends HoodieRecordPayload>> getRecords() {
+    return records;
+  }
+
+  public long getNumMergedRecordsInLog() {
+    return numMergedRecordsInLog;
+  }
+
+  @Override
+  protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) {
+    String key = hoodieRecord.getRecordKey();
+    if (records.containsKey(key)) {
+      // Key seen before: combine the stored payload with the new one via preCombine and store the result
+      HoodieRecordPayload combinedValue = records.get(key).getData().preCombine(hoodieRecord.getData());
+      records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue));
+    } else {
+      // Put the record as is
+      records.put(key, hoodieRecord);
+    }
+  }
+
+  @Override
+  protected void processNextDeletedKey(String key) {
+    // TODO : If delete is the only block written and/or records are present in parquet file
+    // TODO : Mark as tombstone (optional.empty()) for data instead of deleting the entry
+    records.remove(key);
+  }
+
+  public long getTotalTimeTakenToReadAndMergeBlocks() {
+    return totalTimeTakenToReadAndMergeBlocks;
+  }
+}
+
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieUnMergedLogRecordScanner.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/HoodieUnMergedLogRecordScanner.java
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.common.table.log;
+
+import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.model.HoodieRecordPayload;
+import java.util.List;
+import org.apache.avro.Schema;
+import org.apache.hadoop.fs.FileSystem;
+
+public class HoodieUnMergedLogRecordScanner extends AbstractHoodieLogRecordScanner {
+
+  private final LogRecordScannerCallback callback;
+
+  public HoodieUnMergedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
+      Schema readerSchema, String latestInstantTime, boolean readBlocksLazily, boolean reverseReader,
+      int bufferSize, LogRecordScannerCallback callback) {
+    super(fs, basePath, logFilePaths, readerSchema, latestInstantTime, readBlocksLazily, reverseReader, bufferSize);
+    this.callback = callback;
+  }
+
+  /** Forwards the record to the callback as-is; no merging happens in this scan mode. */
+  @Override
+  protected void processNextRecord(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) throws Exception {
+    callback.apply(hoodieRecord);
+  }
+
+  /** Delete records are not expected in this scan mode, so any deleted key is rejected. */
+  @Override
+  protected void processNextDeletedKey(String key) {
+    throw new IllegalStateException("Not expected to see delete records in this log-scan mode. Check Job Config");
+  }
+
+  /** Callback invoked with each log record passed to {@code processNextRecord}. */
+  @FunctionalInterface
+  public interface LogRecordScannerCallback {
+    void apply(HoodieRecord<? extends HoodieRecordPayload> record) throws Exception;
+  }
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/table/log/block/HoodieLogBlock.java
@@ -18,6 +18,7 @@ package com.uber.hoodie.common.table.log.block;

 import com.google.common.collect.Maps;
 import com.uber.hoodie.common.model.HoodieLogFile;
+import com.uber.hoodie.common.table.log.HoodieMergedLogRecordScanner;
 import com.uber.hoodie.exception.HoodieException;
 import com.uber.hoodie.exception.HoodieIOException;
 import java.io.ByteArrayOutputStream;
@@ -219,7 +220,7 @@ public abstract class HoodieLogBlock {

  /**
   * Read or Skip block content of a log block in the log file. Depends on lazy reading enabled in
-   * {@link com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner}
+   * {@link HoodieMergedLogRecordScanner}
   */
  public static byte[] readOrSkipContent(FSDataInputStream inputStream,
      Integer contentLength, boolean readBlockLazily) throws IOException {
--- a/hoodie-client/src/main/java/com/uber/hoodie/func/payload/GenericRecordBufferedIteratorPayload.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/func/payload/GenericRecordBufferedIteratorPayload.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,18 +14,18 @@
 * limitations under the License.
 */

-package com.uber.hoodie.func.payload;
+package com.uber.hoodie.common.util;

-import org.apache.avro.generic.GenericRecord;
+import com.twitter.common.objectsize.ObjectSizeCalculator;

 /**
- * BufferedIteratorPayload that takes GenericRecord as input and GenericRecord as output
+ * Default implementation of size-estimator that uses Twitter's ObjectSizeCalculator
+ * @param <T>
 */
-public class GenericRecordBufferedIteratorPayload
-    extends AbstractBufferedIteratorPayload<GenericRecord, GenericRecord> {
+public class DefaultSizeEstimator<T> implements SizeEstimator<T> {

-  public GenericRecordBufferedIteratorPayload(GenericRecord record) {
-    super(record);
-    this.outputPayload = record;
+  @Override
+  public long sizeEstimate(T t)  {
+    return ObjectSizeCalculator.getObjectSize(t);
  }
 }
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieRecordSizeEstimator.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.uber.hoodie.common.util;
+
+import com.twitter.common.objectsize.ObjectSizeCalculator;
+import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.model.HoodieRecordPayload;
+import org.apache.avro.Schema;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Size estimator for {@link HoodieRecord}s, backed by {@link ObjectSizeCalculator}
+ * @param <T> payload type carried by the records being measured
+ */
+public class HoodieRecordSizeEstimator<T extends HoodieRecordPayload> implements SizeEstimator<HoodieRecord<T>> {
+
+  private static final Logger log = LogManager.getLogger(HoodieRecordSizeEstimator.class);
+
+  // Schema used to get GenericRecord from HoodieRecordPayload then convert to bytes and vice-versa
+  private final Schema schema;
+
+  public HoodieRecordSizeEstimator(Schema schema) {
+    this.schema = schema;
+  }
+
+  @Override
+  public long sizeEstimate(HoodieRecord<T> hoodieRecord) {
+    // Most HoodieRecords are bound to have data + schema. Although, the same schema object is shared amongst
+    // all records in the JVM. Calculate and print the size of the Schema and of the Record to
+    // note the sizes and differences. A correct estimation in such cases is handled in
+    // com.uber.hoodie.common.util.collection.ExternalSpillableMap. The schema size is logged only, not returned.
+    long sizeOfRecord = ObjectSizeCalculator.getObjectSize(hoodieRecord);
+    long sizeOfSchema = ObjectSizeCalculator.getObjectSize(schema);
+    log.info("SizeOfRecord => " + sizeOfRecord + " SizeOfSchema => " + sizeOfSchema);
+    return sizeOfRecord;
+  }
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/SizeEstimator.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/SizeEstimator.java
@@ -0,0 +1,31 @@
+/*
+ *  Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package com.uber.hoodie.common.util;
+
+/**
+ * Contract for estimating the size of a payload in memory
+ * @param <T> type of the payload being measured
+ */
+public interface SizeEstimator<T> {
+
+  /**
+   * Estimates the size of the given payload in memory.
+   * The default implementation returns the total allocated size, in bytes, of the object
+   * and all other objects reachable from it.
+   */
+  long sizeEstimate(T t);
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/SpillableMapUtils.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/SpillableMapUtils.java
@@ -20,7 +20,6 @@ import com.uber.hoodie.common.model.HoodieKey;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
 import com.uber.hoodie.common.util.collection.DiskBasedMap;
-import com.uber.hoodie.common.util.collection.converter.Converter;
 import com.uber.hoodie.common.util.collection.io.storage.SizeAwareDataOutputStream;
 import com.uber.hoodie.exception.HoodieCorruptedDataException;
 import java.io.IOException;
@@ -99,8 +98,8 @@ public class SpillableMapUtils {
   * Compute a bytes representation of the payload by serializing the contents This is used to estimate the size of the
   * payload (either in memory or when written to disk)
   */
-  public static <R> long computePayloadSize(R value, Converter<R> valueConverter) throws IOException {
-    return valueConverter.sizeEstimate(value);
+  public static <R> long computePayloadSize(R value, SizeEstimator<R> valueSizeEstimator) throws IOException {
+    return valueSizeEstimator.sizeEstimate(value);
  }

  /**
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/ExternalSpillableMap.java
@@ -17,6 +17,7 @@
 package com.uber.hoodie.common.util.collection;

 import com.twitter.common.objectsize.ObjectSizeCalculator;
+import com.uber.hoodie.common.util.SizeEstimator;
 import com.uber.hoodie.common.util.collection.converter.Converter;
 import com.uber.hoodie.exception.HoodieNotSupportedException;
 import java.io.IOException;
@@ -56,6 +57,10 @@ public class ExternalSpillableMap<T, R> implements Map<T, R> {
  private final Converter<T> keyConverter;
  // Value converter to convert value type to bytes
  private final Converter<R> valueConverter;
+  // Size Estimator for key type
+  private final SizeEstimator<T> keySizeEstimator;
+  // Size Estimator for value type
+  private final SizeEstimator<R> valueSizeEstimator;
  // current space occupied by this map in-memory
  private Long currentInMemoryMapSize;
  // An estimate of the size of each payload written to this map
@@ -64,7 +69,8 @@ public class ExternalSpillableMap<T, R> implements Map<T, R> {
  private boolean shouldEstimatePayloadSize = true;

  public ExternalSpillableMap(Long maxInMemorySizeInBytes, String baseFilePath,
-      Converter<T> keyConverter, Converter<R> valueConverter) throws IOException {
+      Converter<T> keyConverter, Converter<R> valueConverter,
+      SizeEstimator<T> keySizeEstimator, SizeEstimator<R> valueSizeEstimator) throws IOException {
    this.inMemoryMap = new HashMap<>();
    this.diskBasedMap = new DiskBasedMap<>(baseFilePath, keyConverter, valueConverter);
    this.maxInMemorySizeInBytes = (long) Math
@@ -72,6 +78,8 @@ public class ExternalSpillableMap<T, R> implements Map<T, R> {
    this.currentInMemoryMapSize = 0L;
    this.keyConverter = keyConverter;
    this.valueConverter = valueConverter;
+    this.keySizeEstimator = keySizeEstimator;
+    this.valueSizeEstimator = valueSizeEstimator;
  }

  /**
@@ -146,7 +154,7 @@ public class ExternalSpillableMap<T, R> implements Map<T, R> {
        // At first, use the sizeEstimate of a record being inserted into the spillable map.
        // Note, the converter may over estimate the size of a record in the JVM
        this.estimatedPayloadSize =
-            keyConverter.sizeEstimate(key) + valueConverter.sizeEstimate(value);
+            keySizeEstimator.sizeEstimate(key) + valueSizeEstimator.sizeEstimate(value);
        log.info("Estimated Payload size => " + estimatedPayloadSize);
      } else if (shouldEstimatePayloadSize
          && inMemoryMap.size() % NUMBER_OF_RECORDS_TO_ESTIMATE_PAYLOAD_SIZE == 0) {
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/Converter.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/Converter.java
@@ -31,9 +31,4 @@ public interface Converter<T> {
   * This method is used to convert the serialized payload (in bytes) to the actual payload instance
   */
  T getData(byte[] bytes);
-
-  /**
-   * This method is used to estimate the size of a payload in memory
-   */
-  long sizeEstimate(T t);
 }
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/HoodieRecordConverter.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/HoodieRecordConverter.java
@@ -16,7 +16,6 @@

 package com.uber.hoodie.common.util.collection.converter;

-import com.twitter.common.objectsize.ObjectSizeCalculator;
 import com.uber.hoodie.common.model.HoodieKey;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
@@ -87,16 +86,4 @@ public class HoodieRecordConverter<V> implements
      throw new HoodieNotSerializableException("Cannot de-serialize value from bytes", io);
    }
  }
-
-  @Override
-  public long sizeEstimate(HoodieRecord<? extends HoodieRecordPayload> hoodieRecord) {
-    // Most HoodieRecords are bound to have data + schema. Although, the same schema object is shared amongst
-    // all records in the JVM. Calculate and print the size of the Schema and of the Record to
-    // note the sizes and differences. A correct estimation in such cases is handled in
-    /** {@link com.uber.hoodie.common.util.collection.ExternalSpillableMap} **/
-    long sizeOfRecord = ObjectSizeCalculator.getObjectSize(hoodieRecord);
-    long sizeOfSchema = ObjectSizeCalculator.getObjectSize(schema);
-    log.info("SizeOfRecord => " + sizeOfRecord + " SizeOfSchema => " + sizeOfSchema);
-    return sizeOfRecord;
-  }
 }
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/StringConverter.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/collection/converter/StringConverter.java
@@ -16,7 +16,6 @@

 package com.uber.hoodie.common.util.collection.converter;

-import com.twitter.common.objectsize.ObjectSizeCalculator;
 import java.nio.charset.StandardCharsets;

 /**
@@ -33,9 +32,4 @@ public class StringConverter implements Converter<String> {
  public String getData(byte[] bytes) {
    return new String(bytes);
  }
-
-  @Override
-  public long sizeEstimate(String s) {
-    return ObjectSizeCalculator.getObjectSize(s);
-  }
 }
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryExecutor.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryExecutor.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *          http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package com.uber.hoodie.common.util.queue;
+
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
+import com.uber.hoodie.common.util.SizeEstimator;
+import com.uber.hoodie.exception.HoodieException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.concurrent.ConcurrentUtils;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Executor which orchestrates concurrent producers and consumers communicating through a bounded in-memory queue.
+ * This class takes as input the size limit, queue producer(s), consumer and transformer
+ * and exposes API to orchestrate concurrent execution of these actors communicating through a central bounded queue
+ */
+public class BoundedInMemoryExecutor<I, O, E> {
+
+  private static Logger logger = LogManager.getLogger(BoundedInMemoryExecutor.class);
+
+  // Executor service used for launching writer thread.
+  private final ExecutorService executorService;
+  // Used for buffering records which is controlled by HoodieWriteConfig#WRITE_BUFFER_LIMIT_BYTES.
+  private final BoundedInMemoryQueue<I, O> queue;
+  // Producers
+  private final List<BoundedInMemoryQueueProducer<I>> producers;
+  // Consumer
+  private final Optional<BoundedInMemoryQueueConsumer<O, E>> consumer;
+
+  public BoundedInMemoryExecutor(final long bufferLimitInBytes,
+      BoundedInMemoryQueueProducer<I> producer,
+      Optional<BoundedInMemoryQueueConsumer<O, E>> consumer,
+      final Function<I, O> transformFunction) {
+    this(bufferLimitInBytes, Arrays.asList(producer), consumer, transformFunction, new DefaultSizeEstimator<>());
+  }
+
+  public BoundedInMemoryExecutor(final long bufferLimitInBytes,
+      List<BoundedInMemoryQueueProducer<I>> producers,
+      Optional<BoundedInMemoryQueueConsumer<O, E>> consumer,
+      final Function<I, O> transformFunction,
+      final SizeEstimator<O> sizeEstimator) {
+    this.producers = producers;
+    this.consumer = consumer;
+    // Ensure single thread for each producer thread and one for consumer
+    this.executorService = Executors.newFixedThreadPool(producers.size() + 1);
+    this.queue = new BoundedInMemoryQueue<>(bufferLimitInBytes, transformFunction, sizeEstimator);
+  }
+
+  /**
+   * Callback to implement environment specific behavior before executors (producers/consumer)
+   * run.
+   */
+  public void preExecute() {
+    // Do Nothing in general context
+  }
+
+  /**
+   * Start all Producers
+   */
+  public ExecutorCompletionService<Boolean> startProducers() {
+    // Latch to control when and which producer thread will close the queue
+    final CountDownLatch latch = new CountDownLatch(producers.size());
+    final ExecutorCompletionService<Boolean> completionService =
+        new ExecutorCompletionService<Boolean>(executorService);
+    producers.stream().map(producer -> {
+      return completionService.submit(() -> {
+        try {
+          preExecute();
+          producer.produce(queue);
+        } catch (Exception e) {
+          logger.error("error consuming records", e);
+          queue.markAsFailed(e);
+          throw e;
+        } finally {
+          synchronized (latch) {
+            latch.countDown();
+            if (latch.getCount() == 0) {
+              // Mark production as done so that consumer will be able to exit
+              queue.close();
+            }
+          }
+        }
+        return true;
+      });
+    }).collect(Collectors.toList());
+    return completionService;
+  }
+
+  /**
+   * Start only consumer
+   */
+  private Future<E> startConsumer() {
+    return consumer.map(consumer -> {
+      return executorService.submit(
+          () -> {
+            logger.info("starting consumer thread");
+            preExecute();
+            try {
+              E result = consumer.consume(queue);
+              logger.info("Queue Consumption is done; notifying producer threads");
+              return result;
+            } catch (Exception e) {
+              logger.error("error consuming records", e);
+              queue.markAsFailed(e);
+              throw e;
+            }
+          });
+    }).orElse(ConcurrentUtils.constantFuture(null));
+  }
+
+  /**
+   * Main API to run both production and consumption
+   */
+  public E execute() {
+    try {
+      ExecutorCompletionService<Boolean> producerService = startProducers();
+      Future<E> future = startConsumer();
+      // Wait for consumer to be done
+      return future.get();
+    } catch (Exception e) {
+      throw new HoodieException(e);
+    }
+  }
+
+
+  public boolean isRemaining() {
+    return queue.iterator().hasNext();
+  }
+
+  public void shutdownNow() {
+    executorService.shutdownNow();
+  }
+
+  public BoundedInMemoryQueue<I, O> getQueue() {
+    return queue;
+  }
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueue.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueue.java
@@ -0,0 +1,273 @@
+/*
+ *  Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+package com.uber.hoodie.common.util.queue;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
+import com.uber.hoodie.common.util.SizeEstimator;
+import com.uber.hoodie.exception.HoodieException;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Used for enqueueing input records. Queue limit is controlled by {@link #memoryLimit}.
+ * Unlike standard bounded queue implementations, this queue bounds the size by memory bytes occupied by its
+ * tenants. The standard implementation bounds by the number of entries in the queue.
+ *
+ * It internally samples every {@link #RECORD_SAMPLING_RATE}th record and adjusts number of records in
+ * queue accordingly. This is done to ensure that we don't OOM.
+ *
+ * This queue supports multiple producer single consumer pattern.
+ *
+ * @param <I> input payload data type
+ * @param <O> output payload data type
+ */
+public class BoundedInMemoryQueue<I, O> implements Iterable<O> {
+
+  // interval used for polling records in the queue.
+  public static final int RECORD_POLL_INTERVAL_SEC = 1;
+  // rate used for sampling records to determine avg record size in bytes.
+  public static final int RECORD_SAMPLING_RATE = 64;
+  // maximum records that will be cached
+  private static final int RECORD_CACHING_LIMIT = 128 * 1024;
+  private static final Logger logger = LogManager.getLogger(BoundedInMemoryQueue.class);
+  // It indicates number of records to cache. We will be using sampled record's average size to
+  // determine how many
+  // records we should cache and will change (increase/decrease) permits accordingly.
+  @VisibleForTesting
+  public final Semaphore rateLimiter = new Semaphore(1);
+  // used for sampling records with "RECORD_SAMPLING_RATE" frequency.
+  public final AtomicLong samplingRecordCounter = new AtomicLong(-1);
+  // internal queue for records.
+  private final LinkedBlockingQueue<Optional<O>> queue = new
+      LinkedBlockingQueue<>();
+  // maximum amount of memory to be used for queueing records.
+  private final long memoryLimit;
+  // it holds the root cause of the exception in case either queueing records (consuming from
+  // inputIterator) fails or
+  // thread reading records from queue fails.
+  private final AtomicReference<Exception> hasFailed = new AtomicReference<>(null);
+  // used for indicating that all the records from queue are read successfully.
+  private final AtomicBoolean isReadDone = new AtomicBoolean(false);
+  // used for indicating that all records have been enqueued
+  private final AtomicBoolean isWriteDone = new AtomicBoolean(false);
+  // Function to transform the input payload to the expected output payload
+  private final Function<I, O> transformFunction;
+  // Payload Size Estimator
+  private final SizeEstimator<O> payloadSizeEstimator;
+  // Singleton (w.r.t this instance) Iterator for this queue
+  private final QueueIterator iterator;
+  // indicates rate limit (number of records to cache). it is updated whenever there is a change
+  // in avg record size.
+  @VisibleForTesting
+  public int currentRateLimit = 1;
+  // indicates avg record size in bytes. It is updated whenever a new record is sampled.
+  @VisibleForTesting
+  public long avgRecordSizeInBytes = 0;
+  // indicates number of samples collected so far.
+  private long numSamples = 0;
+
+  /**
+   * Construct BoundedInMemoryQueue with default SizeEstimator
+   *
+   * @param memoryLimit       MemoryLimit in bytes
+   * @param transformFunction Transformer Function to convert input payload type to stored payload type
+   */
+  public BoundedInMemoryQueue(final long memoryLimit, final Function<I, O> transformFunction) {
+    this(memoryLimit, transformFunction, new DefaultSizeEstimator<>());
+  }
+
+  /**
+   * Construct BoundedInMemoryQueue with passed in size estimator
+   *
+   * @param memoryLimit          MemoryLimit in bytes
+   * @param transformFunction    Transformer Function to convert input payload type to stored payload type
+   * @param payloadSizeEstimator Payload Size Estimator
+   */
+  public BoundedInMemoryQueue(
+      final long memoryLimit,
+      final Function<I, O> transformFunction,
+      final SizeEstimator<O> payloadSizeEstimator) {
+    this.memoryLimit = memoryLimit;
+    this.transformFunction = transformFunction;
+    this.payloadSizeEstimator = payloadSizeEstimator;
+    this.iterator = new QueueIterator();
+  }
+
+  /**
+   * Number of entries currently sitting in the internal queue.
+   */
+  @VisibleForTesting
+  public int size() {
+    return this.queue.size();
+  }
+
+  /**
+   * Samples records with "RECORD_SAMPLING_RATE" frequency and computes average record size in bytes. It is used
+   * for determining how many maximum records to queue. Based on change in avg size it may increase or decrease
+   * available permits.
+   *
+   * NOTE(review): currentRateLimit, avgRecordSizeInBytes and numSamples are plain fields updated here without
+   * synchronization; confirm whether that is acceptable when several producers insert concurrently.
+   *
+   * @param payload Payload to size
+   * @throws InterruptedException if interrupted while acquiring permits to shrink the rate limit
+   */
+  private void adjustBufferSizeIfNeeded(final O payload) throws InterruptedException {
+    if (this.samplingRecordCounter.incrementAndGet() % RECORD_SAMPLING_RATE != 0) {
+      return;
+    }
+
+    final long recordSizeInBytes = payloadSizeEstimator.sizeEstimate(payload);
+    // Running average over all samples so far; clamped to 1 to keep the division below safe
+    final long newAvgRecordSizeInBytes = Math
+        .max(1, (avgRecordSizeInBytes * numSamples + recordSizeInBytes) / (numSamples + 1));
+    final int newRateLimit = (int) Math
+        .min(RECORD_CACHING_LIMIT, Math.max(1, this.memoryLimit / newAvgRecordSizeInBytes));
+
+    // If there is any change in number of records to cache then we will either release (if it increased) or acquire
+    // (if it decreased) to adjust rate limiting to newly computed value.
+    if (newRateLimit > currentRateLimit) {
+      rateLimiter.release(newRateLimit - currentRateLimit);
+    } else if (newRateLimit < currentRateLimit) {
+      rateLimiter.acquire(currentRateLimit - newRateLimit);
+    }
+    currentRateLimit = newRateLimit;
+    avgRecordSizeInBytes = newAvgRecordSizeInBytes;
+    numSamples++;
+  }
+
+  /**
+   * Inserts record into queue after applying transformation
+   *
+   * @param t Item to be queued
+   * @throws IllegalStateException if the queue has already been closed for writes
+   * @throws HoodieException       if a failure has been reported through {@link #markAsFailed(Exception)}
+   */
+  public void insertRecord(I t) throws Exception {
+    // If already closed, throw exception
+    if (isWriteDone.get()) {
+      throw new IllegalStateException("Queue closed for enqueueing new entries");
+    }
+
+    // We need to stop queueing if queue-reader has failed and exited.
+    throwExceptionIfFailed();
+
+    rateLimiter.acquire();
+    // We are retrieving insert value in the record queueing thread to offload computation
+    // around schema validation
+    // and record creation to it.
+    final O payload = transformFunction.apply(t);
+    adjustBufferSizeIfNeeded(payload);
+    queue.put(Optional.of(payload));
+  }
+
+  /**
+   * Checks if records are either available in the queue or expected to be written in future
+   */
+  private boolean expectMoreRecords() {
+    // Either writers are still active, or they are done but entries are left to drain
+    return !isWriteDone.get() || !queue.isEmpty();
+  }
+
+  /**
+   * Reader interface but never exposed to outside world as this is a single consumer queue.
+   * Reading is done through a singleton iterator for this queue.
+   *
+   * @return the next record, or an empty Optional once writers are done and the queue is drained
+   */
+  private Optional<O> readNextRecord() {
+    if (this.isReadDone.get()) {
+      return Optional.empty();
+    }
+
+    // Hand a permit back to the producers before waiting for the next entry
+    rateLimiter.release();
+    Optional<O> newRecord = Optional.empty();
+    while (expectMoreRecords()) {
+      try {
+        throwExceptionIfFailed();
+        // poll returns null when nothing arrived within the interval; loop to re-check termination and failure
+        newRecord = queue.poll(RECORD_POLL_INTERVAL_SEC, TimeUnit.SECONDS);
+        if (newRecord != null) {
+          break;
+        }
+      } catch (InterruptedException e) {
+        logger.error("error reading records from queue", e);
+        // Restore the interrupt flag so that code further up the stack can still observe the interruption
+        Thread.currentThread().interrupt();
+        throw new HoodieException(e);
+      }
+    }
+    if (newRecord != null && newRecord.isPresent()) {
+      return newRecord;
+    } else {
+      // We are done reading all the records from internal iterator.
+      this.isReadDone.set(true);
+      return Optional.empty();
+    }
+  }
+
+  /**
+   * Marks the queue as closed for writes. No sentinel entry is enqueued: the reader observes the flag and stops
+   * once the entries already in the queue have been drained. Further calls to insertRecord are rejected.
+   */
+  public void close() throws InterruptedException {
+    // done queueing records notifying queue-reader.
+    isWriteDone.set(true);
+  }
+
+  /**
+   * Throws a HoodieException wrapping the recorded failure, if one has been reported.
+   */
+  private void throwExceptionIfFailed() {
+    if (this.hasFailed.get() != null) {
+      throw new HoodieException("operation has failed", this.hasFailed.get());
+    }
+  }
+
+  /**
+   * API to allow producers and consumer to communicate termination due to failure
+   *
+   * @param e failure to be reported to the other side
+   */
+  public void markAsFailed(Exception e) {
+    this.hasFailed.set(e);
+    // release the permits so that if the queueing thread is waiting for permits then it will
+    // get it.
+    this.rateLimiter.release(RECORD_CACHING_LIMIT + 1);
+  }
+
+  /**
+   * Returns the single iterator instance owned by this queue; every call returns the same object.
+   */
+  @Override
+  public Iterator<O> iterator() {
+    return iterator;
+  }
+
+  /**
+   * Iterator for the memory bounded queue
+   */
+  private final class QueueIterator implements Iterator<O> {
+
+    // next record to be read from queue.
+    private O nextRecord;
+
+    @Override
+    public boolean hasNext() {
+      // Fetch lazily and keep the record until next() hands it out
+      if (this.nextRecord == null) {
+        Optional<O> res = readNextRecord();
+        this.nextRecord = res.orElse(null);
+      }
+      return this.nextRecord != null;
+    }
+
+    @Override
+    public O next() {
+      Preconditions.checkState(hasNext() && this.nextRecord != null);
+      final O ret = this.nextRecord;
+      this.nextRecord = null;
+      return ret;
+    }
+  }
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueueConsumer.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueueConsumer.java
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.common.util.queue;
+
+import java.util.Iterator;
+
+
+/**
+ * Drains a memory bounded queue entry by entry, handing each entry to a callback, and returns the
+ * result exposed by the implementation once the queue is exhausted.
+ *
+ * @param <I> type of the entries read from the queue
+ * @param <O> type of the result returned after consumption
+ */
+public abstract class BoundedInMemoryQueueConsumer<I, O> {
+
+  /**
+   * API to de-queue entries from memory bounded queue
+   *
+   * @param queue In Memory bounded queue
+   * @return whatever {@link #getResult()} reports after all entries were consumed
+   */
+  public O consume(BoundedInMemoryQueue<?, I> queue) throws Exception {
+    final Iterator<I> entries = queue.iterator();
+    // Walk the iterator to exhaustion, one callback per entry
+    entries.forEachRemaining(this::consumeOneRecord);
+
+    // Let the implementation know that no more entries are coming
+    finish();
+    return getResult();
+  }
+
+  /**
+   * Consume one record
+   */
+  protected abstract void consumeOneRecord(I record);
+
+  /**
+   * Notifies implementation that we have exhausted consuming records from queue
+   */
+  protected abstract void finish();
+
+  /**
+   * Return result of consuming records so far
+   */
+  protected abstract O getResult();
+
+
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueueProducer.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/BoundedInMemoryQueueProducer.java
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.common.util.queue;
+
+/**
+ * Producer for BoundedInMemoryQueue. The memory bounded queue supports a
+ * multiple producers, single consumer pattern.
+ *
+ * @param <I> Input type for queue items produced
+ */
+public interface BoundedInMemoryQueueProducer<I> {
+
+  /**
+   * API to enqueue entries to memory bounded queue
+   *
+   * @param queue In Memory bounded queue
+   * @throws Exception if producing entries fails
+   */
+  void produce(BoundedInMemoryQueue<I, ?> queue) throws Exception;
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/FunctionBasedQueueProducer.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/FunctionBasedQueueProducer.java
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.common.util.queue;
+
+import java.util.function.Function;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Queue producer that delegates the work of inserting entries into the queue to a caller supplied function.
+ *
+ * @param <I> Type of entry produced for queue
+ */
+public class FunctionBasedQueueProducer<I> implements BoundedInMemoryQueueProducer<I> {
+
+  private static final Logger logger = LogManager.getLogger(FunctionBasedQueueProducer.class);
+
+  // Function invoked with the queue when produce() is called
+  private final Function<BoundedInMemoryQueue<I, ?>, Boolean> enqueueFunction;
+
+  public FunctionBasedQueueProducer(Function<BoundedInMemoryQueue<I, ?>, Boolean> producerFunction) {
+    this.enqueueFunction = producerFunction;
+  }
+
+  /**
+   * Applies the supplied function to the queue. The Boolean returned by the function is not inspected.
+   *
+   * @param queue In Memory bounded queue
+   */
+  @Override
+  public void produce(BoundedInMemoryQueue<I, ?> queue) {
+    logger.info("starting function which will enqueue records");
+    enqueueFunction.apply(queue);
+    logger.info("finished function which will enqueue records");
+  }
+}
--- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/IteratorBasedQueueProducer.java
+++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/queue/IteratorBasedQueueProducer.java
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.common.util.queue;
+
+import java.util.Iterator;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+/**
+ * Iterator based producer which pulls entry from iterator and produces items for the queue
+ *
+ * @param <I> Item type produced for the buffer.
+ */
+public class IteratorBasedQueueProducer<I> implements BoundedInMemoryQueueProducer<I> {
+
+  private static final Logger logger = LogManager.getLogger(IteratorBasedQueueProducer.class);
+
+  // input iterator for producing items in the buffer.
+  private final Iterator<I> inputIterator;
+
+  public IteratorBasedQueueProducer(Iterator<I> inputIterator) {
+    this.inputIterator = inputIterator;
+  }
+
+  @Override
+  public void produce(BoundedInMemoryQueue<I, ?> queue) throws Exception {
+    logger.info("starting to buffer records");
+    while (inputIterator.hasNext()) {
+      queue.insertRecord(inputIterator.next());
+    }
+    logger.info("finished buffering records");
+  }
+}
--- a/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/table/log/HoodieLogFormatTest.java
@@ -73,12 +73,11 @@ import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
 public class HoodieLogFormatTest {

+  private static final String BASE_OUTPUT_PATH = "/tmp/";
+  private static String basePath;
  private FileSystem fs;
  private Path partitionPath;
-  private static String basePath;
  private int bufferSize = 4096;
-  private static final String BASE_OUTPUT_PATH = "/tmp/";
-
  private Boolean readBlocksLazily = true;

  public HoodieLogFormatTest(Boolean readBlocksLazily) {
@@ -87,7 +86,7 @@ public class HoodieLogFormatTest {

  @Parameterized.Parameters(name = "LogBlockReadMode")
  public static Collection<Boolean[]> data() {
-    return Arrays.asList(new Boolean[][] {{true}, {false}});
+    return Arrays.asList(new Boolean[][]{{true}, {false}});
  }

  @BeforeClass
@@ -400,7 +399,7 @@ public class HoodieLogFormatTest {
    writer.close();

    // scan all log blocks (across multiple log files)
-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath,
        logFiles.stream().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()), schema, "100",
        10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);

@@ -527,7 +526,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("", 200, scanner.getTotalLogRecords());
    Set<String> readKeys = new HashSet<>(200);
@@ -587,7 +586,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "102", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We read 200 records from 2 write batches", 200, scanner.getTotalLogRecords());
    Set<String> readKeys = new HashSet<>(200);
@@ -665,7 +664,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "103", 10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We would read 200 records", 200, scanner.getTotalLogRecords());
    Set<String> readKeys = new HashSet<>(200);
@@ -719,7 +718,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "102", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We still would read 200 records", 200, scanner.getTotalLogRecords());
    final List<String> readKeys = new ArrayList<>(200);
@@ -739,8 +738,8 @@ public class HoodieLogFormatTest {
    writer = writer.appendBlock(commandBlock);

    readKeys.clear();
-    scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema, "101", 10240L, readBlocksLazily,
-        false, bufferSize, BASE_OUTPUT_PATH);
+    scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
+        10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
    assertEquals("Stream collect should return all 200 records after rollback of delete", 200, readKeys.size());
  }
@@ -800,7 +799,7 @@ public class HoodieLogFormatTest {
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

    // all data must be rolled back before merge
-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We would have scanned 0 records because of rollback", 0, scanner.getTotalLogRecords());

@@ -849,7 +848,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
  }
@@ -881,7 +880,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We still would read 100 records", 100, scanner.getTotalLogRecords());
    final List<String> readKeys = new ArrayList<>(100);
@@ -931,7 +930,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "101", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
  }
@@ -1019,7 +1018,7 @@ public class HoodieLogFormatTest {
    List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION,
        "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

-    HoodieCompactedLogRecordScanner scanner = new HoodieCompactedLogRecordScanner(fs, basePath, allLogFiles, schema,
+    HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
        "101", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
    assertEquals("We would read 0 records", 0, scanner.getTotalLogRecords());
  }
--- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/collection/TestDiskBasedMap.java
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/collection/TestDiskBasedMap.java
@@ -27,6 +27,7 @@ import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
 import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
 import com.uber.hoodie.common.util.HoodieAvroUtils;
+import com.uber.hoodie.common.util.HoodieRecordSizeEstimator;
 import com.uber.hoodie.common.util.SchemaTestUtil;
 import com.uber.hoodie.common.util.SpillableMapTestUtils;
 import com.uber.hoodie.common.util.SpillableMapUtils;
@@ -156,14 +157,14 @@ public class TestDiskBasedMap {
    List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);

    long payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
-        new HoodieRecordConverter(schema, HoodieAvroPayload.class.getName()));
+        new HoodieRecordSizeEstimator(schema));
    assertTrue(payloadSize > 0);

    // Test sizeEstimator with hoodie metadata fields
    schema = HoodieAvroUtils.addMetadataFields(schema);
    hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);
    payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
-        new HoodieRecordConverter(schema, HoodieAvroPayload.class.getName()));
+        new HoodieRecordSizeEstimator(schema));
    assertTrue(payloadSize > 0);

    // Following tests payloads without an Avro Schema in the Record
@@ -175,7 +176,7 @@ public class TestDiskBasedMap {
        .map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"),
            new AvroBinaryTestPayload(Optional.of((GenericRecord) r)))).collect(Collectors.toList());
    payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
-        new HoodieRecordConverter(schema, AvroBinaryTestPayload.class.getName()));
+        new HoodieRecordSizeEstimator(schema));
    assertTrue(payloadSize > 0);

    // Test sizeEstimator with hoodie metadata fields and without schema object in the payload
@@ -188,7 +189,7 @@ public class TestDiskBasedMap {
                .of(HoodieAvroUtils.rewriteRecord((GenericRecord) r, simpleSchemaWithMetadata)))))
        .collect(Collectors.toList());
    payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0),
-        new HoodieRecordConverter(schema, AvroBinaryTestPayload.class.getName()));
+        new HoodieRecordSizeEstimator(schema));
    assertTrue(payloadSize > 0);
  }

@@ -201,11 +202,11 @@ public class TestDiskBasedMap {
    // Test sizeEstimatorPerformance with simpleSchema
    Schema schema = SchemaTestUtil.getSimpleSchema();
    List<HoodieRecord> hoodieRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1, schema);
-    HoodieRecordConverter converter =
-        new HoodieRecordConverter(schema, HoodieAvroPayload.class.getName());
+    HoodieRecordSizeEstimator sizeEstimator =
+        new HoodieRecordSizeEstimator(schema);
    HoodieRecord record = hoodieRecords.remove(0);
    long startTime = System.currentTimeMillis();
-    SpillableMapUtils.computePayloadSize(record, converter);
+    SpillableMapUtils.computePayloadSize(record, sizeEstimator);
    long timeTaken = System.currentTimeMillis() - startTime;
    System.out.println("Time taken :" + timeTaken);
    assertTrue(timeTaken < 100);
--- a/hoodie-common/src/test/java/com/uber/hoodie/common/util/collection/TestExternalSpillableMap.java
+++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/collection/TestExternalSpillableMap.java
@@ -25,7 +25,9 @@ import com.uber.hoodie.common.model.HoodieKey;
 import com.uber.hoodie.common.model.HoodieRecord;
 import com.uber.hoodie.common.model.HoodieRecordPayload;
 import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
 import com.uber.hoodie.common.util.HoodieAvroUtils;
+import com.uber.hoodie.common.util.HoodieRecordSizeEstimator;
 import com.uber.hoodie.common.util.SchemaTestUtil;
 import com.uber.hoodie.common.util.SpillableMapTestUtils;
 import com.uber.hoodie.common.util.collection.converter.HoodieRecordConverter;
@@ -66,7 +68,8 @@ public class TestExternalSpillableMap {
    String payloadClazz = HoodieAvroPayload.class.getName();
    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, BASE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -88,7 +91,8 @@ public class TestExternalSpillableMap {

    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, BASE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -126,7 +130,8 @@ public class TestExternalSpillableMap {

    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, BASE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    // insert a bunch of records so that values spill to disk too
@@ -181,7 +186,8 @@ public class TestExternalSpillableMap {

    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, FAILURE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<IndexedRecord> iRecords = SchemaTestUtil.generateHoodieTestRecords(0, 100);
    List<String> recordKeys = SpillableMapTestUtils.upsertRecords(iRecords, records);
@@ -200,7 +206,8 @@ public class TestExternalSpillableMap {

    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, BASE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<String> recordKeys = new ArrayList<>();
    // Ensure we spill to disk
@@ -253,7 +260,8 @@ public class TestExternalSpillableMap {

    ExternalSpillableMap<String, HoodieRecord<? extends HoodieRecordPayload>> records =
        new ExternalSpillableMap<>(16L, BASE_OUTPUT_PATH, new StringConverter(),
-            new HoodieRecordConverter(schema, payloadClazz)); //16B
+            new HoodieRecordConverter(schema, payloadClazz),
+            new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(schema)); //16B

    List<String> recordKeys = new ArrayList<>();
    // Ensure we spill to disk
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/RecordReaderValueIterator.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/RecordReaderValueIterator.java
@@ -0,0 +1,83 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop;
+
+import com.uber.hoodie.exception.HoodieException;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * Provides Iterator Interface to iterate value entries read from record reader
+ *
+ * @param <K> Key Type
+ * @param <V> Value Type
+ */
+public class RecordReaderValueIterator<K, V> implements Iterator<V> {
+
+  public static final Log LOG = LogFactory.getLog(RecordReaderValueIterator.class);
+
+  // Record reader that supplies the values handed out by this iterator
+  private final RecordReader<K, V> reader;
+  // Value fetched by hasNext() but not yet returned by next(); null when nothing is buffered
+  private V nextVal = null;
+
+  /**
+   * Construct RecordReaderValueIterator
+   *
+   * @param reader reader whose values are exposed through the iterator interface
+   */
+  public RecordReaderValueIterator(RecordReader<K, V> reader) {
+    this.reader = reader;
+  }
+
+  /**
+   * Checks whether another value is available. When no value is currently buffered, one record
+   * is read from the underlying reader and its value is kept until {@link #next()} is called, so
+   * calling this method repeatedly does not skip records.
+   *
+   * @return true if a value is buffered or could be read, false if the reader reported the end
+   * @throws HoodieException wrapping the IOException raised by the underlying reader
+   */
+  @Override
+  public boolean hasNext() {
+    if (nextVal == null) {
+      // Ask the reader for key/value holders on every fetch instead of caching them here
+      K key = reader.createKey();
+      V val = reader.createValue();
+      try {
+        boolean notDone = reader.next(key, val);
+        if (!notDone) {
+          return false;
+        }
+        this.nextVal = val;
+      } catch (IOException e) {
+        // Pass the exception to the logger so the cause and stack trace show up in the log
+        LOG.error("Got error reading next record from record reader", e);
+        throw new HoodieException(e);
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the buffered value (reading one first if required) and clears the buffer.
+   *
+   * @return the next value read from the underlying reader
+   * @throws NoSuchElementException if the underlying reader has no more records
+   */
+  @Override
+  public V next() {
+    if (!hasNext()) {
+      throw new NoSuchElementException("Make sure you are following iterator contract.");
+    }
+    V retVal = this.nextVal;
+    this.nextVal = null;
+    return retVal;
+  }
+
+  /**
+   * Closes the underlying record reader.
+   *
+   * @throws IOException if the underlying reader fails to close
+   */
+  public void close() throws IOException {
+    this.reader.close();
+  }
+}
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/SafeParquetRecordReaderWrapper.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/SafeParquetRecordReaderWrapper.java
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * Record Reader for parquet. Records read from this reader is safe to be
+ * buffered for concurrent processing.
+ *
+ * In concurrent producer/consumer pattern, where the record is read and buffered by one thread and processed in
+ * another thread, we need to ensure new instance of ArrayWritable is buffered. ParquetReader createKey/Value is unsafe
+ * as it gets reused for subsequent fetch. This wrapper makes ParquetReader safe for this use-case.
+ */
+public class SafeParquetRecordReaderWrapper implements RecordReader<Void, ArrayWritable> {
+
+  // The wrapped parquet reader; every call except createValue() is forwarded to it
+  private final RecordReader<Void, ArrayWritable> parquetReader;
+
+  // Element class of the values produced by the wrapped reader
+  private final Class valueClass;
+
+  // Length of the Writable array held by the values produced by the wrapped reader
+  private final int numValueFields;
+
+
+  /**
+   * Wraps the given reader. A single value is requested from it up front, purely to learn the
+   * element class and the array length that {@link #createValue()} has to reproduce.
+   *
+   * @param parquetReader reader to delegate to
+   */
+  public SafeParquetRecordReaderWrapper(RecordReader<Void, ArrayWritable> parquetReader) {
+    this.parquetReader = parquetReader;
+    ArrayWritable templateValue = parquetReader.createValue();
+    this.valueClass = templateValue.getValueClass();
+    this.numValueFields = templateValue.get().length;
+  }
+
+  @Override
+  public boolean next(Void key, ArrayWritable value) throws IOException {
+    return parquetReader.next(key, value);
+  }
+
+  @Override
+  public Void createKey() {
+    return parquetReader.createKey();
+  }
+
+  /**
+   * Builds a brand new ArrayWritable on every call rather than delegating to the wrapped reader.
+   * Records may be fetched by one thread and consumed by another, and the ParquetReader
+   * implementation reuses the same ArrayWritable for all reads, which would corrupt values that
+   * are still sitting in a buffer. The returned value carries the value class captured in the
+   * constructor and an empty array of the captured length.
+   */
+  @Override
+  public ArrayWritable createValue() {
+    return new ArrayWritable(valueClass, new Writable[numValueFields]);
+  }
+
+  @Override
+  public long getPos() throws IOException {
+    return parquetReader.getPos();
+  }
+
+  @Override
+  public void close() throws IOException {
+    parquetReader.close();
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    return parquetReader.getProgress();
+  }
+}
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/AbstractRealtimeRecordReader.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/AbstractRealtimeRecordReader.java
@@ -0,0 +1,282 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop.realtime;
+
+import com.uber.hoodie.exception.HoodieException;
+import com.uber.hoodie.exception.HoodieIOException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericArray;
+import org.apache.avro.generic.GenericFixed;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import parquet.avro.AvroSchemaConverter;
+import parquet.hadoop.ParquetFileReader;
+import parquet.schema.MessageType;
+
+/**
+ * Record Reader implementation to merge fresh avro data with base parquet data, to support real
+ * time queries.
+ */
+public abstract class AbstractRealtimeRecordReader {
+
+  // Fraction of mapper/reducer task memory used for compaction of log files
+  public static final String COMPACTION_MEMORY_FRACTION_PROP = "compaction.memory.fraction";
+  public static final String DEFAULT_COMPACTION_MEMORY_FRACTION = "0.75";
+  // used to choose a trade off between IO vs Memory when performing compaction process
+  // Depending on outputfile size and memory provided, choose true to avoid OOM for large file
+  // size + small memory
+  public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP =
+      "compaction.lazy.block.read.enabled";
+  public static final String DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED = "true";
+
+  // Property to set the max memory for dfs inputstream buffer size
+  public static final String MAX_DFS_STREAM_BUFFER_SIZE_PROP = "hoodie.memory.dfs.buffer.max.size";
+  // Setting this to lower value of 1 MB since no control over how many RecordReaders will be started in a mapper
+  public static final int DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE = 1 * 1024 * 1024; // 1 MB
+  // Property to set file path prefix for spillable file
+  public static final String SPILLABLE_MAP_BASE_PATH_PROP = "hoodie.memory.spillable.map.path";
+  // Default file path prefix for spillable file
+  public static final String DEFAULT_SPILLABLE_MAP_BASE_PATH = "/tmp/";
+
+  public static final Log LOG = LogFactory.getLog(AbstractRealtimeRecordReader.class);
+  protected final HoodieRealtimeFileSplit split;
+  protected final JobConf jobConf;
+  // Schema read from the footer of the base parquet file of the split
+  private final MessageType baseFileSchema;
+
+  // Schema handles
+  // Projection of the writer schema onto the columns requested in the job configuration
+  private Schema readerSchema;
+  // Avro schema converted from the base parquet file schema
+  private Schema writerSchema;
+
+  /**
+   * Reads the schema of the split's base parquet file and derives the writer and reader schemas
+   * from it.
+   *
+   * @param split realtime split being read
+   * @param job   job configuration carrying the column projection
+   * @throws HoodieIOException if the schemas cannot be set up for the split
+   */
+  public AbstractRealtimeRecordReader(HoodieRealtimeFileSplit split, JobConf job) {
+    this.split = split;
+    this.jobConf = job;
+
+    LOG.info("cfg ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
+    try {
+      baseFileSchema = readSchema(jobConf, split.getPath());
+      init();
+    } catch (IOException e) {
+      throw new HoodieIOException(
+          "Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e);
+    }
+  }
+
+  /**
+   * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the
+   * twitter parquet to support hive 1.1.0
+   */
+  private static MessageType readSchema(Configuration conf, Path parquetFilePath) {
+    try {
+      return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData().getSchema();
+    } catch (IOException e) {
+      throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
+    }
+  }
+
+  /**
+   * Renders an ArrayWritable as "Size: n," followed by its space separated elements; a null
+   * argument is rendered as "null".
+   */
+  protected static String arrayWritableToString(ArrayWritable writable) {
+    if (writable == null) {
+      return "null";
+    }
+
+    StringBuilder builder = new StringBuilder();
+    Writable[] values = writable.get();
+    builder.append(String.format("Size: %s,", values.length));
+    for (Writable w : values) {
+      builder.append(w).append(" ");
+    }
+    return builder.toString();
+  }
+
+  /**
+   * Given a comma separated list of field names and positions at which they appear on Hive, return
+   * a ordered list of field names, that can be passed onto storage.
+   *
+   * @param fieldNameCsv          comma separated field names
+   * @param fieldOrderCsv         comma separated integer positions, one per non-partition field
+   * @param partitioningFieldsCsv comma separated partition field names, removed from the names
+   * @return the non-partition field names sorted by their position
+   * @throws HoodieException if the number of names and positions differ
+   */
+  public static List<String> orderFields(String fieldNameCsv, String fieldOrderCsv,
+      String partitioningFieldsCsv) {
+
+    String[] fieldOrders = fieldOrderCsv.split(",");
+    Set<String> partitioningFields = Arrays.stream(partitioningFieldsCsv.split(","))
+        .collect(Collectors.toSet());
+    List<String> fieldNames = Arrays.stream(fieldNameCsv.split(","))
+        .filter(fn -> !partitioningFields.contains(fn)).collect(Collectors.toList());
+
+    // Hive does not provide ids for partitioning fields, so check for lengths excluding that.
+    if (fieldNames.size() != fieldOrders.length) {
+      throw new HoodieException(String
+          .format("Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d",
+              fieldNames.size(), fieldOrders.length));
+    }
+    // TreeMap keeps the names sorted by their numeric position
+    TreeMap<Integer, String> orderedFieldMap = new TreeMap<>();
+    for (int ox = 0; ox < fieldOrders.length; ox++) {
+      orderedFieldMap.put(Integer.parseInt(fieldOrders[ox]), fieldNames.get(ox));
+    }
+    return new ArrayList<>(orderedFieldMap.values());
+  }
+
+  /**
+   * Generate a reader schema off the provided writeSchema, to just project out the provided
+   * columns
+   *
+   * @throws HoodieException if a requested field is not part of the write schema
+   */
+  public static Schema generateProjectionSchema(Schema writeSchema, List<String> fieldNames) {
+    List<Schema.Field> projectedFields = new ArrayList<>();
+    for (String fn : fieldNames) {
+      Schema.Field field = writeSchema.getField(fn);
+      if (field == null) {
+        throw new HoodieException("Field " + fn + " not found log schema. Query cannot proceed!");
+      }
+      // Fields are copied because the originals already belong to the write schema
+      projectedFields
+          .add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()));
+    }
+
+    return Schema.createRecord(projectedFields);
+  }
+
+  /**
+   * Convert the projected read from delta record into an array writable
+   *
+   * Primitive types map to the matching Writable, ENUM maps to Text, FIXED and BYTES map to
+   * BytesWritable, and RECORD, ARRAY and MAP map to a (nested) ArrayWritable. A MAP becomes an
+   * array of two element [key, value] ArrayWritables. Only unions of exactly two branches where
+   * one branch is NULL are supported.
+   *
+   * @param value  avro value to convert, may be null
+   * @param schema avro schema describing the value
+   * @return the converted Writable; NullWritable for a null value, null for an unhandled type
+   * @throws IllegalArgumentException for unions that are not a two branch union with NULL
+   */
+  public static Writable avroToArrayWritable(Object value, Schema schema) {
+
+    // if value is null, make a NullWritable
+    if (value == null) {
+      return NullWritable.get();
+    }
+
+    switch (schema.getType()) {
+      case STRING:
+        return new Text(value.toString());
+      case BYTES:
+        // Avro's generic representation hands out bytes as a ByteBuffer, not a byte[]
+        if (value instanceof java.nio.ByteBuffer) {
+          // Work on a duplicate so that the position of the caller's buffer is left untouched
+          java.nio.ByteBuffer buffer = ((java.nio.ByteBuffer) value).duplicate();
+          byte[] bytes = new byte[buffer.remaining()];
+          buffer.get(bytes);
+          return new BytesWritable(bytes);
+        }
+        return new BytesWritable((byte[]) value);
+      case INT:
+        return new IntWritable((Integer) value);
+      case LONG:
+        return new LongWritable((Long) value);
+      case FLOAT:
+        return new FloatWritable((Float) value);
+      case DOUBLE:
+        return new DoubleWritable((Double) value);
+      case BOOLEAN:
+        return new BooleanWritable((Boolean) value);
+      case NULL:
+        return NullWritable.get();
+      case RECORD:
+        GenericRecord record = (GenericRecord) value;
+        Writable[] values1 = new Writable[schema.getFields().size()];
+        int index1 = 0;
+        for (Schema.Field field : schema.getFields()) {
+          values1[index1++] = avroToArrayWritable(record.get(field.name()), field.schema());
+        }
+        return new ArrayWritable(Writable.class, values1);
+      case ENUM:
+        return new Text(value.toString());
+      case ARRAY:
+        GenericArray arrayValue = (GenericArray) value;
+        Writable[] values2 = new Writable[arrayValue.size()];
+        int index2 = 0;
+        for (Object obj : arrayValue) {
+          values2[index2++] = avroToArrayWritable(obj, schema.getElementType());
+        }
+        return new ArrayWritable(Writable.class, values2);
+      case MAP:
+        Map mapValue = (Map) value;
+        Writable[] values3 = new Writable[mapValue.size()];
+        int index3 = 0;
+        for (Object entry : mapValue.entrySet()) {
+          Map.Entry mapEntry = (Map.Entry) entry;
+          // Every map entry becomes a two element [key, value] array
+          Writable[] mapValues = new Writable[2];
+          mapValues[0] = new Text(mapEntry.getKey().toString());
+          mapValues[1] = avroToArrayWritable(mapEntry.getValue(), schema.getValueType());
+          values3[index3++] = new ArrayWritable(Writable.class, mapValues);
+        }
+        return new ArrayWritable(Writable.class, values3);
+      case UNION:
+        List<Schema> types = schema.getTypes();
+        if (types.size() != 2) {
+          throw new IllegalArgumentException("Only support union with 2 fields");
+        }
+        Schema s1 = types.get(0);
+        Schema s2 = types.get(1);
+        // value is known to be non-null here, so convert it with the non-NULL branch
+        if (s1.getType() == Schema.Type.NULL) {
+          return avroToArrayWritable(value, s2);
+        } else if (s2.getType() == Schema.Type.NULL) {
+          return avroToArrayWritable(value, s1);
+        } else {
+          throw new IllegalArgumentException("Only support union with null");
+        }
+      case FIXED:
+        return new BytesWritable(((GenericFixed) value).bytes());
+      default:
+        return null;
+    }
+  }
+
+  /**
+   * Derives the writer schema from the base parquet file schema and the reader schema from the
+   * columns projected in the job configuration. No log files are read here.
+   */
+  private void init() throws IOException {
+    writerSchema = new AvroSchemaConverter().convert(baseFileSchema);
+    List<String> projectionFields = orderFields(
+        jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
+        jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR),
+        jobConf.get("partition_columns", ""));
+    // TODO(vc): In the future, the reader schema should be updated based on log files & be able
+    // to null out fields not present before
+    readerSchema = generateProjectionSchema(writerSchema, projectionFields);
+
+    LOG.info(String.format("About to read compacted logs %s for base split %s, projecting cols %s",
+        split.getDeltaFilePaths(), split.getPath(), projectionFields));
+  }
+
+  /**
+   * @return the writer schema projected onto the columns requested by the job
+   */
+  public Schema getReaderSchema() {
+    return readerSchema;
+  }
+
+  /**
+   * @return the avro schema converted from the base parquet file schema
+   */
+  public Schema getWriterSchema() {
+    return writerSchema;
+  }
+
+  /**
+   * Computes the memory budget for compaction as the configured fraction
+   * ({@link #COMPACTION_MEMORY_FRACTION_PROP}) of the map task memory.
+   *
+   * @return the budget in bytes
+   */
+  public long getMaxCompactionMemoryInBytes() {
+    // jobConf.getMemoryForMapTask() is expressed in MB, so scale the result up to bytes
+    return (long) Math.ceil(Double
+        .parseDouble(jobConf.get(COMPACTION_MEMORY_FRACTION_PROP, DEFAULT_COMPACTION_MEMORY_FRACTION))
+        * jobConf.getMemoryForMapTask() * 1024 * 1024L);
+  }
+}
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReader.java
@@ -18,339 +18,85 @@

 package com.uber.hoodie.hadoop.realtime;

-import com.uber.hoodie.common.model.HoodieRecord;
-import com.uber.hoodie.common.model.HoodieRecordPayload;
-import com.uber.hoodie.common.table.log.HoodieCompactedLogRecordScanner;
-import com.uber.hoodie.common.util.FSUtils;
 import com.uber.hoodie.exception.HoodieException;
-import com.uber.hoodie.exception.HoodieIOException;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.generic.GenericFixed;
-import org.apache.avro.generic.GenericRecord;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.io.ArrayWritable;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
-import parquet.avro.AvroSchemaConverter;
-import parquet.hadoop.ParquetFileReader;
-import parquet.schema.MessageType;

 /**
- * Record Reader implementation to merge fresh avro data with base parquet data, to support real
- * time queries.
+ * Realtime Record Reader which can do compacted (merge-on-read) record reading or
+ * unmerged reading (parquet and log files read in parallel) based on job configuration.
 */
 public class HoodieRealtimeRecordReader implements RecordReader<Void, ArrayWritable> {

-  private final RecordReader<Void, ArrayWritable> parquetReader;
-  private final HoodieRealtimeFileSplit split;
-  private final JobConf jobConf;
-
-  // Fraction of mapper/reducer task memory used for compaction of log files
-  public static final String COMPACTION_MEMORY_FRACTION_PROP = "compaction.memory.fraction";
-  public static final String DEFAULT_COMPACTION_MEMORY_FRACTION = "0.75";
-
-  // used to choose a trade off between IO vs Memory when performing compaction process
-  // Depending on outputfile size and memory provided, choose true to avoid OOM for large file
-  // size + small memory
-  public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP =
-      "compaction.lazy.block.read.enabled";
-  public static final String DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED = "true";
-
-  // Property to set the max memory for dfs inputstream buffer size
-  public static final String MAX_DFS_STREAM_BUFFER_SIZE_PROP = "hoodie.memory.dfs.buffer.max.size";
-  // Setting this to lower value of 1 MB since no control over how many RecordReaders will be started in a mapper
-  public static final int DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE = 1 * 1024 * 1024; // 1 MB
-  // Property to set file path prefix for spillable file
-  public static final String SPILLABLE_MAP_BASE_PATH_PROP = "hoodie.memory.spillable.map.path";
-  // Default file path prefix for spillable file
-  public static final String DEFAULT_SPILLABLE_MAP_BASE_PATH = "/tmp/";
-
+  // Property to enable parallel reading of parquet and log files without merging.
+  public static final String REALTIME_SKIP_MERGE_PROP = "hoodie.realtime.merge.skip";
+  // By default, we do merged-reading
+  public static final String DEFAULT_REALTIME_SKIP_MERGE = "false";
  public static final Log LOG = LogFactory.getLog(HoodieRealtimeRecordReader.class);
-
-  private final HashMap<String, ArrayWritable> deltaRecordMap;
-  private final MessageType baseFileSchema;
+  private final RecordReader<Void, ArrayWritable> reader;

  public HoodieRealtimeRecordReader(HoodieRealtimeFileSplit split, JobConf job,
      RecordReader<Void, ArrayWritable> realReader) {
-    this.split = split;
-    this.jobConf = job;
-    this.parquetReader = realReader;
-    this.deltaRecordMap = new HashMap<>();
+    this.reader = constructRecordReader(split, job, realReader);
+  }

-    LOG.info("cfg ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
+  public static boolean canSkipMerging(JobConf jobConf) {
+    return Boolean.valueOf(jobConf.get(REALTIME_SKIP_MERGE_PROP, DEFAULT_REALTIME_SKIP_MERGE));
+  }
+
+  /**
+   * Construct record reader based on job configuration
+   *
+   * @param split      File Split
+   * @param jobConf    Job Configuration
+   * @param realReader Parquet Record Reader
+   * @return Realtime Reader
+   */
+  private static RecordReader<Void, ArrayWritable> constructRecordReader(HoodieRealtimeFileSplit split,
+      JobConf jobConf, RecordReader<Void, ArrayWritable> realReader) {
    try {
-      baseFileSchema = readSchema(jobConf, split.getPath());
-      readAndCompactLog(jobConf);
-    } catch (IOException e) {
-      throw new HoodieIOException(
-          "Could not create HoodieRealtimeRecordReader on path " + this.split.getPath(), e);
-    }
-  }
-
-  /**
-   * Reads the schema from the parquet file. This is different from ParquetUtils as it uses the
-   * twitter parquet to support hive 1.1.0
-   */
-  private static MessageType readSchema(Configuration conf, Path parquetFilePath) {
-    try {
-      return ParquetFileReader.readFooter(conf, parquetFilePath).getFileMetaData().getSchema();
-    } catch (IOException e) {
-      throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
-    }
-  }
-
-
-  /**
-   * Goes through the log files and populates a map with latest version of each key logged, since
-   * the base split was written.
-   */
-  private void readAndCompactLog(JobConf jobConf) throws IOException {
-    Schema writerSchema = new AvroSchemaConverter().convert(baseFileSchema);
-    List<String> projectionFields = orderFields(
-        jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR),
-        jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR),
-        jobConf.get("partition_columns", ""));
-    // TODO(vc): In the future, the reader schema should be updated based on log files & be able
-    // to null out fields not present before
-    Schema readerSchema = generateProjectionSchema(writerSchema, projectionFields);
-
-    LOG.info(String.format("About to read compacted logs %s for base split %s, projecting cols %s",
-        split.getDeltaFilePaths(), split.getPath(), projectionFields));
-    HoodieCompactedLogRecordScanner compactedLogRecordScanner = new HoodieCompactedLogRecordScanner(
-        FSUtils.getFs(split.getPath().toString(), jobConf), split.getBasePath(),
-        split.getDeltaFilePaths(), readerSchema, split.getMaxCommitTime(), (long) Math.ceil(Double
-        .valueOf(jobConf.get(COMPACTION_MEMORY_FRACTION_PROP, DEFAULT_COMPACTION_MEMORY_FRACTION))
-        * jobConf.getMemoryForMapTask()), Boolean.valueOf(jobConf
-        .get(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
-        false, jobConf.getInt(MAX_DFS_STREAM_BUFFER_SIZE_PROP, DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
-        jobConf.get(SPILLABLE_MAP_BASE_PATH_PROP, DEFAULT_SPILLABLE_MAP_BASE_PATH));
-    // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
-    // but can return records for completed commits > the commit we are trying to read (if using
-    // readCommit() API)
-    for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : compactedLogRecordScanner) {
-      GenericRecord rec = (GenericRecord) hoodieRecord.getData().getInsertValue(readerSchema).get();
-      String key = hoodieRecord.getRecordKey();
-      // we assume, a later safe record in the log, is newer than what we have in the map &
-      // replace it.
-      // TODO : handle deletes here
-      ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, writerSchema);
-      deltaRecordMap.put(key, aWritable);
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Log record : " + arrayWritableToString(aWritable));
+      if (canSkipMerging(jobConf)) {
+        LOG.info("Enabling un-merged reading of realtime records");
+        return new RealtimeUnmergedRecordReader(split, jobConf, realReader);
      }
-    }
-  }
-
-  private static String arrayWritableToString(ArrayWritable writable) {
-    if (writable == null) {
-      return "null";
-    }
-
-    StringBuilder builder = new StringBuilder();
-    Writable[] values = writable.get();
-    builder.append(String.format("Size: %s,", values.length));
-    for (Writable w : values) {
-      builder.append(w + " ");
-    }
-    return builder.toString();
-  }
-
-  /**
-   * Given a comma separated list of field names and positions at which they appear on Hive, return
-   * a ordered list of field names, that can be passed onto storage.
-   */
-  public static List<String> orderFields(String fieldNameCsv, String fieldOrderCsv,
-      String partitioningFieldsCsv) {
-
-    String[] fieldOrders = fieldOrderCsv.split(",");
-    Set<String> partitioningFields = Arrays.stream(partitioningFieldsCsv.split(","))
-        .collect(Collectors.toSet());
-    List<String> fieldNames = Arrays.stream(fieldNameCsv.split(","))
-        .filter(fn -> !partitioningFields.contains(fn)).collect(Collectors.toList());
-
-    // Hive does not provide ids for partitioning fields, so check for lengths excluding that.
-    if (fieldNames.size() != fieldOrders.length) {
-      throw new HoodieException(String
-          .format("Error ordering fields for storage read. #fieldNames: %d, #fieldPositions: %d",
-              fieldNames.size(), fieldOrders.length));
-    }
-    TreeMap<Integer, String> orderedFieldMap = new TreeMap<>();
-    for (int ox = 0; ox < fieldOrders.length; ox++) {
-      orderedFieldMap.put(Integer.parseInt(fieldOrders[ox]), fieldNames.get(ox));
-    }
-    return new ArrayList<>(orderedFieldMap.values());
-  }
-
-  /**
-   * Generate a reader schema off the provided writeSchema, to just project out the provided
-   * columns
-   */
-  public static Schema generateProjectionSchema(Schema writeSchema, List<String> fieldNames) {
-    List<Schema.Field> projectedFields = new ArrayList<>();
-    for (String fn : fieldNames) {
-      Schema.Field field = writeSchema.getField(fn);
-      if (field == null) {
-        throw new HoodieException("Field " + fn + " not found log schema. Query cannot proceed!");
-      }
-      projectedFields
-          .add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()));
-    }
-
-    return Schema.createRecord(projectedFields);
-  }
-
-  /**
-   * Convert the projected read from delta record into an array writable
-   */
-  public static Writable avroToArrayWritable(Object value, Schema schema) {
-
-    // if value is null, make a NullWritable
-    if (value == null) {
-      return NullWritable.get();
-    }
-
-    switch (schema.getType()) {
-      case STRING:
-        return new Text(value.toString());
-      case BYTES:
-        return new BytesWritable((byte[]) value);
-      case INT:
-        return new IntWritable((Integer) value);
-      case LONG:
-        return new LongWritable((Long) value);
-      case FLOAT:
-        return new FloatWritable((Float) value);
-      case DOUBLE:
-        return new DoubleWritable((Double) value);
-      case BOOLEAN:
-        return new BooleanWritable((Boolean) value);
-      case NULL:
-        return NullWritable.get();
-      case RECORD:
-        GenericRecord record = (GenericRecord) value;
-        Writable[] values1 = new Writable[schema.getFields().size()];
-        int index1 = 0;
-        for (Schema.Field field : schema.getFields()) {
-          values1[index1++] = avroToArrayWritable(record.get(field.name()), field.schema());
-        }
-        return new ArrayWritable(Writable.class, values1);
-      case ENUM:
-        return new Text(value.toString());
-      case ARRAY:
-        GenericArray arrayValue = (GenericArray) value;
-        Writable[] values2 = new Writable[arrayValue.size()];
-        int index2 = 0;
-        for (Object obj : arrayValue) {
-          values2[index2++] = avroToArrayWritable(obj, schema.getElementType());
-        }
-        return new ArrayWritable(Writable.class, values2);
-      case MAP:
-        Map mapValue = (Map) value;
-        Writable[] values3 = new Writable[mapValue.size()];
-        int index3 = 0;
-        for (Object entry : mapValue.entrySet()) {
-          Map.Entry mapEntry = (Map.Entry) entry;
-          Writable[] mapValues = new Writable[2];
-          mapValues[0] = new Text(mapEntry.getKey().toString());
-          mapValues[1] = avroToArrayWritable(mapEntry.getValue(), schema.getValueType());
-          values3[index3++] = new ArrayWritable(Writable.class, mapValues);
-        }
-        return new ArrayWritable(Writable.class, values3);
-      case UNION:
-        List<Schema> types = schema.getTypes();
-        if (types.size() != 2) {
-          throw new IllegalArgumentException("Only support union with 2 fields");
-        }
-        Schema s1 = types.get(0);
-        Schema s2 = types.get(1);
-        if (s1.getType() == Schema.Type.NULL) {
-          return avroToArrayWritable(value, s2);
-        } else if (s2.getType() == Schema.Type.NULL) {
-          return avroToArrayWritable(value, s1);
-        } else {
-          throw new IllegalArgumentException("Only support union with null");
-        }
-      case FIXED:
-        return new BytesWritable(((GenericFixed) value).bytes());
-      default:
-        return null;
+      return new RealtimeCompactedRecordReader(split, jobConf, realReader);
+    } catch (IOException ex) {
+      LOG.error("Got exception when constructing record reader", ex);
+      throw new HoodieException(ex);
    }
  }

  @Override
-  public boolean next(Void aVoid, ArrayWritable arrayWritable) throws IOException {
-    // Call the underlying parquetReader.next - which may replace the passed in ArrayWritable
-    // with a new block of values
-    boolean result = this.parquetReader.next(aVoid, arrayWritable);
-    if (!result) {
-      // if the result is false, then there are no more records
-      return false;
-    } else {
-      // TODO(VC): Right now, we assume all records in log, have a matching base record. (which
-      // would be true until we have a way to index logs too)
-      // return from delta records map if we have some match.
-      String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS]
-          .toString();
-      if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("key %s, base values: %s, log values: %s", key,
-            arrayWritableToString(arrayWritable), arrayWritableToString(deltaRecordMap.get(key))));
-      }
-      if (deltaRecordMap.containsKey(key)) {
-        // TODO(NA): Invoke preCombine here by converting arrayWritable to Avro ?
-        Writable[] replaceValue = deltaRecordMap.get(key).get();
-        Writable[] originalValue = arrayWritable.get();
-        System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length);
-        arrayWritable.set(originalValue);
-      }
-      return true;
-    }
+  public boolean next(Void key, ArrayWritable value) throws IOException {
+    return this.reader.next(key, value);
  }

  @Override
  public Void createKey() {
-    return parquetReader.createKey();
+    return this.reader.createKey();
  }

  @Override
  public ArrayWritable createValue() {
-    return parquetReader.createValue();
+    return this.reader.createValue();
  }

  @Override
  public long getPos() throws IOException {
-    return parquetReader.getPos();
+    return this.reader.getPos();
  }

  @Override
  public void close() throws IOException {
-    parquetReader.close();
+    this.reader.close();
  }

  @Override
  public float getProgress() throws IOException {
-    return parquetReader.getProgress();
+    return this.reader.getProgress();
  }
 }
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/RealtimeCompactedRecordReader.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/RealtimeCompactedRecordReader.java
@@ -0,0 +1,129 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop.realtime;
+
+import com.uber.hoodie.common.model.HoodieRecord;
+import com.uber.hoodie.common.model.HoodieRecordPayload;
+import com.uber.hoodie.common.table.log.HoodieMergedLogRecordScanner;
+import com.uber.hoodie.common.util.FSUtils;
+import java.io.IOException;
+import java.util.HashMap;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader implements
+    RecordReader<Void, ArrayWritable> {
+
+  protected final RecordReader<Void, ArrayWritable> parquetReader;
+  private final HashMap<String, ArrayWritable> deltaRecordMap;
+
+  public RealtimeCompactedRecordReader(HoodieRealtimeFileSplit split, JobConf job,
+      RecordReader<Void, ArrayWritable> realReader) throws IOException {
+    super(split, job);
+    this.parquetReader = realReader;
+    this.deltaRecordMap = new HashMap<>();
+    readAndCompactLog();
+  }
+
+  /**
+   * Goes through the log files and populates a map with latest version of each key logged, since
+   * the base split was written.
+   */
+  private void readAndCompactLog() throws IOException {
+    HoodieMergedLogRecordScanner compactedLogRecordScanner = new HoodieMergedLogRecordScanner(
+        FSUtils.getFs(split.getPath().toString(), jobConf), split.getBasePath(),
+        split.getDeltaFilePaths(), getReaderSchema(), split.getMaxCommitTime(), getMaxCompactionMemoryInBytes(),
+        Boolean.valueOf(jobConf.get(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP,
+            DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
+        false, jobConf.getInt(MAX_DFS_STREAM_BUFFER_SIZE_PROP, DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
+        jobConf.get(SPILLABLE_MAP_BASE_PATH_PROP, DEFAULT_SPILLABLE_MAP_BASE_PATH));
+    // NOTE: HoodieCompactedLogRecordScanner will not return records for an in-flight commit
+    // but can return records for completed commits > the commit we are trying to read (if using
+    // readCommit() API)
+    for (HoodieRecord<? extends HoodieRecordPayload> hoodieRecord : compactedLogRecordScanner) {
+      GenericRecord rec = (GenericRecord) hoodieRecord.getData().getInsertValue(getReaderSchema()).get();
+      String key = hoodieRecord.getRecordKey();
+      // we assume, a later safe record in the log, is newer than what we have in the map &
+      // replace it.
+      // TODO : handle deletes here
+      ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, getWriterSchema());
+      deltaRecordMap.put(key, aWritable);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Log record : " + arrayWritableToString(aWritable));
+      }
+    }
+  }
+
+  @Override
+  public boolean next(Void aVoid, ArrayWritable arrayWritable) throws IOException {
+    // Call the underlying parquetReader.next - which may replace the passed in ArrayWritable
+    // with a new block of values
+    boolean result = this.parquetReader.next(aVoid, arrayWritable);
+    if (!result) {
+      // if the result is false, then there are no more records
+      return false;
+    } else {
+      // TODO(VC): Right now, we assume all records in log, have a matching base record. (which
+      // would be true until we have a way to index logs too)
+      // return from delta records map if we have some match.
+      String key = arrayWritable.get()[HoodieRealtimeInputFormat.HOODIE_RECORD_KEY_COL_POS]
+          .toString();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("key %s, base values: %s, log values: %s", key,
+            arrayWritableToString(arrayWritable), arrayWritableToString(deltaRecordMap.get(key))));
+      }
+      if (deltaRecordMap.containsKey(key)) {
+        // TODO(NA): Invoke preCombine here by converting arrayWritable to Avro ?
+        Writable[] replaceValue = deltaRecordMap.get(key).get();
+        Writable[] originalValue = arrayWritable.get();
+        System.arraycopy(replaceValue, 0, originalValue, 0, originalValue.length);
+        arrayWritable.set(originalValue);
+      }
+      return true;
+    }
+  }
+
+  @Override
+  public Void createKey() {
+    return parquetReader.createKey();
+  }
+
+  @Override
+  public ArrayWritable createValue() {
+    return parquetReader.createValue();
+  }
+
+  @Override
+  public long getPos() throws IOException {
+    return parquetReader.getPos();
+  }
+
+  @Override
+  public void close() throws IOException {
+    parquetReader.close();
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    return parquetReader.getProgress();
+  }
+}
--- a/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/RealtimeUnmergedRecordReader.java
+++ b/hoodie-hadoop-mr/src/main/java/com/uber/hoodie/hadoop/realtime/RealtimeUnmergedRecordReader.java
@@ -0,0 +1,142 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop.realtime;
+
+import com.uber.hoodie.common.table.log.HoodieUnMergedLogRecordScanner;
+import com.uber.hoodie.common.util.DefaultSizeEstimator;
+import com.uber.hoodie.common.util.FSUtils;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor;
+import com.uber.hoodie.common.util.queue.BoundedInMemoryQueueProducer;
+import com.uber.hoodie.common.util.queue.FunctionBasedQueueProducer;
+import com.uber.hoodie.common.util.queue.IteratorBasedQueueProducer;
+import com.uber.hoodie.hadoop.RecordReaderValueIterator;
+import com.uber.hoodie.hadoop.SafeParquetRecordReaderWrapper;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Optional;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * Record reader that returns the records of the parquet reader and of the log files without
+ * merging them: both sources are drained by producers that feed one bounded in-memory queue,
+ * and {@link #next(Void, ArrayWritable)} hands out whatever the queue delivers.
+ */
+class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader implements
+    RecordReader<Void, ArrayWritable> {
+
+  // Log Record unmerged scanner
+  private final HoodieUnMergedLogRecordScanner logRecordScanner;
+
+  // Parquet record reader
+  private final RecordReader<Void, ArrayWritable> parquetReader;
+
+  // Parquet record iterator wrapper for the above reader
+  private final RecordReaderValueIterator<Void, ArrayWritable> parquetRecordsIterator;
+
+  // Executor that runs the above producers in parallel
+  private final BoundedInMemoryExecutor<ArrayWritable, ArrayWritable, ?> executor;
+
+  // Iterator for the buffer consumer
+  private final Iterator<ArrayWritable> iterator;
+
+  /**
+   * Constructs an unmerged record reader that consumes parquet and log records in parallel and
+   * buffers them for upstream clients to consume.
+   *
+   * @param split      File split
+   * @param job        Job Configuration
+   * @param realReader Parquet Reader
+   */
+  public RealtimeUnmergedRecordReader(HoodieRealtimeFileSplit split, JobConf job,
+      RecordReader<Void, ArrayWritable> realReader) {
+    super(split, job);
+    this.parquetReader = new SafeParquetRecordReaderWrapper(realReader);
+    // Iterator for consuming records from parquet file
+    this.parquetRecordsIterator = new RecordReaderValueIterator<>(this.parquetReader);
+    // getParallelProducers() runs before logRecordScanner is assigned; that is fine because the
+    // producer only dereferences the field when it is executed, after startProducers() below
+    this.executor = new BoundedInMemoryExecutor<>(getMaxCompactionMemoryInBytes(), getParallelProducers(),
+        Optional.empty(), x -> x, new DefaultSizeEstimator<>());
+    // Consumer of this record reader
+    this.iterator = this.executor.getQueue().iterator();
+    this.logRecordScanner = new HoodieUnMergedLogRecordScanner(
+        FSUtils.getFs(split.getPath().toString(), jobConf), split.getBasePath(),
+        split.getDeltaFilePaths(), getReaderSchema(), split.getMaxCommitTime(), Boolean.valueOf(jobConf
+        .get(COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED)),
+        false, jobConf.getInt(MAX_DFS_STREAM_BUFFER_SIZE_PROP, DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE),
+        record -> {
+          // convert Hoodie log record to Hadoop ArrayWritable and buffer
+          GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema()).get();
+          ArrayWritable aWritable = (ArrayWritable) avroToArrayWritable(rec, getWriterSchema());
+          this.executor.getQueue().insertRecord(aWritable);
+        });
+    // Start reading and buffering
+    this.executor.startProducers();
+  }
+
+  /**
+   * Setup log and parquet reading in parallel. Both write to central buffer.
+   *
+   * @return the two producers: one that runs the log scan, one that drains the parquet iterator
+   */
+  @SuppressWarnings("unchecked")
+  private List<BoundedInMemoryQueueProducer<ArrayWritable>> getParallelProducers() {
+    List<BoundedInMemoryQueueProducer<ArrayWritable>> producers = new ArrayList<>();
+    // The log producer ignores the buffer handed to it: the scanner callback set up in the
+    // constructor inserts each record into the executor queue itself
+    producers.add(new FunctionBasedQueueProducer<>(buffer -> {
+      logRecordScanner.scan();
+      return null;
+    }));
+    producers.add(new IteratorBasedQueueProducer<>(parquetRecordsIterator));
+    return producers;
+  }
+
+  /**
+   * Hands out the next buffered record, whichever source it came from.
+   *
+   * @param key   unused
+   * @param value holder that receives the values of the next buffered record
+   * @return {@code false} when the buffer iterator is exhausted, {@code true} otherwise
+   */
+  @Override
+  public boolean next(Void key, ArrayWritable value) throws IOException {
+    if (!iterator.hasNext()) {
+      return false;
+    }
+    // Copy from buffer iterator and set to passed writable
+    value.set(iterator.next().get());
+    return true;
+  }
+
+  @Override
+  public Void createKey() {
+    return parquetReader.createKey();
+  }
+
+  @Override
+  public ArrayWritable createValue() {
+    return parquetReader.createValue();
+  }
+
+  @Override
+  public long getPos() throws IOException {
+    //TODO: vb - No logical way to represent parallel stream pos in a single long.
+    // Should we just return invalid (-1). Where is it used ?
+    return 0;
+  }
+
+  /**
+   * Closes the parquet iterator and shuts the executor down.
+   *
+   * @throws IOException if closing the parquet iterator fails; the executor is shut down anyway
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      this.parquetRecordsIterator.close();
+    } finally {
+      // Must run even when the close above throws, otherwise the producers keep running
+      this.executor.shutdownNow();
+    }
+  }
+
+  /**
+   * @return the smaller of the parquet reader progress and the log scanner progress
+   */
+  @Override
+  public float getProgress() throws IOException {
+    return Math.min(parquetReader.getProgress(), logRecordScanner.getProgress());
+  }
+}
--- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestRecordReaderValueIterator.java
+++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/TestRecordReaderValueIterator.java
@@ -0,0 +1,105 @@
+/*
+ *  Copyright (c) 2017 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *           http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *
+ */
+
+package com.uber.hoodie.hadoop;
+
+import groovy.lang.Tuple2;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.RecordReader;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks that {@link RecordReaderValueIterator} exposes the values of a record reader in order
+ * and reports exhaustion afterwards.
+ */
+public class TestRecordReaderValueIterator {
+
+  @Test
+  public void testValueIterator() {
+    String[] values = new String[]{
+        "hoodie",
+        "efficient",
+        "new project",
+        "realtime",
+        "spark",
+        "dataset",
+    };
+    // Pair every value with its position; the position becomes the key of the replayed record
+    List<Tuple2<Integer, String>> entries = IntStream.range(0, values.length)
+        .mapToObj(pos -> new Tuple2<Integer, String>(pos, values[pos]))
+        .collect(Collectors.toList());
+    RecordReaderValueIterator<IntWritable, Text> itr =
+        new RecordReaderValueIterator<IntWritable, Text>(new TestRecordReader(entries));
+    for (String expected : values) {
+      Assert.assertTrue(itr.hasNext());
+      Assert.assertEquals(expected, itr.next().toString());
+    }
+    Assert.assertFalse(itr.hasNext());
+  }
+
+  /**
+   * Record reader that replays a fixed list of (key, value) entries, for unit-testing
+   */
+  private static class TestRecordReader implements RecordReader<IntWritable, Text> {
+
+    private final List<Tuple2<Integer, String>> entries;
+    private int currIndex = 0;
+
+    public TestRecordReader(List<Tuple2<Integer, String>> entries) {
+      this.entries = entries;
+    }
+
+    @Override
+    public boolean next(IntWritable key, Text value) throws IOException {
+      boolean hasEntry = currIndex < entries.size();
+      if (hasEntry) {
+        Tuple2<Integer, String> entry = entries.get(currIndex);
+        key.set(entry.getFirst());
+        value.set(entry.getSecond());
+        currIndex++;
+      }
+      return hasEntry;
+    }
+
+    @Override
+    public IntWritable createKey() {
+      return new IntWritable();
+    }
+
+    @Override
+    public Text createValue() {
+      return new Text();
+    }
+
+    @Override
+    public long getPos() throws IOException {
+      // Position is the number of entries replayed so far
+      return currIndex;
+    }
+
+    @Override
+    public void close() throws IOException {
+      // nothing to release
+    }
+
+    @Override
+    public float getProgress() throws IOException {
+      return (currIndex * 1.0F) / entries.size();
+    }
+  }
+}
--- a/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java
+++ b/hoodie-hadoop-mr/src/test/java/com/uber/hoodie/hadoop/realtime/HoodieRealtimeRecordReaderTest.java
@@ -35,8 +35,10 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.IndexedRecord;
@@ -71,7 +73,7 @@ public class HoodieRealtimeRecordReaderTest {
  @Before
  public void setUp() {
    jobConf = new JobConf();
-    jobConf.set(HoodieRealtimeRecordReader.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1 * 1024 * 1024));
+    jobConf.set(AbstractRealtimeRecordReader.MAX_DFS_STREAM_BUFFER_SIZE_PROP, String.valueOf(1 * 1024 * 1024));
    hadoopConf = HoodieTestUtils.getDefaultHadoopConf();
    fs = FSUtils.getFs(basePath.getRoot().getAbsolutePath(), hadoopConf);
  }
@@ -82,12 +84,18 @@ public class HoodieRealtimeRecordReaderTest {
  private HoodieLogFormat.Writer writeLogFile(File partitionDir, Schema schema, String fileId,
      String baseCommit, String newCommit, int numberOfRecords)
      throws InterruptedException, IOException {
+    return writeLogFile(partitionDir, schema, fileId, baseCommit, newCommit, numberOfRecords, 0);
+  }
+
+  private HoodieLogFormat.Writer writeLogFile(File partitionDir, Schema schema, String fileId,
+      String baseCommit, String newCommit, int numberOfRecords, int offset)
+      throws InterruptedException, IOException {
    HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
        .onParentPath(new Path(partitionDir.getPath()))
        .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId(fileId)
        .overBaseCommit(baseCommit).withFs(fs).build();
    List<IndexedRecord> records = new ArrayList<>();
-    for (int i = 0; i < numberOfRecords; i++) {
+    for (int i = offset; i < offset + numberOfRecords; i++) {
      records.add(SchemaTestUtil.generateAvroRecordFromJson(schema, i, newCommit, "fileid0"));
    }
    Schema writeSchema = records.get(0).getSchema();
@@ -142,8 +150,7 @@ public class HoodieRealtimeRecordReaderTest {
    jobConf.set("partition_columns", "datestr");

    //validate record reader compaction
-    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf,
-        reader);
+    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    //use reader to read base Parquet File and log file, merge in flight and return latest commit
    //here all 100 records should be updated, see above
@@ -158,6 +165,90 @@ public class HoodieRealtimeRecordReaderTest {
    }
  }

+  /** Checks that RealtimeUnmergedRecordReader emits every base-file and log record exactly once. */
+  @Test
+  public void testUnMergedReader() throws Exception {
+    // initial commit
+    Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getEvolvedSchema());
+    HoodieTestUtils.initTableType(hadoopConf, basePath.getRoot().getAbsolutePath(),
+        HoodieTableType.MERGE_ON_READ);
+    String commitTime = "100";
+    final int numRecords = 1000;
+    final int firstBatchLastRecordKey = numRecords - 1;
+    final int secondBatchLastRecordKey = 2 * numRecords - 1;
+    File partitionDir = InputFormatTestUtil
+        .prepareParquetDataset(basePath, schema, 1, numRecords, commitTime);
+    InputFormatTestUtil.commit(basePath, commitTime);
+    // Add the paths
+    FileInputFormat.setInputPaths(jobConf, partitionDir.getPath());
+
+    // insert new records to log file
+    String newCommitTime = "101";
+    HoodieLogFormat.Writer writer = writeLogFile(partitionDir, schema, "fileid0", commitTime,
+        newCommitTime, numRecords, numRecords);
+    long size = writer.getCurrentSize();
+    writer.close();
+    assertTrue("block - size should be > 0", size > 0);
+
+    //create a split with baseFile (parquet file written earlier) and new log file(s)
+    String logFilePath = writer.getLogFile().getPath().toString();
+    HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(
+        new FileSplit(new Path(partitionDir + "/fileid0_1_" + commitTime + ".parquet"), 0, 1,
+            jobConf), basePath.getRoot().getPath(), Arrays.asList(logFilePath), newCommitTime);
+
+    //create a RecordReader to be used by HoodieRealtimeRecordReader
+    RecordReader<Void, ArrayWritable> reader =
+        new MapredParquetInputFormat().getRecordReader(
+            new FileSplit(split.getPath(), 0, fs.getLength(split.getPath()), (String[]) null),
+            jobConf, null);
+    JobConf jobConf = new JobConf(); // NOTE: this local shadows the jobConf field from here on
+    List<Schema.Field> fields = schema.getFields();
+    String names = fields.stream().map(Schema.Field::name).collect(Collectors.joining(","));
+    String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
+    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
+    jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
+    jobConf.set("partition_columns", "datestr");
+    // Enable merge skipping.
+    jobConf.set("hoodie.realtime.merge.skip", "true");
+
+    //validate unmerged record reader
+    RealtimeUnmergedRecordReader recordReader = new RealtimeUnmergedRecordReader(split, jobConf, reader);
+
+    //use reader to read base Parquet File and log file
+    //here all records should be present. Also ensure log records are in order.
+    Void key = recordReader.createKey();
+    ArrayWritable value = recordReader.createValue();
+    int numRecordsAtCommit1 = 0;
+    int numRecordsAtCommit2 = 0;
+    Set<Integer> seenKeys = new HashSet<>();
+    int lastSeenKeyFromLog = firstBatchLastRecordKey;
+    while (recordReader.next(key, value)) {
+      Writable[] values = value.get();
+      String gotCommit = values[0].toString();
+      String keyStr = values[2].toString();
+      int gotKey = Integer.parseInt(keyStr.substring("key".length()));
+      if (gotCommit.equals(newCommitTime)) {
+        numRecordsAtCommit2++;
+        Assert.assertTrue(gotKey > firstBatchLastRecordKey);
+        Assert.assertTrue(gotKey <= secondBatchLastRecordKey);
+        // each log record must carry the next consecutive key (expected value goes first)
+        Assert.assertEquals(lastSeenKeyFromLog + 1, gotKey);
+        lastSeenKeyFromLog++;
+      } else {
+        numRecordsAtCommit1++;
+        Assert.assertTrue(gotKey >= 0);
+        Assert.assertTrue(gotKey <= firstBatchLastRecordKey);
+      }
+      // Ensure unique key: Set.add returns false when the key was already present
+      Assert.assertTrue("duplicate key " + gotKey, seenKeys.add(gotKey));
+      key = recordReader.createKey();
+      value = recordReader.createValue();
+    }
+    Assert.assertEquals(numRecords, numRecordsAtCommit1);
+    Assert.assertEquals(numRecords, numRecordsAtCommit2);
+    Assert.assertEquals(2 * numRecords, seenKeys.size());
+  }
+
  @Test
  public void testReaderWithNestedAndComplexSchema() throws Exception {
    // initial commit
@@ -203,8 +294,7 @@ public class HoodieRealtimeRecordReaderTest {
    jobConf.set("partition_columns", "datestr");

    // validate record reader compaction
-    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf,
-        reader);
+    HoodieRealtimeRecordReader recordReader = new HoodieRealtimeRecordReader(split, jobConf, reader);

    // use reader to read base Parquet File and log file, merge in flight and return latest commit
    // here the first 50 records should be updated, see above