Spawning a parallel writer thread so that reading records from Spark is decoupled from writing records to the Parquet file

2018-03-14 16:00:47 -07:00
parent 9dff8c2326
commit c5b4cb1b75
8 changed files with 529 additions and 28 deletions
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/HoodieCreateHandle.java
@@ -93,11 +93,15 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
  /**
   * Perform the actual writing of the given record into the backing file.
   */
-  public void write(HoodieRecord record) {
+  public void write(HoodieRecord record, Optional<IndexedRecord> insertValue,
+      Optional<Exception> getInsertValueException) {
    Optional recordMetadata = record.getData().getMetadata();
    try {
-      Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
-
+      // Rethrow any exception that was captured while fetching the insert value
+      if (getInsertValueException.isPresent()) {
+        throw getInsertValueException.get();
+      }
+      Optional<IndexedRecord> avroRecord = insertValue;
      if (avroRecord.isPresent()) {
        storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
        // update the new location of record, so we know where to find it next
@@ -106,8 +110,10 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
      } else {
        recordsDeleted++;
      }
-      record.deflate();
      status.markSuccess(record, recordMetadata);
+      // Deflate the record payload only after recording success, so that the payload is still accessible
+      // while the record is being marked successful.
+      record.deflate();
    } catch (Throwable t) {
      // Not throwing exception from here, since we don't want to fail the entire job
      // for a single record