1
0

Spawning parallel writer thread to separate reading records from spark and writing records to parquet file

This commit is contained in:
Omkar Joshi
2018-03-14 16:00:47 -07:00
committed by vinoth chandar
parent 9dff8c2326
commit c5b4cb1b75
8 changed files with 529 additions and 28 deletions

View File

@@ -93,11 +93,15 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
/**
* Perform the actual writing of the given record into the backing file.
*/
public void write(HoodieRecord record) {
public void write(HoodieRecord record, Optional<IndexedRecord> insertValue,
Optional<Exception> getInsertValueException) {
Optional recordMetadata = record.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
// throws exception if there was any exception while fetching insert value
if (getInsertValueException.isPresent()) {
throw getInsertValueException.get();
}
Optional<IndexedRecord> avroRecord = insertValue;
if (avroRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
// update the new location of record, so we know where to find it next
@@ -106,8 +110,10 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
} else {
recordsDeleted++;
}
record.deflate();
status.markSuccess(record, recordMetadata);
// deflate record payload after recording success. This will help users access payload as a part of marking
// record successful.
record.deflate();
} catch (Throwable t) {
// Not throwing exception from here, since we don't want to fail the entire job
// for a single record