1
0

Spawning a parallel writer thread to separate reading records from Spark from writing records to the Parquet file

This commit is contained in:
Omkar Joshi
2018-03-14 16:00:47 -07:00
committed by vinoth chandar
parent 9dff8c2326
commit c5b4cb1b75
8 changed files with 529 additions and 28 deletions

View File

@@ -149,8 +149,10 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
recordsDeleted++;
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord, recordMetadata);
// Deflate the record payload only after recording success, so that users can still access the payload while
// the record is being marked successful.
hoodieRecord.deflate();
return avroRecord;
} catch (Exception e) {
logger.error("Error writing record " + hoodieRecord, e);

View File

@@ -93,11 +93,15 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
/**
* Perform the actual writing of the given record into the backing file.
*/
public void write(HoodieRecord record) {
public void write(HoodieRecord record, Optional<IndexedRecord> insertValue,
Optional<Exception> getInsertValueException) {
Optional recordMetadata = record.getData().getMetadata();
try {
Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
// rethrow any exception that was raised while fetching the insert value
if (getInsertValueException.isPresent()) {
throw getInsertValueException.get();
}
Optional<IndexedRecord> avroRecord = insertValue;
if (avroRecord.isPresent()) {
storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
// update the new location of record, so we know where to find it next
@@ -106,8 +110,10 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
} else {
recordsDeleted++;
}
record.deflate();
status.markSuccess(record, recordMetadata);
// Deflate the record payload only after recording success, so that users can still access the payload while
// the record is being marked successful.
record.deflate();
} catch (Throwable t) {
// Not throwing exception from here, since we don't want to fail the entire job
// for a single record

View File

@@ -50,8 +50,7 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
this.fs = hoodieTable.getMetaClient().getFs();
this.hoodieTable = hoodieTable;
this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline();
this.schema =
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
this.schema = createHoodieWriteSchema(config);
}
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) {
@@ -101,4 +100,8 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
/**
 * Returns the schema held by this IO handle.
 *
 * @return the value of this handle's {@code schema} field
 */
public Schema getSchema() {
  return this.schema;
}
/**
 * Builds the schema used for writing from the given write config.
 *
 * <p>The schema definition returned by {@code config.getSchema()} is parsed with a fresh
 * {@link Schema.Parser}, and the parsed schema is then passed through
 * {@code HoodieAvroUtils.addMetadataFields}.
 *
 * @param config write config supplying the schema definition
 * @return the parsed schema as returned by {@code HoodieAvroUtils.addMetadataFields}
 */
public static Schema createHoodieWriteSchema(HoodieWriteConfig config) {
  // A new parser per call, exactly as before: no parser state is shared between invocations.
  final Schema.Parser schemaParser = new Schema.Parser();
  final Schema parsedSchema = schemaParser.parse(config.getSchema());
  return HoodieAvroUtils.addMetadataFields(parsedSchema);
}
}

View File

@@ -167,8 +167,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
recordsDeleted++;
}
hoodieRecord.deflate();
writeStatus.markSuccess(hoodieRecord, recordMetadata);
// Deflate the record payload only after recording success, so that users can still access the payload while
// the record is being marked successful.
hoodieRecord.deflate();
return true;
} catch (Exception e) {
logger.error("Error writing record " + hoodieRecord, e);