Spawning a parallel writer thread so that reading records from Spark is decoupled from writing them to the Parquet file
This commit is contained in:
committed by
vinoth chandar
parent
9dff8c2326
commit
c5b4cb1b75
@@ -149,8 +149,10 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
recordsDeleted++;
|
||||
}
|
||||
|
||||
hoodieRecord.deflate();
|
||||
writeStatus.markSuccess(hoodieRecord, recordMetadata);
|
||||
// Deflate the record payload only after recording success, so that users can still access the payload while
|
||||
// the record is being marked successful.
|
||||
hoodieRecord.deflate();
|
||||
return avroRecord;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error writing record " + hoodieRecord, e);
|
||||
|
||||
@@ -93,11 +93,15 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
/**
|
||||
* Perform the actual writing of the given record into the backing file.
|
||||
*/
|
||||
public void write(HoodieRecord record) {
|
||||
public void write(HoodieRecord record, Optional<IndexedRecord> insertValue,
|
||||
Optional<Exception> getInsertValueException) {
|
||||
Optional recordMetadata = record.getData().getMetadata();
|
||||
try {
|
||||
Optional<IndexedRecord> avroRecord = record.getData().getInsertValue(schema);
|
||||
|
||||
// throws exception if there was any exception while fetching insert value
|
||||
if (getInsertValueException.isPresent()) {
|
||||
throw getInsertValueException.get();
|
||||
}
|
||||
Optional<IndexedRecord> avroRecord = insertValue;
|
||||
if (avroRecord.isPresent()) {
|
||||
storageWriter.writeAvroWithMetadata(avroRecord.get(), record);
|
||||
// update the new location of record, so we know where to find it next
|
||||
@@ -106,8 +110,10 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload> extends HoodieIOH
|
||||
} else {
|
||||
recordsDeleted++;
|
||||
}
|
||||
record.deflate();
|
||||
status.markSuccess(record, recordMetadata);
|
||||
// Deflate the record payload only after recording success, so that users can still access the payload while
|
||||
// the record is being marked successful.
|
||||
record.deflate();
|
||||
} catch (Throwable t) {
|
||||
// Not throwing exception from here, since we don't want to fail the entire job
|
||||
// for a single record
|
||||
|
||||
@@ -50,8 +50,7 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
||||
this.fs = hoodieTable.getMetaClient().getFs();
|
||||
this.hoodieTable = hoodieTable;
|
||||
this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline();
|
||||
this.schema =
|
||||
HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
||||
this.schema = createHoodieWriteSchema(config);
|
||||
}
|
||||
|
||||
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) {
|
||||
@@ -101,4 +100,8 @@ public abstract class HoodieIOHandle<T extends HoodieRecordPayload> {
|
||||
/**
 * Returns the {@code schema} field of this handle.
 *
 * @return the schema stored in {@code schema}
 */
public Schema getSchema() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
/**
 * Creates a schema from the given write config: parses the schema definition returned by
 * {@code config.getSchema()} with a new {@code Schema.Parser} and passes the parsed schema
 * through {@code HoodieAvroUtils.addMetadataFields}.
 *
 * @param config write config that supplies the schema definition
 * @return the value returned by {@code HoodieAvroUtils.addMetadataFields} for the parsed schema
 */
public static Schema createHoodieWriteSchema(HoodieWriteConfig config) {
|
||||
return HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,8 +167,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
||||
recordsDeleted++;
|
||||
}
|
||||
|
||||
hoodieRecord.deflate();
|
||||
writeStatus.markSuccess(hoodieRecord, recordMetadata);
|
||||
// Deflate the record payload only after recording success, so that users can still access the payload while
|
||||
// the record is being marked successful.
|
||||
hoodieRecord.deflate();
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error writing record " + hoodieRecord, e);
|
||||
|
||||
Reference in New Issue
Block a user