[HUDI-1632] Supports merge on read write mode for Flink writer (#2593)

Also supports async compaction with pluggable strategies.
Danny Chan
2021-03-01 12:29:41 +08:00
committed by GitHub
parent be257b58c6
commit 7a11de1276
38 changed files with 2296 additions and 152 deletions

HoodieAppendHandle.java

@@ -74,38 +74,38 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
   // This acts as the sequenceID for records written
   private static final AtomicLong RECORD_COUNTER = new AtomicLong(1);
-  private final String fileId;
+  protected final String fileId;
   // Buffer for holding records in memory before they are flushed to disk
   private final List<IndexedRecord> recordList = new ArrayList<>();
   // Buffer for holding records (to be deleted) in memory before they are flushed to disk
   private final List<HoodieKey> keysToDelete = new ArrayList<>();
   // Incoming records to be written to logs.
-  private final Iterator<HoodieRecord<T>> recordItr;
+  protected Iterator<HoodieRecord<T>> recordItr;
   // Writer to log into the file group's latest slice.
-  private Writer writer;
+  protected Writer writer;
-  private final List<WriteStatus> statuses;
+  protected final List<WriteStatus> statuses;
   // Total number of records written during an append
-  private long recordsWritten = 0;
+  protected long recordsWritten = 0;
   // Total number of records deleted during an append
-  private long recordsDeleted = 0;
+  protected long recordsDeleted = 0;
   // Total number of records updated during an append
-  private long updatedRecordsWritten = 0;
+  protected long updatedRecordsWritten = 0;
   // Total number of new records inserted into the delta file
-  private long insertRecordsWritten = 0;
+  protected long insertRecordsWritten = 0;
   // Average record size for a HoodieRecord. This size is updated at the end of every log block flushed to disk
   private long averageRecordSize = 0;
   // Flag used to initialize some metadata
   private boolean doInit = true;
   // Total number of bytes written during this append phase (an estimation)
-  private long estimatedNumberOfBytesWritten;
+  protected long estimatedNumberOfBytesWritten;
   // Number of records that must be written to meet the max block size for a log block
   private int numberOfRecords = 0;
   // Max block size to limit to for a log block
   private final int maxBlockSize = config.getLogFileDataBlockMaxSize();
   // Header metadata for a log block
-  private final Map<HeaderMetadataType, String> header = new HashMap<>();
+  protected final Map<HeaderMetadataType, String> header = new HashMap<>();
   private SizeEstimator<HoodieRecord> sizeEstimator;

   public HoodieAppendHandle(HoodieWriteConfig config, String instantTime, HoodieTable<T, I, K, O> hoodieTable,
@@ -178,6 +178,14 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
     }
   }

+  /**
+   * Returns whether the hoodie record is an UPDATE.
+   */
+  protected boolean isUpdateRecord(HoodieRecord<T> hoodieRecord) {
+    // If currentLocation is present, then this is an update
+    return hoodieRecord.getCurrentLocation() != null;
+  }
+
   private Option<IndexedRecord> getIndexedRecord(HoodieRecord<T> hoodieRecord) {
     Option<Map<String, String>> recordMetadata = hoodieRecord.getData().getMetadata();
     try {
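
The new isUpdateRecord hook factors the update-versus-insert test out of getIndexedRecord (see the next hunk) so that engine-specific handles can redefine what counts as an update. Below is a minimal sketch of an override, assuming a hypothetical subclass PreLocatedAppendHandle whose upstream operator assigns record locations before the handle runs; the class name, the "U" sentinel, the import paths, and the constructor parameters beyond those visible in the hunk above are illustrative assumptions, not part of this commit:

import java.util.Iterator;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.io.HoodieAppendHandle;
import org.apache.hudi.table.HoodieTable;

// Hypothetical subclass: the upstream writer pre-assigns each record's
// location, so a bare "location != null" check would misclassify fresh
// inserts as updates.
public class PreLocatedAppendHandle<T extends HoodieRecordPayload, I, K, O>
    extends HoodieAppendHandle<T, I, K, O> {

  public PreLocatedAppendHandle(HoodieWriteConfig config, String instantTime,
      HoodieTable<T, I, K, O> hoodieTable, String partitionPath, String fileId,
      Iterator<HoodieRecord<T>> recordItr, TaskContextSupplier taskContextSupplier) {
    super(config, instantTime, hoodieTable, partitionPath, fileId, recordItr, taskContextSupplier);
  }

  @Override
  protected boolean isUpdateRecord(HoodieRecord<T> record) {
    HoodieRecordLocation loc = record.getCurrentLocation();
    // "U" is a sentinel instant time the upstream writer would stamp on
    // records it already knows are updates (an assumption of this sketch).
    return loc != null && "U".equals(loc.getInstantTime());
  }
}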
@@ -190,8 +198,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
       HoodieAvroUtils.addHoodieKeyToRecord((GenericRecord) avroRecord.get(), hoodieRecord.getRecordKey(),
           hoodieRecord.getPartitionPath(), fileId);
       HoodieAvroUtils.addCommitMetadataToRecord((GenericRecord) avroRecord.get(), instantTime, seqId);
-      // If currentLocation is present, then this is an update
-      if (hoodieRecord.getCurrentLocation() != null) {
+      if (isUpdateRecord(hoodieRecord)) {
         updatedRecordsWritten++;
       } else {
         insertRecordsWritten++;
@@ -324,7 +331,7 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
     estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;
   }

-  private void appendDataAndDeleteBlocks(Map<HeaderMetadataType, String> header) {
+  protected void appendDataAndDeleteBlocks(Map<HeaderMetadataType, String> header) {
     try {
       header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, instantTime);
       header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writerSchemaWithMetafields.toString());
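
Widening appendDataAndDeleteBlocks to protected matters for the same reason: a streaming subclass can flush the buffered data and delete blocks itself, for example when the engine signals a checkpoint, instead of only when the handle closes. Continuing the hypothetical subclass above; the method name and the checkpoint trigger are assumptions of this sketch:

  // Hypothetical: flush buffered records on a checkpoint barrier.
  // appendDataAndDeleteBlocks stamps the instant time and schema into the
  // (now protected) header map before writing the blocks, as the hunk above shows.
  public void flushOnCheckpoint() {
    appendDataAndDeleteBlocks(header);
  }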
@@ -412,6 +419,13 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
         .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
   }

+  /**
+   * Whether the record location needs to be updated.
+   */
+  protected boolean needsUpdateLocation() {
+    return true;
+  }
+
   private void writeToBuffer(HoodieRecord<T> record) {
     if (!partitionPath.equals(record.getPartitionPath())) {
       HoodieUpsertException failureEx = new HoodieUpsertException("mismatched partition path, record partition: "
@@ -421,9 +435,11 @@ public class HoodieAppendHandle<T extends HoodieRecordPayload, I, K, O> extends
     }
     // update the new location of the record, so we know where to find it next
-    record.unseal();
-    record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
-    record.seal();
+    if (needsUpdateLocation()) {
+      record.unseal();
+      record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
+      record.seal();
+    }
     Option<IndexedRecord> indexedRecord = getIndexedRecord(record);
     if (indexedRecord.isPresent()) {
       recordList.add(indexedRecord.get());
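
The second hook, needsUpdateLocation, guards the unseal/seal mutation in writeToBuffer. A writer whose records already carry their final location, as a Flink pipeline with a stateful index might arrange, can opt out of re-stamping it. One more method for the hypothetical subclass; the override is illustrative, only the hook itself comes from this commit:

  @Override
  protected boolean needsUpdateLocation() {
    // Locations were pinned upstream, so skip the
    // unseal/setNewLocation/seal cycle in writeToBuffer for every record.
    return false;
  }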

HoodieMergeHandle.java

@@ -198,6 +198,13 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
     }
   }

+  /**
+   * Whether the record location needs to be updated.
+   */
+  boolean needsUpdateLocation() {
+    return true;
+  }
+
   /**
    * Load the new incoming records in a map and return partitionPath.
    */
@@ -206,9 +213,11 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
     while (newRecordsItr.hasNext()) {
       HoodieRecord<T> record = newRecordsItr.next();
       // update the new location of the record, so we know where to find it next
-      record.unseal();
-      record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
-      record.seal();
+      if (needsUpdateLocation()) {
+        record.unseal();
+        record.setNewLocation(new HoodieRecordLocation(instantTime, fileId));
+        record.seal();
+      }
       // NOTE: Once Records are added to map (spillable-map), DO NOT change it as they won't persist
       keyToNewRecords.put(record.getRecordKey(), record);
     }
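
HoodieMergeHandle gains the identical guard around the location re-stamping in its record-loading loop, so a merge handle fed pre-located records can opt out the same way. Note that here needsUpdateLocation is package-private rather than protected, so an override has to sit in the same package as HoodieMergeHandle. A sketch of such an override inside a hypothetical same-package subclass; the placement in org.apache.hudi.io is an assumption of this sketch:

  // Package-private override, declared in a subclass that lives in
  // org.apache.hudi.io alongside HoodieMergeHandle.
  @Override
  boolean needsUpdateLocation() {
    // keyToNewRecords keeps whatever location the caller already assigned.
    return false;
  }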