
[HUDI-1598] Write as minor batches during one checkpoint interval for the new writer (#2553)

Author: Danny Chan
Date: 2021-02-17 15:24:50 +08:00
Committed by: GitHub
parent 302bd29dab
commit 5d2491d10c
27 changed files with 1105 additions and 111 deletions

View File

@@ -53,11 +53,11 @@ public class HoodieCreateHandle<T extends HoodieRecordPayload, I, K, O> extends
private static final Logger LOG = LogManager.getLogger(HoodieCreateHandle.class);
private final HoodieFileWriter<IndexedRecord> fileWriter;
private final Path path;
private long recordsWritten = 0;
private long insertRecordsWritten = 0;
private long recordsDeleted = 0;
protected final HoodieFileWriter<IndexedRecord> fileWriter;
protected final Path path;
protected long recordsWritten = 0;
protected long insertRecordsWritten = 0;
protected long recordsDeleted = 0;
private Map<String, HoodieRecord<T>> recordMap;
private boolean useWriterSchema = false;

View File

@@ -95,10 +95,10 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
protected HoodieFileWriter<IndexedRecord> fileWriter;
protected Path newFilePath;
private Path oldFilePath;
protected Path oldFilePath;
protected long recordsWritten = 0;
private long recordsDeleted = 0;
private long updatedRecordsWritten = 0;
protected long recordsDeleted = 0;
protected long updatedRecordsWritten = 0;
protected long insertRecordsWritten = 0;
protected boolean useWriterSchema;
private HoodieBaseFile baseFileToMerge;
@@ -132,6 +132,13 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
return writerSchema;
}
/**
* Returns the data file name.
*/
protected String generatesDataFileName() {
return FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
}
/**
* Extract old file path, initialize StorageWriter and WriteStatus.
*/
@@ -149,7 +156,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
partitionMetadata.trySave(getPartitionId());
oldFilePath = new Path(config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath);
String newFileName = FSUtils.makeDataFileName(instantTime, writeToken, fileId, hoodieTable.getBaseFileExtension());
String newFileName = generatesDataFileName();
String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/")
+ newFileName).toString();
newFilePath = new Path(config.getBasePath(), relativePath);
@@ -177,18 +184,25 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload, I, K, O> extends H
}
/**
* Load the new incoming records in a map and return partitionPath.
* Initialize a spillable map for incoming records.
*/
private void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
protected void initializeIncomingRecordsMap() {
try {
// Load the new records in a map
long memoryForMerge = IOUtils.getMaxMemoryPerPartitionMerge(taskContextSupplier, config.getProps());
LOG.info("MaxMemoryPerPartitionMerge => " + memoryForMerge);
this.keyToNewRecords = new ExternalSpillableMap<>(memoryForMerge, config.getSpillableMapBasePath(),
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(writerSchema));
new DefaultSizeEstimator(), new HoodieRecordSizeEstimator(writerSchema));
} catch (IOException io) {
throw new HoodieIOException("Cannot instantiate an ExternalSpillableMap", io);
}
}
/**
* Load the new incoming records in a map and return partitionPath.
*/
protected void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
initializeIncomingRecordsMap();
while (newRecordsItr.hasNext()) {
HoodieRecord<T> record = newRecordsItr.next();
// update the new location of the record, so we know where to find it next