refactor classes to accept Map passed by RealtimeCompactor to avoid multiple map creations in HoodieMergeHandle
This commit is contained in:
committed by
vinoth chandar
parent
30049383f5
commit
7076c2e9f0
@@ -33,7 +33,9 @@ import com.uber.hoodie.table.HoodieTable;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.avro.generic.GenericRecord;
|
import org.apache.avro.generic.GenericRecord;
|
||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
@@ -47,7 +49,7 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class);
|
private static Logger logger = LogManager.getLogger(HoodieMergeHandle.class);
|
||||||
|
|
||||||
private WriteStatus writeStatus;
|
private WriteStatus writeStatus;
|
||||||
private HashMap<String, HoodieRecord<T>> keyToNewRecords;
|
private Map<String, HoodieRecord<T>> keyToNewRecords;
|
||||||
private HoodieStorageWriter<IndexedRecord> storageWriter;
|
private HoodieStorageWriter<IndexedRecord> storageWriter;
|
||||||
private TableFileSystemView.ReadOptimizedView fileSystemView;
|
private TableFileSystemView.ReadOptimizedView fileSystemView;
|
||||||
private Path newFilePath;
|
private Path newFilePath;
|
||||||
@@ -64,26 +66,32 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
String fileId) {
|
String fileId) {
|
||||||
super(config, commitTime, hoodieTable);
|
super(config, commitTime, hoodieTable);
|
||||||
this.fileSystemView = hoodieTable.getROFileSystemView();
|
this.fileSystemView = hoodieTable.getROFileSystemView();
|
||||||
init(fileId, recordItr);
|
init(fileId, init(fileId, recordItr));
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieMergeHandle(HoodieWriteConfig config,
|
||||||
|
String commitTime,
|
||||||
|
HoodieTable<T> hoodieTable,
|
||||||
|
Map<String, HoodieRecord<T>> keyToNewRecords,
|
||||||
|
String fileId) {
|
||||||
|
super(config, commitTime, hoodieTable);
|
||||||
|
this.fileSystemView = hoodieTable.getROFileSystemView();
|
||||||
|
this.keyToNewRecords = keyToNewRecords;
|
||||||
|
init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get()).getPartitionPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load the new incoming records in a map, and extract the old file path.
|
* Extract old file path, initialize StorageWriter and WriteStatus
|
||||||
|
* @param fileId
|
||||||
|
* @param partitionPath
|
||||||
*/
|
*/
|
||||||
private void init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
|
private void init(String fileId, String partitionPath) {
|
||||||
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
|
WriteStatus writeStatus = ReflectionUtils.loadClass(config.getWriteStatusClassName());
|
||||||
writeStatus.setStat(new HoodieWriteStat());
|
writeStatus.setStat(new HoodieWriteStat());
|
||||||
this.writeStatus = writeStatus;
|
this.writeStatus = writeStatus;
|
||||||
this.keyToNewRecords = new HashMap<>();
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Load the new records in a map
|
|
||||||
while (newRecordsItr.hasNext()) {
|
|
||||||
HoodieRecord<T> record = newRecordsItr.next();
|
|
||||||
// If the first record, we need to extract some info out
|
|
||||||
if (oldFilePath == null) {
|
|
||||||
String latestValidFilePath = fileSystemView
|
String latestValidFilePath = fileSystemView
|
||||||
.getLatestDataFiles(record.getPartitionPath())
|
.getLatestDataFiles(partitionPath)
|
||||||
.filter(dataFile -> dataFile.getFileId().equals(fileId))
|
.filter(dataFile -> dataFile.getFileId().equals(fileId))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.get().getFileName();
|
.get().getFileName();
|
||||||
@@ -92,18 +100,15 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs,
|
HoodiePartitionMetadata partitionMetadata = new HoodiePartitionMetadata(fs,
|
||||||
commitTime,
|
commitTime,
|
||||||
new Path(config.getBasePath()),
|
new Path(config.getBasePath()),
|
||||||
new Path(config.getBasePath(), record.getPartitionPath()));
|
new Path(config.getBasePath(), partitionPath));
|
||||||
partitionMetadata.trySave(TaskContext.getPartitionId());
|
partitionMetadata.trySave(TaskContext.getPartitionId());
|
||||||
|
|
||||||
oldFilePath = new Path(
|
oldFilePath = new Path(
|
||||||
config.getBasePath() + "/" + record.getPartitionPath() + "/"
|
config.getBasePath() + "/" + partitionPath + "/"
|
||||||
+ latestValidFilePath);
|
+ latestValidFilePath);
|
||||||
String relativePath = new Path(record.getPartitionPath() + "/" + FSUtils
|
String relativePath = new Path(partitionPath + "/" + FSUtils
|
||||||
.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString();
|
.makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString();
|
||||||
newFilePath = new Path(config.getBasePath(), relativePath);
|
newFilePath = new Path(config.getBasePath(), relativePath);
|
||||||
if (config.shouldUseTempFolderForCopyOnWriteForMerge()) {
|
|
||||||
this.tempPath = makeTempPath(record.getPartitionPath(), TaskContext.getPartitionId(), fileId, TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle cases of partial failures, for update task
|
// handle cases of partial failures, for update task
|
||||||
if (fs.exists(newFilePath)) {
|
if (fs.exists(newFilePath)) {
|
||||||
@@ -111,30 +116,43 @@ public class HoodieMergeHandle<T extends HoodieRecordPayload> extends HoodieIOHa
|
|||||||
}
|
}
|
||||||
|
|
||||||
logger.info(String.format("Merging new data into oldPath %s, as newPath %s",
|
logger.info(String.format("Merging new data into oldPath %s, as newPath %s",
|
||||||
oldFilePath.toString(), getStorageWriterPath().toString()));
|
oldFilePath.toString(), newFilePath.toString()));
|
||||||
// file name is same for all records, in this bunch
|
// file name is same for all records, in this bunch
|
||||||
writeStatus.setFileId(fileId);
|
writeStatus.setFileId(fileId);
|
||||||
writeStatus.setPartitionPath(record.getPartitionPath());
|
writeStatus.setPartitionPath(partitionPath);
|
||||||
writeStatus.getStat().setFileId(fileId);
|
writeStatus.getStat().setFileId(fileId);
|
||||||
writeStatus.getStat().setPaths(new Path(config.getBasePath()), newFilePath, tempPath);
|
writeStatus.getStat().setPath(relativePath);
|
||||||
|
// Create the writer for writing the new version file
|
||||||
|
storageWriter = HoodieStorageWriterFactory
|
||||||
|
.getStorageWriter(commitTime, newFilePath, hoodieTable, config, schema);
|
||||||
|
} catch (IOException io) {
|
||||||
|
logger.error("Error in update task at commit " + commitTime, io);
|
||||||
|
writeStatus.setGlobalError(io);
|
||||||
|
throw new HoodieUpsertException(
|
||||||
|
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
|
||||||
|
+ commitTime + " on path " + hoodieTable.getMetaClient().getBasePath(), io);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the new incoming records in a map and return partitionPath
|
||||||
|
* @param fileId
|
||||||
|
* @param newRecordsItr
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private String init(String fileId, Iterator<HoodieRecord<T>> newRecordsItr) {
|
||||||
|
// Load the new records in a map
|
||||||
|
this.keyToNewRecords = new HashMap<>();
|
||||||
|
String partitionPath = null;
|
||||||
|
while (newRecordsItr.hasNext()) {
|
||||||
|
HoodieRecord<T> record = newRecordsItr.next();
|
||||||
|
partitionPath = record.getPartitionPath();
|
||||||
keyToNewRecords.put(record.getRecordKey(), record);
|
keyToNewRecords.put(record.getRecordKey(), record);
|
||||||
// update the new location of the record, so we know where to find it next
|
// update the new location of the record, so we know where to find it next
|
||||||
record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
|
record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
|
||||||
}
|
}
|
||||||
// Create the writer for writing the new version file
|
return partitionPath;
|
||||||
storageWriter = HoodieStorageWriterFactory
|
|
||||||
.getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, schema);
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.error("Error in update task at commit " + commitTime, e);
|
|
||||||
writeStatus.setGlobalError(e);
|
|
||||||
throw new HoodieUpsertException(
|
|
||||||
"Failed to initialize HoodieUpdateHandle for FileId: " + fileId + " on commit "
|
|
||||||
+ commitTime + " on path " + hoodieTable.getMetaClient().getBasePath(), e);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord,
|
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord,
|
||||||
Optional<IndexedRecord> indexedRecord) {
|
Optional<IndexedRecord> indexedRecord) {
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ public class HoodieRealtimeTableCompactor implements HoodieCompactor {
|
|||||||
// Compacting is very similar to applying updates to existing file
|
// Compacting is very similar to applying updates to existing file
|
||||||
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metaClient);
|
HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, metaClient);
|
||||||
Iterator<List<WriteStatus>> result = table
|
Iterator<List<WriteStatus>> result = table
|
||||||
.handleUpdate(commitTime, operation.getFileId(), scanner.iterator());
|
.handleUpdate(commitTime, operation.getFileId(), scanner.getRecords());
|
||||||
Iterable<List<WriteStatus>> resultIterable = () -> result;
|
Iterable<List<WriteStatus>> resultIterable = () -> result;
|
||||||
return StreamSupport.stream(resultIterable.spliterator(), false)
|
return StreamSupport.stream(resultIterable.spliterator(), false)
|
||||||
.flatMap(Collection::stream)
|
.flatMap(Collection::stream)
|
||||||
|
|||||||
@@ -420,6 +420,19 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
throws IOException {
|
throws IOException {
|
||||||
// these are updates
|
// these are updates
|
||||||
HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileLoc, recordItr);
|
HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileLoc, recordItr);
|
||||||
|
return handleUpdateInternal(upsertHandle, commitTime, fileLoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Iterator<List<WriteStatus>> handleUpdate(String commitTime, String fileLoc,
|
||||||
|
Map<String, HoodieRecord<T>> keyToNewRecords)
|
||||||
|
throws IOException {
|
||||||
|
// these are updates
|
||||||
|
HoodieMergeHandle upsertHandle = getUpdateHandle(commitTime, fileLoc, keyToNewRecords);
|
||||||
|
return handleUpdateInternal(upsertHandle, commitTime, fileLoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle, String commitTime, String fileLoc)
|
||||||
|
throws IOException {
|
||||||
if (upsertHandle.getOldFilePath() == null) {
|
if (upsertHandle.getOldFilePath() == null) {
|
||||||
throw new HoodieUpsertException("Error in finding the old file path at commit " +
|
throw new HoodieUpsertException("Error in finding the old file path at commit " +
|
||||||
commitTime + " at fileLoc: " + fileLoc);
|
commitTime + " at fileLoc: " + fileLoc);
|
||||||
@@ -459,6 +472,11 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileLoc);
|
return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileLoc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileLoc,
|
||||||
|
Map<String, HoodieRecord<T>> keyToNewRecords) {
|
||||||
|
return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileLoc);
|
||||||
|
}
|
||||||
|
|
||||||
public Iterator<List<WriteStatus>> handleInsert(String commitTime,
|
public Iterator<List<WriteStatus>> handleInsert(String commitTime,
|
||||||
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
Iterator<HoodieRecord<T>> recordItr) throws Exception {
|
||||||
return new LazyInsertIterable<>(recordItr, config, commitTime, this);
|
return new LazyInsertIterable<>(recordItr, config, commitTime, this);
|
||||||
|
|||||||
@@ -43,6 +43,7 @@ import java.util.Arrays;
|
|||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Deque;
|
import java.util.Deque;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -62,8 +63,8 @@ public class HoodieCompactedLogRecordScanner implements
|
|||||||
|
|
||||||
private final static Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class);
|
private final static Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class);
|
||||||
|
|
||||||
// Final list of compacted/merged records to iterate
|
// Final map of compacted/merged records
|
||||||
private final Collection<HoodieRecord<? extends HoodieRecordPayload>> logRecords;
|
private final Map<String, HoodieRecord<? extends HoodieRecordPayload>> records;
|
||||||
// Reader schema for the records
|
// Reader schema for the records
|
||||||
private final Schema readerSchema;
|
private final Schema readerSchema;
|
||||||
// Total log files read - for metrics
|
// Total log files read - for metrics
|
||||||
@@ -89,7 +90,7 @@ public class HoodieCompactedLogRecordScanner implements
|
|||||||
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
|
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
|
||||||
|
|
||||||
// Store merged records for all versions for this log file
|
// Store merged records for all versions for this log file
|
||||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> records = Maps.newHashMap();
|
this.records = Maps.newHashMap();
|
||||||
// iterate over the paths
|
// iterate over the paths
|
||||||
Iterator<String> logFilePathsItr = logFilePaths.iterator();
|
Iterator<String> logFilePathsItr = logFilePaths.iterator();
|
||||||
while (logFilePathsItr.hasNext()) {
|
while (logFilePathsItr.hasNext()) {
|
||||||
@@ -202,7 +203,6 @@ public class HoodieCompactedLogRecordScanner implements
|
|||||||
merge(records, currentInstantLogBlocks);
|
merge(records, currentInstantLogBlocks);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.logRecords = Collections.unmodifiableCollection(records.values());
|
|
||||||
this.totalRecordsToUpdate = records.size();
|
this.totalRecordsToUpdate = records.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -297,7 +297,7 @@ public class HoodieCompactedLogRecordScanner implements
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
|
public Iterator<HoodieRecord<? extends HoodieRecordPayload>> iterator() {
|
||||||
return logRecords.iterator();
|
return records.values().iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getTotalLogFiles() {
|
public long getTotalLogFiles() {
|
||||||
@@ -308,6 +308,10 @@ public class HoodieCompactedLogRecordScanner implements
|
|||||||
return totalLogRecords.get();
|
return totalLogRecords.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Map<String, HoodieRecord<? extends HoodieRecordPayload>> getRecords() {
|
||||||
|
return records;
|
||||||
|
}
|
||||||
|
|
||||||
public long getTotalRecordsToUpdate() {
|
public long getTotalRecordsToUpdate() {
|
||||||
return totalRecordsToUpdate;
|
return totalRecordsToUpdate;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user