1
0

Ensure Cleaner and Archiver do not delete file-slices and workload marked for compaction

This commit is contained in:
Balaji Varadarajan
2018-05-31 14:16:19 -07:00
committed by vinoth chandar
parent 0a0451a765
commit 9b78523d62
10 changed files with 666 additions and 76 deletions

View File

@@ -106,7 +106,7 @@ public class CompactionOperation implements Serializable {
op.dataFilePath = Optional.ofNullable(operation.getDataFilePath());
op.deltaFilePaths = new ArrayList<>(operation.getDeltaFilePaths());
op.fileId = operation.getFileId();
op.metrics = new HashMap<>(operation.getMetrics());
op.metrics = operation.getMetrics() == null ? new HashMap<>() : new HashMap<>(operation.getMetrics());
op.partitionPath = operation.getPartitionPath();
return op;
}

View File

@@ -17,6 +17,7 @@
package com.uber.hoodie.common.table.view;
import com.google.common.collect.ImmutableMap;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
@@ -74,7 +75,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
/**
* File Id to pending compaction instant time
*/
private final Map<String, String> fileIdToPendingCompactionInstantTime;
private final Map<String, Pair<String, CompactionOperation>> fileIdToPendingCompaction;
/**
* Create a file system view, as of the given timeline
@@ -89,9 +90,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
// Build fileId to Pending Compaction Instants
List<HoodieInstant> pendingCompactionInstants =
metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
this.fileIdToPendingCompactionInstantTime = ImmutableMap.copyOf(
this.fileIdToPendingCompaction = ImmutableMap.copyOf(
CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream().map(entry -> {
return Pair.of(entry.getKey(), entry.getValue().getKey());
return Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(),
CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue())));
}).collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
}
@@ -155,10 +157,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
if (logFiles.containsKey(pair)) {
logFiles.get(pair).forEach(logFile -> group.addLogFile(logFile));
}
if (fileIdToPendingCompactionInstantTime.containsKey(fileId)) {
if (fileIdToPendingCompaction.containsKey(fileId)) {
// If there is no delta-commit after compaction request, this step would ensure a new file-slice appears
// so that any new ingestion uses the correct base-instant
group.addNewFileSliceAtInstant(fileIdToPendingCompactionInstantTime.get(fileId));
group.addNewFileSliceAtInstant(fileIdToPendingCompaction.get(fileId).getKey());
}
fileGroups.add(group);
});
@@ -196,8 +198,9 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
* @param dataFile Data File
*/
private boolean isDataFileDueToPendingCompaction(HoodieDataFile dataFile) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(dataFile.getFileId());
if ((null != compactionInstantTime) && dataFile.getCommitTime().equals(compactionInstantTime)) {
Pair<String, CompactionOperation> compactionWithInstantTime = fileIdToPendingCompaction.get(dataFile.getFileId());
if ((null != compactionWithInstantTime) && (null != compactionWithInstantTime.getLeft())
&& dataFile.getCommitTime().equals(compactionWithInstantTime.getKey())) {
return true;
}
return false;
@@ -277,7 +280,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
FileSlice fileSlice = fileGroup.getLatestFileSlice().get();
// if the file-group is under compaction, pick the latest before compaction instant time.
if (isFileSliceAfterPendingCompaction(fileSlice)) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
String compactionInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId()).getLeft();
return fileGroup.getLatestFileSliceBefore(compactionInstantTime);
}
return Optional.of(fileSlice);
@@ -292,8 +295,9 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
* @return
*/
private boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
if ((null != compactionInstantTime) && fileSlice.getBaseInstantTime().equals(compactionInstantTime)) {
Pair<String, CompactionOperation> compactionWithInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId());
if ((null != compactionWithInstantTime)
&& fileSlice.getBaseInstantTime().equals(compactionWithInstantTime.getKey())) {
return true;
}
return false;
@@ -352,8 +356,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
*/
private FileSlice getMergedFileSlice(HoodieFileGroup fileGroup, FileSlice fileSlice) {
// if the file-group is under construction, pick the latest before compaction instant time.
if (fileIdToPendingCompactionInstantTime.containsKey(fileSlice.getFileId())) {
String compactionInstantTime = fileIdToPendingCompactionInstantTime.get(fileSlice.getFileId());
if (fileIdToPendingCompaction.containsKey(fileSlice.getFileId())) {
String compactionInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId()).getKey();
if (fileSlice.getBaseInstantTime().equals(compactionInstantTime)) {
Optional<FileSlice> prevFileSlice = fileGroup.getLatestFileSliceBefore(compactionInstantTime);
if (prevFileSlice.isPresent()) {
@@ -416,4 +420,8 @@ public class HoodieTableFileSystemView implements TableFileSystemView,
"Failed to list data files in partition " + partitionPathStr, e);
}
}
public Map<String, Pair<String, CompactionOperation>> getFileIdToPendingCompaction() {
return fileIdToPendingCompaction;
}
}