Ensure Cleaner and Archiver do not delete file-slices and workload marked for compaction
This commit is contained in:
committed by
vinoth chandar
parent
0a0451a765
commit
9b78523d62
@@ -36,6 +36,7 @@ import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant.State;
|
||||
import com.uber.hoodie.common.util.AvroUtils;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.config.HoodieCompactionConfig;
|
||||
@@ -65,6 +66,7 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.log4j.LogManager;
|
||||
@@ -91,6 +93,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
private final transient FileSystem fs;
|
||||
private final transient JavaSparkContext jsc;
|
||||
private final HoodieWriteConfig config;
|
||||
private final boolean rollbackInFlight;
|
||||
private final transient HoodieMetrics metrics;
|
||||
private final transient HoodieIndex<T> index;
|
||||
private transient Timer.Context writeContext = null;
|
||||
@@ -122,10 +125,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
this.config = clientConfig;
|
||||
this.index = index;
|
||||
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||
|
||||
if (rollbackInFlight) {
|
||||
rollbackInflightCommits();
|
||||
}
|
||||
this.rollbackInFlight = rollbackInFlight;
|
||||
}
|
||||
|
||||
public static SparkConf registerClasses(SparkConf conf) {
|
||||
@@ -681,6 +681,42 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
logger.info("Savepoint " + savepointTime + " deleted");
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a compaction request that is pending.
|
||||
*
|
||||
* NOTE - This is an Admin operation.
|
||||
* With async compaction, this is expected to be called with async compaction and ingestion shutdown.
|
||||
* Otherwise, async compactor could fail with errors
|
||||
*
|
||||
* @param compactionTime - delete the compaction time
|
||||
* @return
|
||||
*/
|
||||
private void deletePendingCompaction(String compactionTime) {
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
|
||||
HoodieInstant compactionRequestedInstant =
|
||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, compactionTime);
|
||||
HoodieInstant compactionInflightInstant =
|
||||
new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionTime);
|
||||
boolean isCompactionInstantInRequestedState = table.getActiveTimeline().filterPendingCompactionTimeline()
|
||||
.containsInstant(compactionRequestedInstant);
|
||||
boolean isCompactionInstantInInflightState = table.getActiveTimeline().filterPendingCompactionTimeline()
|
||||
.containsInstant(compactionInflightInstant);
|
||||
|
||||
if (isCompactionInstantInRequestedState) {
|
||||
activeTimeline.deleteCompactionRequested(compactionRequestedInstant);
|
||||
} else if (isCompactionInstantInInflightState) {
|
||||
activeTimeline.revertCompactionInflightToRequested(compactionInflightInstant);
|
||||
activeTimeline.deleteCompactionRequested(compactionRequestedInstant);
|
||||
} else {
|
||||
logger.error("No compaction present " + compactionTime);
|
||||
throw new IllegalArgumentException("No compaction present " + compactionTime);
|
||||
}
|
||||
logger.info("Compaction " + compactionTime + " deleted");
|
||||
}
|
||||
|
||||
/**
|
||||
* Rollback the state to the savepoint. WARNING: This rolls back recent commits and deletes data
|
||||
* files. Queries accessing the files will mostly fail. This should be done during a downtime.
|
||||
@@ -692,6 +728,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
|
||||
// Rollback to savepoint is expected to be a manual operation and no concurrent ingestion or compaction is expected
|
||||
// to be running. Rollback to savepoint also removes any pending compaction actions that are generated after
|
||||
// savepoint time. Allowing pending compaction to be retained is not safe as those workloads could be referencing
|
||||
// file-slices that will be rolled-back as part of this operation
|
||||
HoodieTimeline commitTimeline = table.getMetaClient().getCommitsAndCompactionTimeline();
|
||||
|
||||
HoodieInstant savePoint = new HoodieInstant(false, HoodieTimeline.SAVEPOINT_ACTION,
|
||||
@@ -740,8 +781,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
HoodieTimeline inflightTimeline = table.getInflightCommitTimeline();
|
||||
Set<String> pendingCompactions =
|
||||
table.getActiveTimeline().filterPendingCompactionTimeline().getInstants()
|
||||
.map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
|
||||
HoodieTimeline inflightCommitTimeline = table.getInflightCommitTimeline();
|
||||
HoodieTimeline commitTimeline = table.getCompletedCommitTimeline();
|
||||
|
||||
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
||||
@@ -755,37 +798,45 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
}
|
||||
});
|
||||
|
||||
List<String> pendingCompactionToRollback =
|
||||
commits.stream().filter(c -> pendingCompactions.contains(c)).collect(Collectors.toList());
|
||||
List<String> commitsToRollback =
|
||||
commits.stream().filter(c -> !pendingCompactions.contains(c)).collect(Collectors.toList());
|
||||
|
||||
try {
|
||||
if (commitTimeline.empty() && inflightTimeline.empty()) {
|
||||
if (commitTimeline.empty() && inflightCommitTimeline.empty()) {
|
||||
// nothing to rollback
|
||||
logger.info("No commits to rollback " + commits);
|
||||
logger.info("No commits to rollback " + commitsToRollback);
|
||||
}
|
||||
|
||||
// Make sure only the last n commits are being rolled back
|
||||
// If there is a commit in-between or after that is not rolled back, then abort
|
||||
String lastCommit = commits.get(commits.size() - 1);
|
||||
String lastCommit = commitsToRollback.get(commitsToRollback.size() - 1);
|
||||
if (!commitTimeline.empty() && !commitTimeline
|
||||
.findInstantsAfter(lastCommit, Integer.MAX_VALUE).empty()) {
|
||||
throw new HoodieRollbackException(
|
||||
"Found commits after time :" + lastCommit + ", please rollback greater commits first");
|
||||
}
|
||||
|
||||
List<String> inflights = inflightTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||
List<String> inflights = inflightCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if (!inflights.isEmpty() && inflights.indexOf(lastCommit) != inflights.size() - 1) {
|
||||
throw new HoodieRollbackException("Found in-flight commits after time :" + lastCommit
|
||||
+ ", please rollback greater commits first");
|
||||
}
|
||||
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, commits);
|
||||
// Remove interleaving pending compactions before rolling back commits
|
||||
pendingCompactionToRollback.stream().forEach(this::deletePendingCompaction);
|
||||
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, commitsToRollback);
|
||||
|
||||
// cleanup index entries
|
||||
commits.stream().forEach(s -> {
|
||||
commitsToRollback.stream().forEach(s -> {
|
||||
if (!index.rollbackCommit(s)) {
|
||||
throw new HoodieRollbackException("Rollback index changes failed, for time :" + s);
|
||||
}
|
||||
});
|
||||
logger.info("Index rolled back for commits " + commits);
|
||||
logger.info("Index rolled back for commits " + commitsToRollback);
|
||||
|
||||
Optional<Long> durationInMs = Optional.empty();
|
||||
if (context != null) {
|
||||
@@ -795,11 +846,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
||||
}
|
||||
HoodieRollbackMetadata rollbackMetadata = AvroUtils
|
||||
.convertRollbackMetadata(startRollbackTime, durationInMs, commits, stats);
|
||||
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, stats);
|
||||
table.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
||||
AvroUtils.serializeRollbackMetadata(rollbackMetadata));
|
||||
logger.info("Commits " + commits + " rollback is complete");
|
||||
logger.info("Commits " + commitsToRollback + " rollback is complete");
|
||||
|
||||
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
||||
logger.info("Cleaning up older rollback meta files");
|
||||
@@ -810,7 +861,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRollbackException(
|
||||
"Failed to rollback " + config.getBasePath() + " commits " + commits, e);
|
||||
"Failed to rollback " + config.getBasePath() + " commits " + commitsToRollback, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -890,6 +941,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
}
|
||||
|
||||
public void startCommitWithTime(String commitTime) {
|
||||
if (rollbackInFlight) {
|
||||
// Only rollback inflight commit/delta-commits. Do not touch compaction commits
|
||||
rollbackInflightCommits();
|
||||
}
|
||||
logger.info("Generate a new commit time " + commitTime);
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
|
||||
@@ -1061,6 +1116,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
}
|
||||
|
||||
private HoodieTable getTableAndInitCtx() {
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
HoodieTable table = HoodieTable.getHoodieTable(
|
||||
new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
package com.uber.hoodie.io;
|
||||
|
||||
import com.uber.hoodie.common.model.CompactionOperation;
|
||||
import com.uber.hoodie.common.model.FileSlice;
|
||||
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
@@ -25,14 +26,17 @@ import com.uber.hoodie.common.model.HoodieTableType;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.common.table.view.HoodieTableFileSystemView;
|
||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||
import com.uber.hoodie.table.HoodieTable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -48,6 +52,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
|
||||
private final TableFileSystemView fileSystemView;
|
||||
private final HoodieTimeline commitTimeline;
|
||||
private final Map<String, CompactionOperation> fileIdToPendingCompactionOperations;
|
||||
private HoodieTable<T> hoodieTable;
|
||||
private HoodieWriteConfig config;
|
||||
|
||||
@@ -56,9 +61,12 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
this.fileSystemView = hoodieTable.getCompletedFileSystemView();
|
||||
this.commitTimeline = hoodieTable.getCompletedCommitTimeline();
|
||||
this.config = config;
|
||||
this.fileIdToPendingCompactionOperations =
|
||||
((HoodieTableFileSystemView)hoodieTable.getRTFileSystemView()).getFileIdToPendingCompaction().entrySet()
|
||||
.stream().map(entry -> Pair.of(entry.getKey(), entry.getValue().getValue()))
|
||||
.collect(Collectors.toMap(Pair::getKey, Pair::getValue));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Selects the older versions of files for cleaning, such that it bounds the number of versions of
|
||||
* each file. This policy is useful, if you are simply interested in querying the table, and you
|
||||
@@ -81,8 +89,8 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
while (fileSliceIterator.hasNext() && keepVersions > 0) {
|
||||
// Skip this most recent version
|
||||
FileSlice nextSlice = fileSliceIterator.next();
|
||||
HoodieDataFile dataFile = nextSlice.getDataFile().get();
|
||||
if (savepointedFiles.contains(dataFile.getFileName())) {
|
||||
Optional<HoodieDataFile> dataFile = nextSlice.getDataFile();
|
||||
if (dataFile.isPresent() && savepointedFiles.contains(dataFile.get().getFileName())) {
|
||||
// do not clean up a savepoint data file
|
||||
continue;
|
||||
}
|
||||
@@ -91,12 +99,16 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
// Delete the remaining files
|
||||
while (fileSliceIterator.hasNext()) {
|
||||
FileSlice nextSlice = fileSliceIterator.next();
|
||||
HoodieDataFile dataFile = nextSlice.getDataFile().get();
|
||||
deletePaths.add(dataFile.getFileStatus().getPath().toString());
|
||||
if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||
// If merge on read, then clean the log files for the commits as well
|
||||
deletePaths.addAll(nextSlice.getLogFiles().map(file -> file.getPath().toString())
|
||||
.collect(Collectors.toList()));
|
||||
if (!isFileSliceNeededForPendingCompaction(nextSlice)) {
|
||||
if (nextSlice.getDataFile().isPresent()) {
|
||||
HoodieDataFile dataFile = nextSlice.getDataFile().get();
|
||||
deletePaths.add(dataFile.getFileStatus().getPath().toString());
|
||||
}
|
||||
if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||
// If merge on read, then clean the log files for the commits as well
|
||||
deletePaths.addAll(nextSlice.getLogFiles().map(file -> file.getPath().toString())
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -133,17 +145,21 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
.collect(Collectors.toList());
|
||||
for (HoodieFileGroup fileGroup : fileGroups) {
|
||||
List<FileSlice> fileSliceList = fileGroup.getAllFileSlices().collect(Collectors.toList());
|
||||
HoodieDataFile dataFile = fileSliceList.get(0).getDataFile().get();
|
||||
String lastVersion = dataFile.getCommitTime();
|
||||
|
||||
if (fileSliceList.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String lastVersion = fileSliceList.get(0).getBaseInstantTime();
|
||||
String lastVersionBeforeEarliestCommitToRetain = getLatestVersionBeforeCommit(fileSliceList,
|
||||
earliestCommitToRetain);
|
||||
|
||||
// Ensure there are more than 1 version of the file (we only clean old files from updates)
|
||||
// i.e always spare the last commit.
|
||||
for (FileSlice aSlice : fileSliceList) {
|
||||
HoodieDataFile aFile = aSlice.getDataFile().get();
|
||||
String fileCommitTime = aFile.getCommitTime();
|
||||
if (savepointedFiles.contains(aFile.getFileName())) {
|
||||
Optional<HoodieDataFile> aFile = aSlice.getDataFile();
|
||||
String fileCommitTime = aSlice.getBaseInstantTime();
|
||||
if (aFile.isPresent() && savepointedFiles.contains(aFile.get().getFileName())) {
|
||||
// do not clean up a savepoint data file
|
||||
continue;
|
||||
}
|
||||
@@ -159,11 +175,14 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
}
|
||||
|
||||
// Always keep the last commit
|
||||
if (HoodieTimeline
|
||||
if (!isFileSliceNeededForPendingCompaction(aSlice)
|
||||
&& HoodieTimeline
|
||||
.compareTimestamps(earliestCommitToRetain.getTimestamp(), fileCommitTime,
|
||||
HoodieTimeline.GREATER)) {
|
||||
// this is a commit, that should be cleaned.
|
||||
deletePaths.add(aFile.getFileStatus().getPath().toString());
|
||||
if (aFile.isPresent()) {
|
||||
deletePaths.add(aFile.get().getFileStatus().getPath().toString());
|
||||
}
|
||||
if (hoodieTable.getMetaClient().getTableType() == HoodieTableType.MERGE_ON_READ) {
|
||||
// If merge on read, then clean the log files for the commits as well
|
||||
deletePaths.addAll(aSlice.getLogFiles().map(file -> file.getPath().toString())
|
||||
@@ -183,7 +202,7 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
private String getLatestVersionBeforeCommit(List<FileSlice> fileSliceList,
|
||||
HoodieInstant commitTime) {
|
||||
for (FileSlice file : fileSliceList) {
|
||||
String fileCommitTime = file.getDataFile().get().getCommitTime();
|
||||
String fileCommitTime = file.getBaseInstantTime();
|
||||
if (HoodieTimeline
|
||||
.compareTimestamps(commitTime.getTimestamp(), fileCommitTime, HoodieTimeline.GREATER)) {
|
||||
// fileList is sorted on the reverse, so the first commit we find <= commitTime is the
|
||||
@@ -226,4 +245,19 @@ public class HoodieCleanHelper<T extends HoodieRecordPayload<T>> {
|
||||
}
|
||||
return earliestCommitToRetain;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if file slice needed to be preserved for pending compaction
|
||||
* @param fileSlice File Slice
|
||||
* @return true if file slice needs to be preserved, false otherwise.
|
||||
*/
|
||||
private boolean isFileSliceNeededForPendingCompaction(FileSlice fileSlice) {
|
||||
CompactionOperation op = fileIdToPendingCompactionOperations.get(fileSlice.getFileId());
|
||||
if (null != op) {
|
||||
// If file slice's instant time is newer or same as that of operation, do not clean
|
||||
return HoodieTimeline.compareTimestamps(fileSlice.getBaseInstantTime(), op.getBaseInstantTime(),
|
||||
HoodieTimeline.GREATER_OR_EQUAL);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -16,6 +16,10 @@
|
||||
|
||||
package com.uber.hoodie.io;
|
||||
|
||||
import static com.uber.hoodie.common.table.HoodieTimeline.COMMIT_ACTION;
|
||||
import static com.uber.hoodie.common.table.HoodieTimeline.DELTA_COMMIT_ACTION;
|
||||
import static com.uber.hoodie.common.table.HoodieTimeline.LESSER_OR_EQUAL;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
@@ -32,6 +36,7 @@ import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.common.util.AvroUtils;
|
||||
@@ -42,6 +47,7 @@ import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.table.HoodieTable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
@@ -100,7 +106,7 @@ public class HoodieCommitArchiveLog {
|
||||
/**
|
||||
* Check if commits need to be archived. If yes, archive commits.
|
||||
*/
|
||||
public boolean archiveIfRequired(final JavaSparkContext jsc) {
|
||||
public boolean archiveIfRequired(final JavaSparkContext jsc) throws IOException {
|
||||
try {
|
||||
List<HoodieInstant> instantsToArchive = getInstantsToArchive(jsc).collect(Collectors.toList());
|
||||
boolean success = true;
|
||||
@@ -144,23 +150,34 @@ public class HoodieCommitArchiveLog {
|
||||
//TODO (na) : Add a way to return actions associated with a timeline and then merge/unify
|
||||
// with logic above to avoid Stream.concats
|
||||
HoodieTimeline commitTimeline = table.getCompletedCommitTimeline();
|
||||
Optional<HoodieInstant> oldestPendingCompactionInstant =
|
||||
table.getActiveTimeline().filterPendingCompactionTimeline().firstInstant();
|
||||
|
||||
// We cannot have any holes in the commit timeline. We cannot archive any commits which are
|
||||
// made after the first savepoint present.
|
||||
Optional<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
|
||||
if (!commitTimeline.empty() && commitTimeline.countInstants() > maxCommitsToKeep) {
|
||||
// Actually do the commits
|
||||
instants = Stream.concat(instants, commitTimeline.getInstants().filter(s -> {
|
||||
// if no savepoint present, then dont filter
|
||||
return !(firstSavepoint.isPresent() && HoodieTimeline
|
||||
.compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(),
|
||||
HoodieTimeline.LESSER_OR_EQUAL));
|
||||
}).limit(commitTimeline.countInstants() - minCommitsToKeep));
|
||||
instants = Stream.concat(instants, commitTimeline.getInstants()
|
||||
.filter(s -> {
|
||||
// if no savepoint present, then dont filter
|
||||
return !(firstSavepoint.isPresent() && HoodieTimeline
|
||||
.compareTimestamps(firstSavepoint.get().getTimestamp(), s.getTimestamp(),
|
||||
HoodieTimeline.LESSER_OR_EQUAL));
|
||||
})
|
||||
.filter(s -> {
|
||||
// Ensure commits >= oldest pending compaction commit are retained
|
||||
return oldestPendingCompactionInstant.map(instant -> {
|
||||
return HoodieTimeline.compareTimestamps(instant.getTimestamp(), s.getTimestamp(), HoodieTimeline.GREATER);
|
||||
}).orElse(true);
|
||||
})
|
||||
.limit(commitTimeline.countInstants() - minCommitsToKeep));
|
||||
}
|
||||
|
||||
return instants;
|
||||
}
|
||||
|
||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) {
|
||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException {
|
||||
log.info("Deleting instants " + archivedInstants);
|
||||
boolean success = true;
|
||||
for (HoodieInstant archivedInstant : archivedInstants) {
|
||||
@@ -174,6 +191,48 @@ public class HoodieCommitArchiveLog {
|
||||
throw new HoodieIOException("Failed to delete archived instant " + archivedInstant, e);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove older meta-data from auxiliary path too
|
||||
Optional<HoodieInstant> latestCommitted =
|
||||
archivedInstants.stream()
|
||||
.filter(i -> {
|
||||
return i.isCompleted()
|
||||
&& (i.getAction().equals(COMMIT_ACTION) || (i.getAction().equals(DELTA_COMMIT_ACTION)));
|
||||
})
|
||||
.sorted(Comparator.comparing(HoodieInstant::getTimestamp).reversed()).findFirst();
|
||||
if (latestCommitted.isPresent()) {
|
||||
success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get());
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove older instants from auxiliary meta folder
|
||||
*
|
||||
* @param thresholdInstant Hoodie Instant
|
||||
* @return success if all eligible file deleted successfully
|
||||
* @throws IOException in case of error
|
||||
*/
|
||||
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant)
|
||||
throws IOException {
|
||||
List<HoodieInstant> instants =
|
||||
HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
|
||||
new Path(metaClient.getMetaAuxiliaryPath()),
|
||||
HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
|
||||
|
||||
List<HoodieInstant> instantsToBeDeleted =
|
||||
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
|
||||
thresholdInstant.getTimestamp(), LESSER_OR_EQUAL)).collect(Collectors.toList());
|
||||
|
||||
boolean success = true;
|
||||
for (HoodieInstant deleteInstant : instantsToBeDeleted) {
|
||||
log.info("Deleting instant " + deleteInstant + " in auxiliary meta path " + metaClient.getMetaAuxiliaryPath());
|
||||
Path metaFile = new Path(metaClient.getMetaAuxiliaryPath(), deleteInstant.getFileName());
|
||||
if (metaClient.getFs().exists(metaFile)) {
|
||||
success &= metaClient.getFs().delete(metaFile, false);
|
||||
log.info("Deleted instant file in auxiliary metapath : " + metaFile);
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
@@ -212,7 +271,7 @@ public class HoodieCommitArchiveLog {
|
||||
archivedMetaWrapper.setActionType(ActionType.clean.name());
|
||||
break;
|
||||
}
|
||||
case HoodieTimeline.COMMIT_ACTION: {
|
||||
case COMMIT_ACTION: {
|
||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
|
||||
.fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get());
|
||||
archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata));
|
||||
|
||||
Reference in New Issue
Block a user