Table rollback for inflight compactions must never delete instant files, in order to avoid race conditions
This commit is contained in:
committed by
vinoth chandar
parent
defcf6a0b9
commit
8adaca3454
@@ -839,7 +839,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
// Remove interleaving pending compactions before rolling back commits
|
||||
pendingCompactionToRollback.forEach(this::deletePendingCompaction);
|
||||
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, commitsToRollback);
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, commitsToRollback, true);
|
||||
|
||||
// cleanup index entries
|
||||
commitsToRollback.forEach(s -> {
|
||||
@@ -1206,8 +1206,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> implements Seriali
|
||||
* @param inflightInstant Inflight Compaction Instant
|
||||
* @param table Hoodie Table
|
||||
*/
|
||||
private void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) throws IOException {
|
||||
table.rollback(jsc, ImmutableList.copyOf(new String[] { inflightInstant.getTimestamp() }));
|
||||
@VisibleForTesting
|
||||
void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) throws IOException {
|
||||
table.rollback(jsc, ImmutableList.copyOf(new String[] { inflightInstant.getTimestamp() }), false);
|
||||
// Revert instant state file
|
||||
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||
}
|
||||
|
||||
@@ -347,7 +347,7 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits)
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits, boolean deleteInstants)
|
||||
throws IOException {
|
||||
String actionType = metaClient.getCommitActionType();
|
||||
HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
|
||||
@@ -375,13 +375,29 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
// clean temporary data files
|
||||
cleanTemporaryDataFiles(jsc);
|
||||
|
||||
// Remove the rolled back inflight commits
|
||||
commits.stream().map(s -> new HoodieInstant(true, actionType, s))
|
||||
.forEach(activeTimeline::deleteInflight);
|
||||
logger.info("Deleted inflight commits " + commits);
|
||||
// Delete Inflight instants if enabled
|
||||
deleteInflightInstants(deleteInstants, activeTimeline,
|
||||
commits.stream().map(s -> new HoodieInstant(true, actionType, s)).collect(Collectors.toList()));
|
||||
return stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the inflight files of the given instants when deleteInstants is true; otherwise leaves them alone and only logs a warning
|
||||
* @param deleteInstants Whether to delete the inflight instants; when false nothing is deleted and a warning is logged instead
|
||||
* @param activeTimeline Hoodie active timeline whose deleteInflight is invoked once per instant
|
||||
* @param instantsToBeDeleted Instants to be deleted (also included in the info/warn log message)
|
||||
*/
|
||||
protected static void deleteInflightInstants(boolean deleteInstants, HoodieActiveTimeline activeTimeline,
|
||||
List<HoodieInstant> instantsToBeDeleted) {
|
||||
// Remove the rolled back inflight commits, but only when the caller enabled deletion
|
||||
if (deleteInstants) {
|
||||
instantsToBeDeleted.forEach(activeTimeline::deleteInflight);
|
||||
logger.info("Deleted inflight commits " + instantsToBeDeleted);
|
||||
} else {
|
||||
logger.warn("Rollback finished without deleting inflight instant files. Instants=" + instantsToBeDeleted);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize the written data files
|
||||
*
|
||||
|
||||
@@ -165,7 +165,7 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits)
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits, boolean deleteInstants)
|
||||
throws IOException {
|
||||
|
||||
//At the moment, MOR table type does not support nested rollbacks
|
||||
@@ -274,11 +274,13 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends
|
||||
return hoodieRollbackStats;
|
||||
}).collect(Collectors.toList())).flatMap(List::iterator).filter(Objects::nonNull).collect();
|
||||
|
||||
commitsAndCompactions.entrySet().stream().map(
|
||||
entry -> new HoodieInstant(true, entry.getValue().getAction(),
|
||||
entry.getValue().getTimestamp())).forEach(this.getActiveTimeline()::deleteInflight);
|
||||
logger
|
||||
.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||
// Delete Inflight instants if enabled
|
||||
deleteInflightInstants(deleteInstants, this.getActiveTimeline(),
|
||||
commitsAndCompactions.entrySet().stream().map(
|
||||
entry -> new HoodieInstant(true, entry.getValue().getAction(), entry.getValue().getTimestamp()))
|
||||
.collect(Collectors.toList()));
|
||||
|
||||
logger.debug("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||
|
||||
return allRollbackStats;
|
||||
}
|
||||
|
||||
@@ -251,9 +251,9 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
/**
|
||||
* Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1)
|
||||
* Atomically unpublish this commit (2) clean indexing data (3) clean newly generated parquet files
|
||||
* / log blocks (4) Finally, delete .<action>.commit or .<action>.inflight file
|
||||
* / log blocks (4) Finally, delete .<action>.commit or .<action>.inflight file if deleteInstants = true
|
||||
*/
|
||||
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits)
|
||||
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, List<String> commits, boolean deleteInstants)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user