[HUDI-308] Avoid Renames for tracking state transitions of all actions on dataset
This commit is contained in:
committed by
Balaji Varadarajan
parent
8963a68e6a
commit
9a1f698eef
@@ -179,9 +179,10 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
// revert if in inflight state
|
||||
metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
|
||||
}
|
||||
// Overwrite compaction plan with updated info
|
||||
metaClient.getActiveTimeline().saveToCompactionRequested(
|
||||
new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()),
|
||||
AvroUtils.serializeCompactionPlan(newPlan));
|
||||
AvroUtils.serializeCompactionPlan(newPlan), true);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@@ -218,8 +219,9 @@ public class CompactionAdminClient extends AbstractHoodieClient {
|
||||
*/
|
||||
private static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
|
||||
throws IOException {
|
||||
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
|
||||
.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
|
||||
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
|
||||
metaClient.getActiveTimeline().readPlanAsBytes(
|
||||
HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
|
||||
return compactionPlan;
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
* cleaned)
|
||||
*/
|
||||
public void clean() throws HoodieIOException {
|
||||
String startCleanTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
String startCleanTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
clean(startCleanTime);
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
// If there are inflight(failed) or previously requested clean operation, first perform them
|
||||
table.getCleanTimeline().filterInflightsAndRequested().getInstants().forEach(hoodieInstant -> {
|
||||
LOG.info("There were previously unfinished cleaner operations. Finishing Instant=" + hoodieInstant);
|
||||
runClean(table, hoodieInstant.getTimestamp());
|
||||
runClean(table, hoodieInstant);
|
||||
});
|
||||
|
||||
Option<HoodieCleanerPlan> cleanerPlanOpt = scheduleClean(startCleanTime);
|
||||
@@ -96,7 +96,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
if ((cleanerPlan.getFilesToBeDeletedPerPartition() != null)
|
||||
&& !cleanerPlan.getFilesToBeDeletedPerPartition().isEmpty()) {
|
||||
final HoodieTable<T> hoodieTable = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||
return runClean(hoodieTable, startCleanTime);
|
||||
return runClean(hoodieTable, HoodieTimeline.getCleanRequestedInstant(startCleanTime), cleanerPlan);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
@@ -136,13 +136,20 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
* Executes the Cleaner plan stored in the instant metadata.
|
||||
*
|
||||
* @param table Hoodie Table
|
||||
* @param cleanInstantTs Cleaner Instant Timestamp
|
||||
* @param cleanInstant Cleaner Instant
|
||||
*/
|
||||
@VisibleForTesting
|
||||
protected HoodieCleanMetadata runClean(HoodieTable<T> table, String cleanInstantTs) {
|
||||
HoodieInstant cleanInstant =
|
||||
table.getCleanTimeline().getInstants().filter(x -> x.getTimestamp().equals(cleanInstantTs)).findFirst().get();
|
||||
protected HoodieCleanMetadata runClean(HoodieTable<T> table, HoodieInstant cleanInstant) {
|
||||
try {
|
||||
HoodieCleanerPlan cleanerPlan = CleanerUtils.getCleanerPlan(table.getMetaClient(), cleanInstant);
|
||||
return runClean(table, cleanInstant, cleanerPlan);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private HoodieCleanMetadata runClean(HoodieTable<T> table, HoodieInstant cleanInstant,
|
||||
HoodieCleanerPlan cleanerPlan) {
|
||||
Preconditions.checkArgument(
|
||||
cleanInstant.getState().equals(State.REQUESTED) || cleanInstant.getState().equals(State.INFLIGHT));
|
||||
|
||||
@@ -152,10 +159,11 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
|
||||
if (!cleanInstant.isInflight()) {
|
||||
// Mark as inflight first
|
||||
cleanInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant);
|
||||
cleanInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant,
|
||||
AvroUtils.serializeCleanerPlan(cleanerPlan));
|
||||
}
|
||||
|
||||
List<HoodieCleanStat> cleanStats = table.clean(jsc, cleanInstant);
|
||||
List<HoodieCleanStat> cleanStats = table.clean(jsc, cleanInstant, cleanerPlan);
|
||||
|
||||
if (cleanStats.isEmpty()) {
|
||||
return HoodieCleanMetadata.newBuilder().build();
|
||||
|
||||
@@ -80,8 +80,6 @@ import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.ParseException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -101,7 +99,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
private static final Logger LOG = LogManager.getLogger(HoodieWriteClient.class);
|
||||
private static final String UPDATE_STR = "update";
|
||||
private static final String LOOKUP_STR = "lookup";
|
||||
private final boolean rollbackInFlight;
|
||||
private final boolean rollbackPending;
|
||||
private final transient HoodieMetrics metrics;
|
||||
private final transient HoodieIndex<T> index;
|
||||
private final transient HoodieCleanClient<T> cleanClient;
|
||||
@@ -119,21 +117,21 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight) {
|
||||
this(jsc, clientConfig, rollbackInFlight, HoodieIndex.createIndex(clientConfig, jsc));
|
||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending) {
|
||||
this(jsc, clientConfig, rollbackPending, HoodieIndex.createIndex(clientConfig, jsc));
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight, HoodieIndex index) {
|
||||
this(jsc, clientConfig, rollbackInFlight, index, Option.empty());
|
||||
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending, HoodieIndex index) {
|
||||
this(jsc, clientConfig, rollbackPending, index, Option.empty());
|
||||
}
|
||||
|
||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight,
|
||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending,
|
||||
HoodieIndex index, Option<EmbeddedTimelineService> timelineService) {
|
||||
super(jsc, clientConfig, timelineService);
|
||||
this.index = index;
|
||||
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||
this.rollbackInFlight = rollbackInFlight;
|
||||
this.rollbackPending = rollbackPending;
|
||||
this.cleanClient = new HoodieCleanClient<>(jsc, config, metrics, timelineService);
|
||||
}
|
||||
|
||||
@@ -356,7 +354,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
|
||||
} else {
|
||||
// if entire set of keys are non existent
|
||||
JavaRDD<WriteStatus> writeStatusRDD = jsc.parallelize(Collections.EMPTY_LIST, 1);
|
||||
saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, commitTime);
|
||||
JavaRDD<WriteStatus> writeStatusRDD = jsc.emptyRDD();
|
||||
commitOnAutoCommit(commitTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
|
||||
return writeStatusRDD;
|
||||
}
|
||||
@@ -388,6 +387,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
final List<String> fileIDPrefixes =
|
||||
IntStream.range(0, parallelism).mapToObj(i -> FSUtils.createNewFileIdPfx()).collect(Collectors.toList());
|
||||
|
||||
table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
|
||||
table.getMetaClient().getCommitActionType(), commitTime), Option.empty());
|
||||
|
||||
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
|
||||
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table, fileIDPrefixes), true)
|
||||
.flatMap(writeStatuses -> writeStatuses.iterator());
|
||||
@@ -435,9 +437,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
});
|
||||
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
Option<HoodieInstant> instant =
|
||||
activeTimeline.getCommitsTimeline().filterInflightsExcludingCompaction().lastInstant();
|
||||
activeTimeline.saveToInflight(instant.get(), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
String commitActionType = table.getMetaClient().getCommitActionType();
|
||||
HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, commitTime);
|
||||
activeTimeline.transitionRequestedToInflight(requested,
|
||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||
} catch (IOException io) {
|
||||
throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io);
|
||||
}
|
||||
@@ -664,8 +667,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
|
||||
HoodieSavepointMetadata metadata = AvroUtils.convertSavepointMetadata(user, comment, latestFilesMap);
|
||||
// Nothing to save in the savepoint
|
||||
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
|
||||
AvroUtils.serializeSavepointMetadata(metadata));
|
||||
table.getActiveTimeline().createNewInstant(
|
||||
new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime));
|
||||
table.getActiveTimeline()
|
||||
.saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
|
||||
AvroUtils.serializeSavepointMetadata(metadata));
|
||||
LOG.info("Savepoint " + commitTime + " created");
|
||||
return true;
|
||||
} catch (IOException e) {
|
||||
@@ -792,23 +798,25 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
.filter(instant -> HoodieActiveTimeline.GREATER.test(instant.getTimestamp(), instantTime))
|
||||
.collect(Collectors.toList());
|
||||
// Start a rollback instant for all commits to be rolled back
|
||||
String startRollbackInstant = startInstant();
|
||||
String startRollbackInstant = HoodieActiveTimeline.createNewInstantTime();
|
||||
// Start the timer
|
||||
final Timer.Context context = startContext();
|
||||
ImmutableMap.Builder<String, List<HoodieRollbackStat>> instantsToStats = ImmutableMap.builder();
|
||||
table.getActiveTimeline().createNewInstant(
|
||||
new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRollbackInstant));
|
||||
instantsToRollback.stream().forEach(instant -> {
|
||||
try {
|
||||
switch (instant.getAction()) {
|
||||
case HoodieTimeline.COMMIT_ACTION:
|
||||
case HoodieTimeline.DELTA_COMMIT_ACTION:
|
||||
List<HoodieRollbackStat> statsForInstant = doRollbackAndGetStats(instant.getTimestamp());
|
||||
List<HoodieRollbackStat> statsForInstant = doRollbackAndGetStats(instant);
|
||||
instantsToStats.put(instant.getTimestamp(), statsForInstant);
|
||||
break;
|
||||
case HoodieTimeline.COMPACTION_ACTION:
|
||||
// TODO : Get file status and create a rollback stat and file
|
||||
// TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
|
||||
// delete these files when it does not see a corresponding instant file under .hoodie
|
||||
List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant.getTimestamp());
|
||||
List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant);
|
||||
instantsToStats.put(instant.getTimestamp(), statsForCompaction);
|
||||
LOG.info("Deleted compaction instant " + instant);
|
||||
break;
|
||||
@@ -828,17 +836,16 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
}
|
||||
|
||||
private String startInstant() {
|
||||
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
|
||||
}
|
||||
|
||||
private Timer.Context startContext() {
|
||||
return metrics.getRollbackCtx();
|
||||
}
|
||||
|
||||
private List<HoodieRollbackStat> doRollbackAndGetStats(final String commitToRollback) throws IOException {
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||
HoodieTimeline inflightCommitTimeline = table.getInflightCommitTimeline();
|
||||
private List<HoodieRollbackStat> doRollbackAndGetStats(final HoodieInstant instantToRollback) throws
|
||||
IOException {
|
||||
final String commitToRollback = instantToRollback.getTimestamp();
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
createMetaClient(true), config, jsc);
|
||||
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
|
||||
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
|
||||
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
||||
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
|
||||
@@ -850,7 +857,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
});
|
||||
|
||||
if (commitTimeline.empty() && inflightCommitTimeline.empty()) {
|
||||
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
|
||||
// nothing to rollback
|
||||
LOG.info("No commits to rollback " + commitToRollback);
|
||||
}
|
||||
@@ -865,14 +872,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
"Found commits after time :" + lastCommit + ", please rollback greater commits first");
|
||||
}
|
||||
|
||||
List<String> inflights =
|
||||
inflightCommitTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
if ((lastCommit != null) && !inflights.isEmpty() && (inflights.indexOf(lastCommit) != inflights.size() - 1)) {
|
||||
throw new HoodieRollbackException(
|
||||
"Found in-flight commits after time :" + lastCommit + ", please rollback greater commits first");
|
||||
}
|
||||
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, commitToRollback, true);
|
||||
List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
|
||||
|
||||
LOG.info("Deleted inflight commits " + commitToRollback);
|
||||
|
||||
@@ -893,9 +900,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
|
||||
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
||||
}
|
||||
HoodieRollbackMetadata rollbackMetadata =
|
||||
AvroUtils.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
|
||||
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
||||
HoodieRollbackMetadata rollbackMetadata = AvroUtils
|
||||
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
|
||||
//TODO: varadarb - This will be fixed in subsequent PR when Rollback and Clean transition mimics that of commit
|
||||
table.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION,
|
||||
startRollbackTime));
|
||||
table.getActiveTimeline().saveAsComplete(
|
||||
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
||||
AvroUtils.serializeRollbackMetadata(rollbackMetadata));
|
||||
LOG.info("Commits " + commitsToRollback + " rollback is complete");
|
||||
|
||||
@@ -937,17 +948,22 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
private void rollbackInternal(String commitToRollback) {
|
||||
if (commitToRollback.isEmpty()) {
|
||||
LOG.info("List of commits to rollback is empty");
|
||||
return;
|
||||
}
|
||||
final String startRollbackTime = startInstant();
|
||||
final String startRollbackTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
final Timer.Context context = startContext();
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
try {
|
||||
List<HoodieRollbackStat> stats = doRollbackAndGetStats(commitToRollback);
|
||||
Map<String, List<HoodieRollbackStat>> statToCommit = new HashMap<>();
|
||||
finishRollback(context, stats, Arrays.asList(commitToRollback), startRollbackTime);
|
||||
// Create a Hoodie table which encapsulated the commits and files visible
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||
createMetaClient(true), config, jsc);
|
||||
Option<HoodieInstant> rollbackInstantOpt =
|
||||
Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitToRollback))
|
||||
.findFirst());
|
||||
|
||||
if (rollbackInstantOpt.isPresent()) {
|
||||
List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackInstantOpt.get());
|
||||
finishRollback(context, stats, Arrays.asList(commitToRollback), startRollbackTime);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitToRollback,
|
||||
e);
|
||||
@@ -992,20 +1008,20 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
*/
|
||||
public String startCommit() {
|
||||
// NOTE : Need to ensure that rollback is done before a new commit is started
|
||||
if (rollbackInFlight) {
|
||||
// Only rollback inflight commit/delta-commits. Do not touch compaction commits
|
||||
rollbackInflightCommits();
|
||||
if (rollbackPending) {
|
||||
// Only rollback pending commit/delta-commits. Do not touch compaction commits
|
||||
rollbackPendingCommits();
|
||||
}
|
||||
String commitTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
String commitTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
startCommit(commitTime);
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public void startCommitWithTime(String instantTime) {
|
||||
// NOTE : Need to ensure that rollback is done before a new commit is started
|
||||
if (rollbackInFlight) {
|
||||
if (rollbackPending) {
|
||||
// Only rollback inflight commit/delta-commits. Do not touch compaction commits
|
||||
rollbackInflightCommits();
|
||||
rollbackPendingCommits();
|
||||
}
|
||||
startCommit(instantTime);
|
||||
}
|
||||
@@ -1023,14 +1039,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||
String commitActionType = table.getMetaClient().getCommitActionType();
|
||||
activeTimeline.createInflight(new HoodieInstant(true, commitActionType, instantTime));
|
||||
activeTimeline.createNewInstant(new HoodieInstant(State.REQUESTED, commitActionType, instantTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* Schedules a new compaction instant.
|
||||
*/
|
||||
public Option<String> scheduleCompaction(Option<Map<String, String>> extraMetadata) throws IOException {
|
||||
String instantTime = HoodieActiveTimeline.createNewCommitTime();
|
||||
String instantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||
LOG.info("Generate a new instant time " + instantTime);
|
||||
boolean notEmpty = scheduleCompactionAtInstant(instantTime, extraMetadata);
|
||||
return notEmpty ? Option.of(instantTime) : Option.empty();
|
||||
@@ -1046,7 +1062,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
throws IOException {
|
||||
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||
// if there are inflight writes, their instantTime must not be less than that of compaction instant time
|
||||
metaClient.getCommitsTimeline().filterInflightsExcludingCompaction().firstInstant().ifPresent(earliestInflight -> {
|
||||
metaClient.getCommitsTimeline().filterPendingExcludingCompaction().firstInstant().ifPresent(earliestInflight -> {
|
||||
Preconditions.checkArgument(
|
||||
HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), instantTime, HoodieTimeline.GREATER),
|
||||
"Earliest write inflight instant time must be later than compaction time. Earliest :" + earliestInflight
|
||||
@@ -1091,7 +1107,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
|
||||
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
|
||||
timeline.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
|
||||
timeline.readPlanAsBytes(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
|
||||
// Merge extra meta-data passed by user with the one already in inflight compaction
|
||||
Option<Map<String, String>> mergedMetaData = extraMetadata.map(m -> {
|
||||
Map<String, String> merged = new HashMap<>();
|
||||
@@ -1141,11 +1157,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup all inflight commits.
|
||||
* Cleanup all pending commits.
|
||||
*/
|
||||
private void rollbackInflightCommits() {
|
||||
private void rollbackPendingCommits() {
|
||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterInflightsExcludingCompaction();
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toList());
|
||||
for (String commit : commits) {
|
||||
@@ -1238,7 +1254,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||
HoodieCompactionPlan compactionPlan =
|
||||
CompactionUtils.getCompactionPlan(metaClient, compactionInstant.getTimestamp());
|
||||
|
||||
// Mark instant as compaction inflight
|
||||
activeTimeline.transitionCompactionRequestedToInflight(compactionInstant);
|
||||
compactionTimer = metrics.getCompactionCtx();
|
||||
@@ -1306,7 +1321,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
||||
*/
|
||||
@VisibleForTesting
|
||||
void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) throws IOException {
|
||||
table.rollback(jsc, inflightInstant.getTimestamp(), false);
|
||||
table.rollback(jsc, inflightInstant, false);
|
||||
// Revert instant state file
|
||||
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||
}
|
||||
|
||||
@@ -18,7 +18,9 @@
|
||||
|
||||
package org.apache.hudi.client.utils;
|
||||
|
||||
import org.apache.hudi.common.model.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
@@ -35,6 +37,6 @@ public class ClientUtils {
|
||||
public static HoodieTableMetaClient createMetaClient(JavaSparkContext jsc, HoodieWriteConfig config,
|
||||
boolean loadActiveTimelineOnLoad) {
|
||||
return new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), loadActiveTimelineOnLoad,
|
||||
config.getConsistencyGuardConfig());
|
||||
config.getConsistencyGuardConfig(), Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion())));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.config;
|
||||
import org.apache.hudi.HoodieWriteClient;
|
||||
import org.apache.hudi.WriteStatus;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.TimelineLayoutVersion;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.util.ConsistencyGuardConfig;
|
||||
import org.apache.hudi.common.util.ReflectionUtils;
|
||||
@@ -48,6 +49,7 @@ import java.util.Properties;
|
||||
public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
|
||||
public static final String TABLE_NAME = "hoodie.table.name";
|
||||
private static final String TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
|
||||
private static final String BASE_PATH_PROP = "hoodie.base.path";
|
||||
private static final String AVRO_SCHEMA = "hoodie.avro.schema";
|
||||
private static final String DEFAULT_PARALLELISM = "1500";
|
||||
@@ -141,6 +143,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP));
|
||||
}
|
||||
|
||||
public Integer getTimelineLayoutVersion() {
|
||||
return Integer.parseInt(props.getProperty(TIMELINE_LAYOUT_VERSION));
|
||||
}
|
||||
|
||||
public int getBulkInsertShuffleParallelism() {
|
||||
return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM));
|
||||
}
|
||||
@@ -587,6 +593,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withTimelineLayoutVersion(int version) {
|
||||
props.setProperty(TIMELINE_LAYOUT_VERSION, String.valueOf(version));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withBulkInsertParallelism(int bulkInsertParallelism) {
|
||||
props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism));
|
||||
return this;
|
||||
@@ -693,7 +704,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
DEFAULT_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM, DEFAULT_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(DELETE_PARALLELISM), DELETE_PARALLELISM, DEFAULT_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM, DEFAULT_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM,
|
||||
DEFAULT_ROLLBACK_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP), COMBINE_BEFORE_INSERT_PROP,
|
||||
DEFAULT_COMBINE_BEFORE_INSERT);
|
||||
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP), COMBINE_BEFORE_UPSERT_PROP,
|
||||
@@ -733,6 +745,12 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
setDefaultOnCondition(props, !isConsistencyGuardSet,
|
||||
ConsistencyGuardConfig.newBuilder().fromProperties(props).build());
|
||||
|
||||
setDefaultOnCondition(props, !props.containsKey(TIMELINE_LAYOUT_VERSION), TIMELINE_LAYOUT_VERSION,
|
||||
String.valueOf(TimelineLayoutVersion.CURR_VERSION));
|
||||
String layoutVersion = props.getProperty(TIMELINE_LAYOUT_VERSION);
|
||||
// Ensure Layout Version is good
|
||||
new TimelineLayoutVersion(Integer.parseInt(layoutVersion));
|
||||
|
||||
// Build WriteConfig at the end
|
||||
HoodieWriteConfig config = new HoodieWriteConfig(props);
|
||||
Preconditions.checkArgument(config.getBasePath() != null);
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
package org.apache.hudi.io;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||
import org.apache.hudi.common.model.ActionType;
|
||||
@@ -37,7 +37,10 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.AvroUtils;
|
||||
import org.apache.hudi.common.util.CleanerUtils;
|
||||
import org.apache.hudi.common.util.CompactionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieCommitException;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
@@ -111,15 +114,18 @@ public class HoodieCommitArchiveLog {
|
||||
public boolean archiveIfRequired(final JavaSparkContext jsc) throws IOException {
|
||||
try {
|
||||
List<HoodieInstant> instantsToArchive = getInstantsToArchive(jsc).collect(Collectors.toList());
|
||||
|
||||
boolean success = true;
|
||||
if (instantsToArchive.iterator().hasNext()) {
|
||||
if (!instantsToArchive.isEmpty()) {
|
||||
this.writer = openWriter();
|
||||
LOG.info("Archiving instants " + instantsToArchive);
|
||||
archive(instantsToArchive);
|
||||
LOG.info("Deleting archived instants " + instantsToArchive);
|
||||
success = deleteArchivedInstants(instantsToArchive);
|
||||
} else {
|
||||
LOG.info("No Instants to archive");
|
||||
}
|
||||
|
||||
return success;
|
||||
} finally {
|
||||
close();
|
||||
@@ -171,7 +177,15 @@ public class HoodieCommitArchiveLog {
|
||||
}).limit(commitTimeline.countInstants() - minCommitsToKeep));
|
||||
}
|
||||
|
||||
return instants;
|
||||
// For archiving and cleaning instants, we need to include intermediate state files if they exist
|
||||
HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
|
||||
Map<Pair<String, String>, List<HoodieInstant>> groupByTsAction = rawActiveTimeline.getInstants()
|
||||
.collect(Collectors.groupingBy(i -> Pair.of(i.getTimestamp(),
|
||||
HoodieInstant.getComparableAction(i.getAction()))));
|
||||
|
||||
return instants.flatMap(hoodieInstant ->
|
||||
groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(),
|
||||
HoodieInstant.getComparableAction(hoodieInstant.getAction()))).stream());
|
||||
}
|
||||
|
||||
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException {
|
||||
@@ -194,6 +208,7 @@ public class HoodieCommitArchiveLog {
|
||||
return i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
|
||||
|| (i.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)));
|
||||
}).max(Comparator.comparing(HoodieInstant::getTimestamp)));
|
||||
LOG.info("Latest Committed Instant=" + latestCommitted);
|
||||
if (latestCommitted.isPresent()) {
|
||||
success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get());
|
||||
}
|
||||
@@ -208,8 +223,8 @@ public class HoodieCommitArchiveLog {
|
||||
* @throws IOException in case of error
|
||||
*/
|
||||
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
|
||||
List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
|
||||
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
|
||||
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
|
||||
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
|
||||
|
||||
List<HoodieInstant> instantsToBeDeleted =
|
||||
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
|
||||
@@ -270,10 +285,14 @@ public class HoodieCommitArchiveLog {
|
||||
throws IOException {
|
||||
HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
|
||||
archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
|
||||
archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
|
||||
switch (hoodieInstant.getAction()) {
|
||||
case HoodieTimeline.CLEAN_ACTION: {
|
||||
archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils
|
||||
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class));
|
||||
if (hoodieInstant.isCompleted()) {
|
||||
archivedMetaWrapper.setHoodieCleanMetadata(CleanerUtils.getCleanerMetadata(metaClient, hoodieInstant));
|
||||
} else {
|
||||
archivedMetaWrapper.setHoodieCleanerPlan(CleanerUtils.getCleanerPlan(metaClient, hoodieInstant));
|
||||
}
|
||||
archivedMetaWrapper.setActionType(ActionType.clean.name());
|
||||
break;
|
||||
}
|
||||
@@ -303,8 +322,15 @@ public class HoodieCommitArchiveLog {
|
||||
archivedMetaWrapper.setActionType(ActionType.commit.name());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
case HoodieTimeline.COMPACTION_ACTION: {
|
||||
HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, hoodieInstant.getTimestamp());
|
||||
archivedMetaWrapper.setHoodieCompactionPlan(plan);
|
||||
archivedMetaWrapper.setActionType(ActionType.compaction.name());
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
throw new UnsupportedOperationException("Action not fully supported yet");
|
||||
}
|
||||
}
|
||||
return archivedMetaWrapper;
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ import org.apache.hudi.common.model.HoodieRollingStatMetadata;
|
||||
import org.apache.hudi.common.table.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.AvroUtils;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||
import org.apache.hudi.common.util.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
@@ -109,14 +109,15 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
Tuple2<String, String> partitionDelFileTuple = iter.next();
|
||||
String partitionPath = partitionDelFileTuple._1();
|
||||
String delFileName = partitionDelFileTuple._2();
|
||||
String deletePathStr = new Path(new Path(basePath, partitionPath), delFileName).toString();
|
||||
Path deletePath = new Path(new Path(basePath, partitionPath), delFileName);
|
||||
String deletePathStr = deletePath.toString();
|
||||
Boolean deletedFileResult = deleteFileAndGetResult(fs, deletePathStr);
|
||||
if (!partitionCleanStatMap.containsKey(partitionPath)) {
|
||||
partitionCleanStatMap.put(partitionPath, new PartitionCleanStat(partitionPath));
|
||||
}
|
||||
PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath);
|
||||
partitionCleanStat.addDeleteFilePatterns(deletePathStr);
|
||||
partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult);
|
||||
partitionCleanStat.addDeleteFilePatterns(deletePath.getName());
|
||||
partitionCleanStat.addDeletedFileResult(deletePath.getName(), deletedFileResult);
|
||||
}
|
||||
return partitionCleanStatMap.entrySet().stream().map(e -> new Tuple2<>(e.getKey(), e.getValue()))
|
||||
.collect(Collectors.toList()).iterator();
|
||||
@@ -312,82 +313,78 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
* @throws IllegalArgumentException if unknown cleaning policy is provided
|
||||
*/
|
||||
@Override
|
||||
public List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant) {
|
||||
try {
|
||||
HoodieCleanerPlan cleanerPlan = AvroUtils.deserializeCleanerPlan(getActiveTimeline()
|
||||
.getInstantAuxiliaryDetails(HoodieTimeline.getCleanRequestedInstant(cleanInstant.getTimestamp())).get());
|
||||
public List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
|
||||
int cleanerParallelism = Math.min(
|
||||
(int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()),
|
||||
config.getCleanerParallelism());
|
||||
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
|
||||
List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
|
||||
.parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream()
|
||||
.flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y)))
|
||||
.collect(Collectors.toList()), cleanerParallelism)
|
||||
.mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey((e1, e2) -> e1.merge(e2)).collect();
|
||||
|
||||
int cleanerParallelism = Math.min(
|
||||
(int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()),
|
||||
config.getCleanerParallelism());
|
||||
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
|
||||
List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
|
||||
.parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream()
|
||||
.flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y)))
|
||||
.collect(Collectors.toList()), cleanerParallelism)
|
||||
.mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey((e1, e2) -> e1.merge(e2)).collect();
|
||||
Map<String, PartitionCleanStat> partitionCleanStatsMap =
|
||||
partitionCleanStats.stream().collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
|
||||
|
||||
Map<String, PartitionCleanStat> partitionCleanStatsMap =
|
||||
partitionCleanStats.stream().collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
|
||||
|
||||
// Return PartitionCleanStat for each partition passed.
|
||||
return cleanerPlan.getFilesToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
|
||||
PartitionCleanStat partitionCleanStat =
|
||||
(partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap.get(partitionPath)
|
||||
: new PartitionCleanStat(partitionPath);
|
||||
HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();
|
||||
return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy()).withPartitionPath(partitionPath)
|
||||
.withEarliestCommitRetained(Option.ofNullable(
|
||||
actionInstant != null
|
||||
? new HoodieInstant(HoodieInstant.State.valueOf(actionInstant.getState()),
|
||||
actionInstant.getAction(), actionInstant.getTimestamp())
|
||||
: null))
|
||||
.withDeletePathPattern(partitionCleanStat.deletePathPatterns)
|
||||
.withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
|
||||
.withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
|
||||
}).collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to clean up after commit", e);
|
||||
}
|
||||
// Return PartitionCleanStat for each partition passed.
|
||||
return cleanerPlan.getFilesToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
|
||||
PartitionCleanStat partitionCleanStat =
|
||||
(partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap.get(partitionPath)
|
||||
: new PartitionCleanStat(partitionPath);
|
||||
HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();
|
||||
return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy()).withPartitionPath(partitionPath)
|
||||
.withEarliestCommitRetained(Option.ofNullable(
|
||||
actionInstant != null
|
||||
? new HoodieInstant(State.valueOf(actionInstant.getState()),
|
||||
actionInstant.getAction(), actionInstant.getTimestamp())
|
||||
: null))
|
||||
.withDeletePathPattern(partitionCleanStat.deletePathPatterns)
|
||||
.withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
|
||||
.withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
|
||||
throws IOException {
|
||||
Long startTime = System.currentTimeMillis();
|
||||
List<HoodieRollbackStat> stats = new ArrayList<>();
|
||||
String actionType = metaClient.getCommitActionType();
|
||||
HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
|
||||
List<String> inflights =
|
||||
this.getInflightCommitTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
// Atomically unpublish the commits
|
||||
if (!inflights.contains(commit)) {
|
||||
LOG.info("Unpublishing " + commit);
|
||||
activeTimeline.revertToInflight(new HoodieInstant(false, actionType, commit));
|
||||
|
||||
if (instant.isCompleted()) {
|
||||
LOG.info("Unpublishing instant " + instant);
|
||||
instant = activeTimeline.revertToInflight(instant);
|
||||
}
|
||||
|
||||
HoodieInstant instantToRollback = new HoodieInstant(false, actionType, commit);
|
||||
Long startTime = System.currentTimeMillis();
|
||||
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
||||
// deleting the timeline file
|
||||
if (!instant.isRequested()) {
|
||||
String commit = instant.getTimestamp();
|
||||
|
||||
// delete all the data files for this commit
|
||||
LOG.info("Clean out all parquet files generated for commit: " + commit);
|
||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instantToRollback);
|
||||
|
||||
// TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||
List<HoodieRollbackStat> stats =
|
||||
new RollbackExecutor(metaClient, config).performRollback(jsc, instantToRollback, rollbackRequests);
|
||||
// delete all the data files for this commit
|
||||
LOG.info("Clean out all parquet files generated for commit: " + commit);
|
||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instant);
|
||||
|
||||
//TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||
stats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests);
|
||||
}
|
||||
// Delete Inflight instant if enabled
|
||||
deleteInflightInstant(deleteInstants, activeTimeline, new HoodieInstant(true, actionType, commit));
|
||||
deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, instant);
|
||||
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||
return stats;
|
||||
}
|
||||
|
||||
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback) throws IOException {
|
||||
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback)
|
||||
throws IOException {
|
||||
return FSUtils.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(),
|
||||
config.shouldAssumeDatePartitioning()).stream().map(partitionPath -> {
|
||||
return RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback);
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Delete Inflight instant if enabled.
|
||||
*
|
||||
@@ -395,15 +392,22 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
* @param activeTimeline Hoodie active timeline
|
||||
* @param instantToBeDeleted Instant to be deleted
|
||||
*/
|
||||
protected void deleteInflightInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
|
||||
protected void deleteInflightAndRequestedInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
|
||||
HoodieInstant instantToBeDeleted) {
|
||||
// Remove marker files always on rollback
|
||||
deleteMarkerDir(instantToBeDeleted.getTimestamp());
|
||||
|
||||
// Remove the rolled back inflight commits
|
||||
if (deleteInstant) {
|
||||
activeTimeline.deleteInflight(instantToBeDeleted);
|
||||
LOG.info("Deleted inflight commit " + instantToBeDeleted);
|
||||
LOG.info("Deleting instant=" + instantToBeDeleted);
|
||||
activeTimeline.deletePending(instantToBeDeleted);
|
||||
if (instantToBeDeleted.isInflight() && !metaClient.getTimelineLayoutVersion().isNullVersion()) {
|
||||
// Delete corresponding requested instant
|
||||
instantToBeDeleted = new HoodieInstant(State.REQUESTED, instantToBeDeleted.getAction(),
|
||||
instantToBeDeleted.getTimestamp());
|
||||
activeTimeline.deletePending(instantToBeDeleted);
|
||||
}
|
||||
LOG.info("Deleted pending commit " + instantToBeDeleted);
|
||||
} else {
|
||||
LOG.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
|
||||
}
|
||||
|
||||
@@ -44,7 +44,6 @@ import org.apache.hudi.io.compact.HoodieRealtimeTableCompactor;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.spark.Partitioner;
|
||||
@@ -167,32 +166,39 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
|
||||
throws IOException {
|
||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant,
|
||||
boolean deleteInstants) throws IOException {
|
||||
Long startTime = System.currentTimeMillis();
|
||||
|
||||
String commit = instant.getTimestamp();
|
||||
LOG.error("Rolling back instant " + instant);
|
||||
|
||||
// Atomically un-publish all non-inflight commits
|
||||
if (instant.isCompleted()) {
|
||||
LOG.error("Un-publishing instant " + instant + ", deleteInstants=" + deleteInstants);
|
||||
instant = this.getActiveTimeline().revertToInflight(instant);
|
||||
}
|
||||
|
||||
List<HoodieRollbackStat> allRollbackStats = new ArrayList<>();
|
||||
|
||||
// At the moment, MOR table type does not support bulk nested rollbacks. Nested rollbacks is an experimental
|
||||
// feature that is expensive. To perform nested rollbacks, initiate multiple requests of client.rollback
|
||||
// (commitToRollback).
|
||||
// NOTE {@link HoodieCompactionConfig#withCompactionLazyBlockReadEnabled} needs to be set to TRUE. This is
|
||||
// required to avoid OOM when merging multiple LogBlocks performed during nested rollbacks.
|
||||
// Atomically un-publish all non-inflight commits
|
||||
Option<HoodieInstant> commitOrCompactionOption = Option.fromJavaOptional(this.getActiveTimeline()
|
||||
.getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
|
||||
HoodieActiveTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION))
|
||||
.getInstants().filter(i -> commit.equals(i.getTimestamp())).findFirst());
|
||||
HoodieInstant instantToRollback = commitOrCompactionOption.get();
|
||||
// Atomically un-publish all non-inflight commits
|
||||
if (!instantToRollback.isInflight()) {
|
||||
this.getActiveTimeline().revertToInflight(instantToRollback);
|
||||
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
||||
// deleting the timeline file
|
||||
if (!instant.isRequested()) {
|
||||
LOG.info("Unpublished " + commit);
|
||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instant);
|
||||
// TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||
allRollbackStats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests);
|
||||
}
|
||||
LOG.info("Unpublished " + commit);
|
||||
Long startTime = System.currentTimeMillis();
|
||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instantToRollback);
|
||||
// TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||
List<HoodieRollbackStat> allRollbackStats =
|
||||
new RollbackExecutor(metaClient, config).performRollback(jsc, instantToRollback, rollbackRequests);
|
||||
|
||||
// Delete Inflight instants if enabled
|
||||
deleteInflightInstant(deleteInstants, this.getActiveTimeline(),
|
||||
new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp()));
|
||||
deleteInflightAndRequestedInstant(deleteInstants, this.getActiveTimeline(), instant);
|
||||
|
||||
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||
|
||||
|
||||
@@ -182,8 +182,8 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
/**
|
||||
* Get only the inflights (no-completed) commit timeline.
|
||||
*/
|
||||
public HoodieTimeline getInflightCommitTimeline() {
|
||||
return metaClient.getCommitsTimeline().filterInflightsExcludingCompaction();
|
||||
public HoodieTimeline getPendingCommitTimeline() {
|
||||
return metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -287,16 +287,18 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
||||
*
|
||||
* @param jsc Java Spark Context
|
||||
* @param cleanInstant Clean Instant
|
||||
* @param cleanerPlan Cleaner Plan
|
||||
* @return list of Clean Stats
|
||||
*/
|
||||
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant);
|
||||
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant,
|
||||
HoodieCleanerPlan cleanerPlan);
|
||||
|
||||
/**
|
||||
* Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1) Atomically unpublish
|
||||
* this commit (2) clean indexing data (3) clean new generated parquet files / log blocks (4) Finally, delete
|
||||
* .<action>.commit or .<action>.inflight file if deleteInstants = true
|
||||
*/
|
||||
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
|
||||
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user