[HUDI-761] Refactoring rollback and restore actions using the ActionExecutor abstraction (#1492)
- rollback() and restore() table level APIs introduced - Restore is implemented by wrapping calls to rollback executor - Existing tests transparently cover this, since its just a refactor
This commit is contained in:
@@ -283,7 +283,7 @@ public class SparkMain {
|
|||||||
|
|
||||||
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception {
|
private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTime, String basePath) throws Exception {
|
||||||
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
HoodieWriteClient client = createHoodieClient(jsc, basePath);
|
||||||
if (client.rollbackToSavepoint(savepointTime)) {
|
if (client.restoreToSavepoint(savepointTime)) {
|
||||||
LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime));
|
LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime));
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -18,11 +18,9 @@
|
|||||||
|
|
||||||
package org.apache.hudi.client;
|
package org.apache.hudi.client;
|
||||||
|
|
||||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
import com.codahale.metrics.Timer;
|
||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||||
import org.apache.hudi.client.utils.SparkConfigUtils;
|
import org.apache.hudi.client.utils.SparkConfigUtils;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
|
||||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
import org.apache.hudi.common.model.HoodieRollingStat;
|
import org.apache.hudi.common.model.HoodieRollingStat;
|
||||||
@@ -32,19 +30,14 @@ import org.apache.hudi.common.model.WriteOperationType;
|
|||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieCommitException;
|
import org.apache.hudi.exception.HoodieCommitException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
import org.apache.hudi.exception.HoodieRollbackException;
|
|
||||||
import org.apache.hudi.index.HoodieIndex;
|
import org.apache.hudi.index.HoodieIndex;
|
||||||
import org.apache.hudi.metrics.HoodieMetrics;
|
import org.apache.hudi.metrics.HoodieMetrics;
|
||||||
import org.apache.hudi.table.HoodieTable;
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
|
||||||
import com.codahale.metrics.Timer;
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
@@ -53,10 +46,8 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract Write Client providing functionality for performing commit, index updates and rollback
|
* Abstract Write Client providing functionality for performing commit, index updates and rollback
|
||||||
@@ -82,12 +73,6 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
return this.operationType;
|
return this.operationType;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig) {
|
|
||||||
super(jsc, clientConfig);
|
|
||||||
this.metrics = new HoodieMetrics(config, config.getTableName());
|
|
||||||
this.index = index;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig,
|
protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig,
|
||||||
Option<EmbeddedTimelineService> timelineServer) {
|
Option<EmbeddedTimelineService> timelineServer) {
|
||||||
super(jsc, clientConfig, timelineServer);
|
super(jsc, clientConfig, timelineServer);
|
||||||
@@ -320,105 +305,4 @@ public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> e
|
|||||||
// before this point
|
// before this point
|
||||||
this.index.close();
|
this.index.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void rollbackInternal(String commitToRollback) {
|
|
||||||
final String startRollbackTime = HoodieActiveTimeline.createNewInstantTime();
|
|
||||||
final Timer.Context context = metrics.getRollbackCtx();
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
try {
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
|
||||||
Option<HoodieInstant> rollbackInstantOpt =
|
|
||||||
Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
|
||||||
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitToRollback))
|
|
||||||
.findFirst());
|
|
||||||
|
|
||||||
if (rollbackInstantOpt.isPresent()) {
|
|
||||||
List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackInstantOpt.get());
|
|
||||||
finishRollback(context, stats, Collections.singletonList(commitToRollback), startRollbackTime);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitToRollback,
|
|
||||||
e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected List<HoodieRollbackStat> doRollbackAndGetStats(final HoodieInstant instantToRollback) throws
|
|
||||||
IOException {
|
|
||||||
final String commitToRollback = instantToRollback.getTimestamp();
|
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
|
||||||
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
|
|
||||||
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
|
|
||||||
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
|
||||||
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
savepoints.forEach(s -> {
|
|
||||||
if (s.contains(commitToRollback)) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Could not rollback a savepointed commit. Delete savepoint first before rolling back" + s);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
|
|
||||||
// nothing to rollback
|
|
||||||
LOG.info("No commits to rollback " + commitToRollback);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure only the last n commits are being rolled back
|
|
||||||
// If there is a commit in-between or after that is not rolled back, then abort
|
|
||||||
|
|
||||||
if ((commitToRollback != null) && !commitTimeline.empty()
|
|
||||||
&& !commitTimeline.findInstantsAfter(commitToRollback, Integer.MAX_VALUE).empty()) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Found commits after time :" + commitToRollback + ", please rollback greater commits first");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if ((commitToRollback != null) && !inflights.isEmpty()
|
|
||||||
&& (inflights.indexOf(commitToRollback) != inflights.size() - 1)) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Found in-flight commits after time :" + commitToRollback + ", please rollback greater commits first");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
|
|
||||||
|
|
||||||
LOG.info("Deleted inflight commits " + commitToRollback);
|
|
||||||
|
|
||||||
// cleanup index entries
|
|
||||||
if (!getIndex().rollbackCommit(commitToRollback)) {
|
|
||||||
throw new HoodieRollbackException("Rollback index changes failed, for time :" + commitToRollback);
|
|
||||||
}
|
|
||||||
LOG.info("Index rolled back for commits " + commitToRollback);
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void finishRollback(final Timer.Context context, List<HoodieRollbackStat> rollbackStats,
|
|
||||||
List<String> commitsToRollback, final String startRollbackTime) throws IOException {
|
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
|
||||||
Option<Long> durationInMs = Option.empty();
|
|
||||||
long numFilesDeleted = rollbackStats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum();
|
|
||||||
if (context != null) {
|
|
||||||
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
|
|
||||||
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
|
||||||
}
|
|
||||||
HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils
|
|
||||||
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
|
|
||||||
//TODO: varadarb - This will be fixed when Rollback transition mimics that of commit
|
|
||||||
table.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION,
|
|
||||||
startRollbackTime));
|
|
||||||
table.getActiveTimeline().saveAsComplete(
|
|
||||||
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
|
||||||
TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata));
|
|
||||||
LOG.info("Rollback of Commits " + commitsToRollback + " is complete");
|
|
||||||
|
|
||||||
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
|
||||||
LOG.info("Cleaning up older rollback meta files");
|
|
||||||
// Cleanup of older cleaner meta files
|
|
||||||
// TODO - make the commit archival generic and archive rollback metadata
|
|
||||||
FSUtils.deleteOlderRollbackMetaFiles(fs, table.getMetaClient().getMetaPath(),
|
|
||||||
table.getActiveTimeline().getRollbackTimeline().getInstants());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,13 +18,14 @@
|
|||||||
|
|
||||||
package org.apache.hudi.client;
|
package org.apache.hudi.client;
|
||||||
|
|
||||||
|
import com.codahale.metrics.Timer;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||||
import org.apache.hudi.client.utils.SparkConfigUtils;
|
import org.apache.hudi.client.utils.SparkConfigUtils;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||||
@@ -49,6 +50,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
|||||||
import org.apache.hudi.exception.HoodieCommitException;
|
import org.apache.hudi.exception.HoodieCommitException;
|
||||||
import org.apache.hudi.exception.HoodieCompactionException;
|
import org.apache.hudi.exception.HoodieCompactionException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
import org.apache.hudi.exception.HoodieRestoreException;
|
||||||
import org.apache.hudi.exception.HoodieRollbackException;
|
import org.apache.hudi.exception.HoodieRollbackException;
|
||||||
import org.apache.hudi.exception.HoodieSavepointException;
|
import org.apache.hudi.exception.HoodieSavepointException;
|
||||||
import org.apache.hudi.index.HoodieIndex;
|
import org.apache.hudi.index.HoodieIndex;
|
||||||
@@ -56,8 +58,6 @@ import org.apache.hudi.metrics.HoodieMetrics;
|
|||||||
import org.apache.hudi.table.HoodieCommitArchiveLog;
|
import org.apache.hudi.table.HoodieCommitArchiveLog;
|
||||||
import org.apache.hudi.table.HoodieTable;
|
import org.apache.hudi.table.HoodieTable;
|
||||||
import org.apache.hudi.table.UserDefinedBulkInsertPartitioner;
|
import org.apache.hudi.table.UserDefinedBulkInsertPartitioner;
|
||||||
|
|
||||||
import com.codahale.metrics.Timer;
|
|
||||||
import org.apache.hudi.table.action.commit.HoodieWriteMetadata;
|
import org.apache.hudi.table.action.commit.HoodieWriteMetadata;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
@@ -65,18 +65,17 @@ import org.apache.spark.SparkConf;
|
|||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.api.java.function.PairFunction;
|
import org.apache.spark.api.java.function.PairFunction;
|
||||||
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.Collections;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Hoodie Write Client helps you build tables on HDFS [insert()] and then perform efficient mutations on an HDFS
|
* Hoodie Write Client helps you build tables on HDFS [insert()] and then perform efficient mutations on an HDFS
|
||||||
* table [upsert()]
|
* table [upsert()]
|
||||||
@@ -505,13 +504,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback the state to the savepoint. WARNING: This rollsback recent commits and deleted data files. Queries
|
* Restore the state to the savepoint. WARNING: This rollsback recent commits and deleted data files. Queries
|
||||||
* accessing the files will mostly fail. This should be done during a downtime.
|
* accessing the files will mostly fail. This should be done during a downtime.
|
||||||
*
|
*
|
||||||
* @param savepointTime - savepoint time to rollback to
|
* @param savepointTime - savepoint time to rollback to
|
||||||
* @return true if the savepoint was rollecback to successfully
|
* @return true if the savepoint was rollecback to successfully
|
||||||
*/
|
*/
|
||||||
public boolean rollbackToSavepoint(String savepointTime) {
|
public boolean restoreToSavepoint(String savepointTime) {
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
||||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||||
|
|
||||||
@@ -544,103 +543,60 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback the (inflight/committed) record changes with the given commit time. Three steps: (1) Atomically unpublish
|
* Rollback the inflight record changes with the given commit time.
|
||||||
* this commit (2) clean indexing data, (3) clean new generated parquet files. (4) Finally delete .commit or .inflight
|
|
||||||
* file.
|
|
||||||
*
|
*
|
||||||
* @param instantTime Instant time of the commit
|
* @param commitInstantTime Instant time of the commit
|
||||||
* @return {@code true} If rollback the record changes successfully. {@code false} otherwise
|
* @throws HoodieRollbackException if rollback cannot be performed successfully
|
||||||
*/
|
*/
|
||||||
public boolean rollback(final String instantTime) throws HoodieRollbackException {
|
public boolean rollback(final String commitInstantTime) throws HoodieRollbackException {
|
||||||
rollbackInternal(instantTime);
|
LOG.info("Begin rollback of instant " + commitInstantTime);
|
||||||
return true;
|
final String rollbackInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
|
final Timer.Context context = this.metrics.getRollbackCtx();
|
||||||
|
try {
|
||||||
|
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
||||||
|
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||||
|
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitInstantTime))
|
||||||
|
.findFirst());
|
||||||
|
if (commitInstantOpt.isPresent()) {
|
||||||
|
HoodieRollbackMetadata rollbackMetadata = table.rollback(jsc, rollbackInstantTime, commitInstantOpt.get(), true);
|
||||||
|
if (context != null) {
|
||||||
|
long durationInMs = metrics.getDurationInMs(context.stop());
|
||||||
|
metrics.updateRollbackMetrics(durationInMs, rollbackMetadata.getTotalFilesDeleted());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
LOG.info("Cannot find instant " + commitInstantTime + " in the timeline, for rollback");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitInstantTime, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* NOTE : This action requires all writers (ingest and compact) to a table to be stopped before proceeding. Revert
|
* NOTE : This action requires all writers (ingest and compact) to a table to be stopped before proceeding. Revert
|
||||||
* the (inflight/committed) record changes for all commits after the provided @param. Four steps: (1) Atomically
|
* the (inflight/committed) record changes for all commits after the provided instant time.
|
||||||
* unpublish this commit (2) clean indexing data, (3) clean new generated parquet/log files and/or append rollback to
|
|
||||||
* existing log files. (4) Finally delete .commit, .inflight, .compaction.inflight or .compaction.requested file
|
|
||||||
*
|
*
|
||||||
* @param instantTime Instant time to which restoration is requested
|
* @param instantTime Instant time to which restoration is requested
|
||||||
*/
|
*/
|
||||||
public void restoreToInstant(final String instantTime) throws HoodieRollbackException {
|
public HoodieRestoreMetadata restoreToInstant(final String instantTime) throws HoodieRestoreException {
|
||||||
|
LOG.info("Begin restore to instant " + instantTime);
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
final String restoreInstantTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
Timer.Context context = metrics.getRollbackCtx();
|
||||||
// Get all the commits on the timeline after the provided commit time
|
|
||||||
List<HoodieInstant> instantsToRollback = table.getActiveTimeline().getCommitsAndCompactionTimeline()
|
|
||||||
.getReverseOrderedInstants()
|
|
||||||
.filter(instant -> HoodieActiveTimeline.GREATER.test(instant.getTimestamp(), instantTime))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
// Start a rollback instant for all commits to be rolled back
|
|
||||||
String startRollbackInstant = HoodieActiveTimeline.createNewInstantTime();
|
|
||||||
// Start the timer
|
|
||||||
final Timer.Context context = startContext();
|
|
||||||
Map<String, List<HoodieRollbackStat>> instantsToStats = new HashMap<>();
|
|
||||||
table.getActiveTimeline().createNewInstant(
|
|
||||||
new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRollbackInstant));
|
|
||||||
instantsToRollback.forEach(instant -> {
|
|
||||||
try {
|
|
||||||
switch (instant.getAction()) {
|
|
||||||
case HoodieTimeline.COMMIT_ACTION:
|
|
||||||
case HoodieTimeline.DELTA_COMMIT_ACTION:
|
|
||||||
List<HoodieRollbackStat> statsForInstant = doRollbackAndGetStats(instant);
|
|
||||||
instantsToStats.put(instant.getTimestamp(), statsForInstant);
|
|
||||||
break;
|
|
||||||
case HoodieTimeline.COMPACTION_ACTION:
|
|
||||||
// TODO : Get file status and create a rollback stat and file
|
|
||||||
// TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
|
|
||||||
// delete these files when it does not see a corresponding instant file under .hoodie
|
|
||||||
List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant);
|
|
||||||
instantsToStats.put(instant.getTimestamp(), statsForCompaction);
|
|
||||||
LOG.info("Deleted compaction instant " + instant);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("invalid action name " + instant.getAction());
|
|
||||||
}
|
|
||||||
} catch (IOException io) {
|
|
||||||
throw new HoodieRollbackException("unable to rollback instant " + instant, io);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
try {
|
try {
|
||||||
finishRestore(context, Collections.unmodifiableMap(instantsToStats),
|
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
||||||
instantsToRollback.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()),
|
HoodieRestoreMetadata restoreMetadata = table.restore(jsc, restoreInstantTime, instantTime);
|
||||||
startRollbackInstant, instantTime);
|
if (context != null) {
|
||||||
} catch (IOException io) {
|
final long durationInMs = metrics.getDurationInMs(context.stop());
|
||||||
throw new HoodieRollbackException("unable to rollback instants " + instantsToRollback, io);
|
final long totalFilesDeleted = restoreMetadata.getHoodieRestoreMetadata().values().stream()
|
||||||
}
|
.flatMap(Collection::stream)
|
||||||
}
|
.mapToLong(HoodieRollbackMetadata::getTotalFilesDeleted)
|
||||||
|
.sum();
|
||||||
private Timer.Context startContext() {
|
metrics.updateRollbackMetrics(durationInMs, totalFilesDeleted);
|
||||||
return metrics.getRollbackCtx();
|
}
|
||||||
}
|
return restoreMetadata;
|
||||||
|
} catch (Exception e) {
|
||||||
private void finishRestore(final Timer.Context context, Map<String, List<HoodieRollbackStat>> commitToStats,
|
throw new HoodieRestoreException("Failed to restore to " + instantTime, e);
|
||||||
List<String> commitsToRollback, final String startRestoreTime, final String restoreToInstant) throws IOException {
|
|
||||||
HoodieTable<T> table = HoodieTable.create(config, jsc);
|
|
||||||
Option<Long> durationInMs = Option.empty();
|
|
||||||
long numFilesDeleted = 0L;
|
|
||||||
for (Map.Entry<String, List<HoodieRollbackStat>> commitToStat : commitToStats.entrySet()) {
|
|
||||||
List<HoodieRollbackStat> stats = commitToStat.getValue();
|
|
||||||
numFilesDeleted = stats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum();
|
|
||||||
}
|
|
||||||
if (context != null) {
|
|
||||||
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
|
|
||||||
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
|
||||||
}
|
|
||||||
HoodieRestoreMetadata restoreMetadata =
|
|
||||||
TimelineMetadataUtils.convertRestoreMetadata(startRestoreTime, durationInMs, commitsToRollback, commitToStats);
|
|
||||||
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRestoreTime),
|
|
||||||
TimelineMetadataUtils.serializeRestoreMetadata(restoreMetadata));
|
|
||||||
LOG.info("Commits " + commitsToRollback + " rollback is complete. Restored table to " + restoreToInstant);
|
|
||||||
|
|
||||||
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
|
||||||
LOG.info("Cleaning up older restore meta files");
|
|
||||||
// Cleanup of older cleaner meta files
|
|
||||||
// TODO - make the commit archival generic and archive rollback metadata
|
|
||||||
FSUtils.deleteOlderRollbackMetaFiles(fs, table.getMetaClient().getMetaPath(),
|
|
||||||
table.getActiveTimeline().getRestoreTimeline().getInstants());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -835,11 +791,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
HoodieTimeline pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
|
HoodieTimeline pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
|
||||||
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
|
HoodieInstant inflightInstant = HoodieTimeline.getCompactionInflightInstant(compactionInstantTime);
|
||||||
if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
|
if (pendingCompactionTimeline.containsInstant(inflightInstant)) {
|
||||||
// inflight compaction - Needs to rollback first deleting new parquet files before we run compaction.
|
|
||||||
rollbackInflightCompaction(inflightInstant, table);
|
rollbackInflightCompaction(inflightInstant, table);
|
||||||
// refresh table
|
metaClient.reloadActiveTimeline();
|
||||||
metaClient = createMetaClient(true);
|
|
||||||
table = HoodieTable.create(metaClient, config, jsc);
|
|
||||||
pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
|
pendingCompactionTimeline = metaClient.getActiveTimeline().filterPendingCompactionTimeline();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -914,9 +867,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* @param inflightInstant Inflight Compaction Instant
|
* @param inflightInstant Inflight Compaction Instant
|
||||||
* @param table Hoodie Table
|
* @param table Hoodie Table
|
||||||
*/
|
*/
|
||||||
public void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) throws IOException {
|
public void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) {
|
||||||
table.rollback(jsc, inflightInstant, false);
|
table.rollback(jsc, HoodieActiveTimeline.createNewInstantTime(), inflightInstant, false);
|
||||||
// Revert instant state file
|
|
||||||
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,26 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.exception;
|
||||||
|
|
||||||
|
public class HoodieRestoreException extends HoodieException {
|
||||||
|
|
||||||
|
public HoodieRestoreException(String msg, Throwable e) {
|
||||||
|
super(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -22,10 +22,10 @@ import org.apache.avro.generic.GenericRecord;
|
|||||||
import org.apache.avro.generic.IndexedRecord;
|
import org.apache.avro.generic.IndexedRecord;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
import org.apache.hudi.client.utils.ParquetReaderIterator;
|
import org.apache.hudi.client.utils.ParquetReaderIterator;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
|
||||||
import org.apache.hudi.common.model.HoodieBaseFile;
|
import org.apache.hudi.common.model.HoodieBaseFile;
|
||||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
@@ -33,9 +33,7 @@ import org.apache.hudi.common.model.HoodieRecord;
|
|||||||
import org.apache.hudi.common.model.HoodieRecordLocation;
|
import org.apache.hudi.common.model.HoodieRecordLocation;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
|
import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor;
|
||||||
@@ -56,8 +54,8 @@ import org.apache.hudi.table.action.commit.InsertCommitActionExecutor;
|
|||||||
import org.apache.hudi.table.action.commit.InsertPreppedCommitActionExecutor;
|
import org.apache.hudi.table.action.commit.InsertPreppedCommitActionExecutor;
|
||||||
import org.apache.hudi.table.action.commit.UpsertCommitActionExecutor;
|
import org.apache.hudi.table.action.commit.UpsertCommitActionExecutor;
|
||||||
import org.apache.hudi.table.action.commit.UpsertPreppedCommitActionExecutor;
|
import org.apache.hudi.table.action.commit.UpsertPreppedCommitActionExecutor;
|
||||||
import org.apache.hudi.table.rollback.RollbackHelper;
|
import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor;
|
||||||
import org.apache.hudi.table.rollback.RollbackRequest;
|
import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor;
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.parquet.avro.AvroParquetReader;
|
import org.apache.parquet.avro.AvroParquetReader;
|
||||||
@@ -68,12 +66,10 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implementation of a very heavily read-optimized Hoodie Table where, all data is stored in base files, with
|
* Implementation of a very heavily read-optimized Hoodie Table where, all data is stored in base files, with
|
||||||
@@ -202,69 +198,12 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
|
public HoodieRollbackMetadata rollback(JavaSparkContext jsc, String rollbackInstantTime, HoodieInstant commitInstant, boolean deleteInstants) {
|
||||||
throws IOException {
|
return new CopyOnWriteRollbackActionExecutor(jsc, config, this, rollbackInstantTime, commitInstant, deleteInstants).execute();
|
||||||
long startTime = System.currentTimeMillis();
|
|
||||||
List<HoodieRollbackStat> stats = new ArrayList<>();
|
|
||||||
HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
|
|
||||||
|
|
||||||
if (instant.isCompleted()) {
|
|
||||||
LOG.info("Unpublishing instant " + instant);
|
|
||||||
instant = activeTimeline.revertToInflight(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
|
||||||
// deleting the timeline file
|
|
||||||
if (!instant.isRequested()) {
|
|
||||||
String commit = instant.getTimestamp();
|
|
||||||
|
|
||||||
// delete all the data files for this commit
|
|
||||||
LOG.info("Clean out all parquet files generated for commit: " + commit);
|
|
||||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instant);
|
|
||||||
|
|
||||||
//TODO: We need to persist this as rollback workload and use it in case of partial failures
|
|
||||||
stats = new RollbackHelper(metaClient, config).performRollback(jsc, instant, rollbackRequests);
|
|
||||||
}
|
|
||||||
// Delete Inflight instant if enabled
|
|
||||||
deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, instant);
|
|
||||||
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
|
||||||
return stats;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback)
|
public HoodieRestoreMetadata restore(JavaSparkContext jsc, String restoreInstantTime, String instantToRestore) {
|
||||||
throws IOException {
|
return new CopyOnWriteRestoreActionExecutor(jsc, config, this, restoreInstantTime, instantToRestore).execute();
|
||||||
return FSUtils.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(),
|
|
||||||
config.shouldAssumeDatePartitioning()).stream().map(partitionPath -> RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete Inflight instant if enabled.
|
|
||||||
*
|
|
||||||
* @param deleteInstant Enable Deletion of Inflight instant
|
|
||||||
* @param activeTimeline Hoodie active timeline
|
|
||||||
* @param instantToBeDeleted Instant to be deleted
|
|
||||||
*/
|
|
||||||
protected void deleteInflightAndRequestedInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
|
|
||||||
HoodieInstant instantToBeDeleted) {
|
|
||||||
// Remove marker files always on rollback
|
|
||||||
deleteMarkerDir(instantToBeDeleted.getTimestamp());
|
|
||||||
|
|
||||||
// Remove the rolled back inflight commits
|
|
||||||
if (deleteInstant) {
|
|
||||||
LOG.info("Deleting instant=" + instantToBeDeleted);
|
|
||||||
activeTimeline.deletePending(instantToBeDeleted);
|
|
||||||
if (instantToBeDeleted.isInflight() && !metaClient.getTimelineLayoutVersion().isNullVersion()) {
|
|
||||||
// Delete corresponding requested instant
|
|
||||||
instantToBeDeleted = new HoodieInstant(State.REQUESTED, instantToBeDeleted.getAction(),
|
|
||||||
instantToBeDeleted.getTimestamp());
|
|
||||||
activeTimeline.deletePending(instantToBeDeleted);
|
|
||||||
}
|
|
||||||
LOG.info("Deleted pending commit " + instantToBeDeleted);
|
|
||||||
} else {
|
|
||||||
LOG.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
enum BucketType {
|
enum BucketType {
|
||||||
@@ -296,8 +235,6 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper class for a small file's location and its actual size on disk.
|
* Helper class for a small file's location and its actual size on disk.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -19,22 +19,17 @@
|
|||||||
package org.apache.hudi.table;
|
package org.apache.hudi.table;
|
||||||
|
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
|
||||||
import org.apache.hudi.common.model.FileSlice;
|
|
||||||
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
|
||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
|
||||||
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
import org.apache.hudi.common.util.ValidationUtils;
|
|
||||||
import org.apache.hudi.config.HoodieWriteConfig;
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
import org.apache.hudi.exception.HoodieCompactionException;
|
import org.apache.hudi.exception.HoodieCompactionException;
|
||||||
import org.apache.hudi.exception.HoodieIOException;
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
@@ -46,21 +41,16 @@ import org.apache.hudi.table.action.deltacommit.InsertDeltaCommitActionExecutor;
|
|||||||
import org.apache.hudi.table.action.deltacommit.InsertPreppedDeltaCommitActionExecutor;
|
import org.apache.hudi.table.action.deltacommit.InsertPreppedDeltaCommitActionExecutor;
|
||||||
import org.apache.hudi.table.action.deltacommit.UpsertDeltaCommitActionExecutor;
|
import org.apache.hudi.table.action.deltacommit.UpsertDeltaCommitActionExecutor;
|
||||||
import org.apache.hudi.table.action.deltacommit.UpsertPreppedDeltaCommitActionExecutor;
|
import org.apache.hudi.table.action.deltacommit.UpsertPreppedDeltaCommitActionExecutor;
|
||||||
|
import org.apache.hudi.table.action.restore.MergeOnReadRestoreActionExecutor;
|
||||||
|
import org.apache.hudi.table.action.rollback.MergeOnReadRollbackActionExecutor;
|
||||||
import org.apache.hudi.table.compact.HoodieMergeOnReadTableCompactor;
|
import org.apache.hudi.table.compact.HoodieMergeOnReadTableCompactor;
|
||||||
import org.apache.hudi.table.rollback.RollbackHelper;
|
|
||||||
import org.apache.hudi.table.rollback.RollbackRequest;
|
|
||||||
|
|
||||||
import org.apache.log4j.LogManager;
|
import org.apache.log4j.LogManager;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.apache.spark.api.java.JavaRDD;
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UncheckedIOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -172,146 +162,15 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant,
|
public HoodieRollbackMetadata rollback(JavaSparkContext jsc,
|
||||||
boolean deleteInstants) throws IOException {
|
String rollbackInstantTime,
|
||||||
long startTime = System.currentTimeMillis();
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants) {
|
||||||
String commit = instant.getTimestamp();
|
return new MergeOnReadRollbackActionExecutor(jsc, config, this, rollbackInstantTime, commitInstant, deleteInstants).execute();
|
||||||
LOG.error("Rolling back instant " + instant);
|
|
||||||
|
|
||||||
// Atomically un-publish all non-inflight commits
|
|
||||||
if (instant.isCompleted()) {
|
|
||||||
LOG.error("Un-publishing instant " + instant + ", deleteInstants=" + deleteInstants);
|
|
||||||
instant = this.getActiveTimeline().revertToInflight(instant);
|
|
||||||
}
|
|
||||||
|
|
||||||
List<HoodieRollbackStat> allRollbackStats = new ArrayList<>();
|
|
||||||
|
|
||||||
// At the moment, MOR table type does not support bulk nested rollbacks. Nested rollbacks is an experimental
|
|
||||||
// feature that is expensive. To perform nested rollbacks, initiate multiple requests of client.rollback
|
|
||||||
// (commitToRollback).
|
|
||||||
// NOTE {@link HoodieCompactionConfig#withCompactionLazyBlockReadEnabled} needs to be set to TRUE. This is
|
|
||||||
// required to avoid OOM when merging multiple LogBlocks performed during nested rollbacks.
|
|
||||||
// Atomically un-publish all non-inflight commits
|
|
||||||
// Atomically un-publish all non-inflight commits
|
|
||||||
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
|
||||||
// deleting the timeline file
|
|
||||||
if (!instant.isRequested()) {
|
|
||||||
LOG.info("Unpublished " + commit);
|
|
||||||
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instant);
|
|
||||||
// TODO: We need to persist this as rollback workload and use it in case of partial failures
|
|
||||||
allRollbackStats = new RollbackHelper(metaClient, config).performRollback(jsc, instant, rollbackRequests);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete Inflight instants if enabled
|
|
||||||
deleteInflightAndRequestedInstant(deleteInstants, this.getActiveTimeline(), instant);
|
|
||||||
|
|
||||||
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
|
||||||
|
|
||||||
return allRollbackStats;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public HoodieRestoreMetadata restore(JavaSparkContext jsc, String restoreInstantTime, String instantToRestore) {
|
||||||
* Generate all rollback requests that we need to perform for rolling back this action without actually performing
|
return new MergeOnReadRestoreActionExecutor(jsc, config, this, restoreInstantTime, instantToRestore).execute();
|
||||||
* rolling back.
|
|
||||||
*
|
|
||||||
* @param jsc JavaSparkContext
|
|
||||||
* @param instantToRollback Instant to Rollback
|
|
||||||
* @return list of rollback requests
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private List<RollbackRequest> generateRollbackRequests(JavaSparkContext jsc, HoodieInstant instantToRollback)
|
|
||||||
throws IOException {
|
|
||||||
String commit = instantToRollback.getTimestamp();
|
|
||||||
List<String> partitions = FSUtils.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(),
|
|
||||||
config.shouldAssumeDatePartitioning());
|
|
||||||
int sparkPartitions = Math.max(Math.min(partitions.size(), config.getRollbackParallelism()), 1);
|
|
||||||
return jsc.parallelize(partitions, Math.min(partitions.size(), sparkPartitions)).flatMap(partitionPath -> {
|
|
||||||
HoodieActiveTimeline activeTimeline = this.getActiveTimeline().reload();
|
|
||||||
List<RollbackRequest> partitionRollbackRequests = new ArrayList<>();
|
|
||||||
switch (instantToRollback.getAction()) {
|
|
||||||
case HoodieTimeline.COMMIT_ACTION:
|
|
||||||
LOG.info(
|
|
||||||
"Rolling back commit action. There are higher delta commits. So only rolling back this instant");
|
|
||||||
partitionRollbackRequests.add(
|
|
||||||
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
|
||||||
break;
|
|
||||||
case HoodieTimeline.COMPACTION_ACTION:
|
|
||||||
// If there is no delta commit present after the current commit (if compaction), no action, else we
|
|
||||||
// need to make sure that a compaction commit rollback also deletes any log files written as part of the
|
|
||||||
// succeeding deltacommit.
|
|
||||||
boolean higherDeltaCommits =
|
|
||||||
!activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1).empty();
|
|
||||||
if (higherDeltaCommits) {
|
|
||||||
// Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled
|
|
||||||
// and has not yet finished. In this scenario we should delete only the newly created parquet files
|
|
||||||
// and not corresponding base commit log files created with this as baseCommit since updates would
|
|
||||||
// have been written to the log files.
|
|
||||||
LOG.info("Rolling back compaction. There are higher delta commits. So only deleting data files");
|
|
||||||
partitionRollbackRequests.add(
|
|
||||||
RollbackRequest.createRollbackRequestWithDeleteDataFilesOnlyAction(partitionPath, instantToRollback));
|
|
||||||
} else {
|
|
||||||
// No deltacommits present after this compaction commit (inflight or requested). In this case, we
|
|
||||||
// can also delete any log files that were created with this compaction commit as base
|
|
||||||
// commit.
|
|
||||||
LOG.info("Rolling back compaction plan. There are NO higher delta commits. So deleting both data and"
|
|
||||||
+ " log files");
|
|
||||||
partitionRollbackRequests.add(
|
|
||||||
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case HoodieTimeline.DELTA_COMMIT_ACTION:
|
|
||||||
// --------------------------------------------------------------------------------------------------
|
|
||||||
// (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal
|
|
||||||
// --------------------------------------------------------------------------------------------------
|
|
||||||
// (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In
|
|
||||||
// this scenario we would want to delete these log files.
|
|
||||||
// (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario,
|
|
||||||
// HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks.
|
|
||||||
// (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is
|
|
||||||
// being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and
|
|
||||||
// and hence will end up deleting these log files. This is done so there are no orphan log files
|
|
||||||
// lying around.
|
|
||||||
// (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions
|
|
||||||
// taken in this scenario is a combination of (A.2) and (A.3)
|
|
||||||
// ---------------------------------------------------------------------------------------------------
|
|
||||||
// (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal
|
|
||||||
// ---------------------------------------------------------------------------------------------------
|
|
||||||
// (B.1) Failed first commit - Inserts were written to parquet files and HoodieWriteStat has no entries.
|
|
||||||
// In this scenario, we delete all the parquet files written for the failed commit.
|
|
||||||
// (B.2) Failed recurring commits - Inserts were written to parquet files and updates to log files. In
|
|
||||||
// this scenario, perform (A.1) and for updates written to log files, write rollback blocks.
|
|
||||||
// (B.3) Rollback triggered for first commit - Same as (B.1)
|
|
||||||
// (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files
|
|
||||||
// as well if the base parquet file gets deleted.
|
|
||||||
try {
|
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
|
||||||
metaClient.getCommitTimeline()
|
|
||||||
.getInstantDetails(
|
|
||||||
new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp()))
|
|
||||||
.get(),
|
|
||||||
HoodieCommitMetadata.class);
|
|
||||||
|
|
||||||
// In case all data was inserts and the commit failed, delete the file belonging to that commit
|
|
||||||
// We do not know fileIds for inserts (first inserts are either log files or parquet files),
|
|
||||||
// delete all files for the corresponding failed commit, if present (same as COW)
|
|
||||||
partitionRollbackRequests.add(
|
|
||||||
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
|
||||||
|
|
||||||
// append rollback blocks for updates
|
|
||||||
if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) {
|
|
||||||
partitionRollbackRequests
|
|
||||||
.addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
} catch (IOException io) {
|
|
||||||
throw new UncheckedIOException("Failed to collect rollback actions for commit " + commit, io);
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return partitionRollbackRequests.iterator();
|
|
||||||
}).filter(Objects::nonNull).collect();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -320,39 +179,4 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
|
|||||||
// delegate to base class for MOR tables
|
// delegate to base class for MOR tables
|
||||||
super.finalizeWrite(jsc, instantTs, stats);
|
super.finalizeWrite(jsc, instantTs, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<RollbackRequest> generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant,
|
|
||||||
HoodieCommitMetadata commitMetadata) {
|
|
||||||
ValidationUtils.checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION));
|
|
||||||
|
|
||||||
// wStat.getPrevCommit() might not give the right commit time in the following
|
|
||||||
// scenario : If a compaction was scheduled, the new commitTime associated with the requested compaction will be
|
|
||||||
// used to write the new log files. In this case, the commit time for the log file is the compaction requested time.
|
|
||||||
// But the index (global) might store the baseCommit of the parquet and not the requested, hence get the
|
|
||||||
// baseCommit always by listing the file slice
|
|
||||||
Map<String, String> fileIdToBaseCommitTimeForLogMap = this.getSliceView().getLatestFileSlices(partitionPath)
|
|
||||||
.collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime));
|
|
||||||
return commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> {
|
|
||||||
|
|
||||||
// Filter out stats without prevCommit since they are all inserts
|
|
||||||
boolean validForRollback = (wStat != null) && (!wStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT))
|
|
||||||
&& (wStat.getPrevCommit() != null) && fileIdToBaseCommitTimeForLogMap.containsKey(wStat.getFileId());
|
|
||||||
|
|
||||||
if (validForRollback) {
|
|
||||||
// For sanity, log instant time can never be less than base-commit on which we are rolling back
|
|
||||||
ValidationUtils
|
|
||||||
.checkArgument(HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId()),
|
|
||||||
rollbackInstant.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL));
|
|
||||||
}
|
|
||||||
|
|
||||||
return validForRollback && HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get(
|
|
||||||
// Base Ts should be strictly less. If equal (for inserts-to-logs), the caller employs another option
|
|
||||||
// to delete and we should not step on it
|
|
||||||
wStat.getFileId()), rollbackInstant.getTimestamp(), HoodieTimeline.LESSER);
|
|
||||||
}).map(wStat -> {
|
|
||||||
String baseCommitTime = fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId());
|
|
||||||
return RollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, wStat.getFileId(),
|
|
||||||
baseCommitTime, rollbackInstant);
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,10 +23,11 @@ import org.apache.hadoop.fs.FileSystem;
|
|||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||||
import org.apache.hudi.client.SparkTaskContextSupplier;
|
import org.apache.hudi.client.SparkTaskContextSupplier;
|
||||||
import org.apache.hudi.client.WriteStatus;
|
import org.apache.hudi.client.WriteStatus;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
|
||||||
import org.apache.hudi.common.config.SerializableConfiguration;
|
import org.apache.hudi.common.config.SerializableConfiguration;
|
||||||
import org.apache.hudi.common.fs.ConsistencyGuard;
|
import org.apache.hudi.common.fs.ConsistencyGuard;
|
||||||
import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility;
|
import org.apache.hudi.common.fs.ConsistencyGuard.FileVisibility;
|
||||||
@@ -350,12 +351,27 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
public abstract HoodieCleanMetadata clean(JavaSparkContext jsc, String cleanInstantTime);
|
public abstract HoodieCleanMetadata clean(JavaSparkContext jsc, String cleanInstantTime);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1) Atomically unpublish
|
* Rollback the (inflight/committed) record changes with the given commit time.
|
||||||
* this commit (2) clean indexing data (3) clean new generated parquet files / log blocks (4) Finally, delete
|
* <pre>
|
||||||
* .<action>.commit or .<action>.inflight file if deleteInstants = true
|
* Three steps:
|
||||||
|
* (1) Atomically unpublish this commit
|
||||||
|
* (2) clean indexing data
|
||||||
|
* (3) clean new generated parquet files.
|
||||||
|
* (4) Finally delete .commit or .inflight file, if deleteInstants = true
|
||||||
|
* </pre>
|
||||||
*/
|
*/
|
||||||
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
|
public abstract HoodieRollbackMetadata rollback(JavaSparkContext jsc,
|
||||||
throws IOException;
|
String rollbackInstantTime,
|
||||||
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Restore the table to the given instant. Note that this is a admin table recovery operation
|
||||||
|
* that would cause any running queries that are accessing file slices written after the instant to fail.
|
||||||
|
*/
|
||||||
|
public abstract HoodieRestoreMetadata restore(JavaSparkContext jsc,
|
||||||
|
String restoreInstantTime,
|
||||||
|
String instantToRestore);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finalize the written data onto storage. Perform any final cleanups.
|
* Finalize the written data onto storage. Perform any final cleanups.
|
||||||
@@ -364,8 +380,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
* @param stats List of HoodieWriteStats
|
* @param stats List of HoodieWriteStats
|
||||||
* @throws HoodieIOException if some paths can't be finalized on storage
|
* @throws HoodieIOException if some paths can't be finalized on storage
|
||||||
*/
|
*/
|
||||||
public void finalizeWrite(JavaSparkContext jsc, String instantTs, List<HoodieWriteStat> stats)
|
public void finalizeWrite(JavaSparkContext jsc, String instantTs, List<HoodieWriteStat> stats) throws HoodieIOException {
|
||||||
throws HoodieIOException {
|
|
||||||
cleanFailedWrites(jsc, instantTs, stats, config.getConsistencyGuardConfig().isConsistencyCheckEnabled());
|
cleanFailedWrites(jsc, instantTs, stats, config.getConsistencyGuardConfig().isConsistencyCheckEnabled());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,7 +389,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
*
|
*
|
||||||
* @param instantTs Instant Time
|
* @param instantTs Instant Time
|
||||||
*/
|
*/
|
||||||
protected void deleteMarkerDir(String instantTs) {
|
public void deleteMarkerDir(String instantTs) {
|
||||||
try {
|
try {
|
||||||
FileSystem fs = getMetaClient().getFs();
|
FileSystem fs = getMetaClient().getFs();
|
||||||
Path markerDir = new Path(metaClient.getMarkerFolderPath(instantTs));
|
Path markerDir = new Path(metaClient.getMarkerFolderPath(instantTs));
|
||||||
@@ -473,8 +488,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
* @param groupByPartition Files grouped by partition
|
* @param groupByPartition Files grouped by partition
|
||||||
* @param visibility Appear/Disappear
|
* @param visibility Appear/Disappear
|
||||||
*/
|
*/
|
||||||
private void waitForAllFiles(JavaSparkContext jsc, Map<String, List<Pair<String, String>>> groupByPartition,
|
private void waitForAllFiles(JavaSparkContext jsc, Map<String, List<Pair<String, String>>> groupByPartition, FileVisibility visibility) {
|
||||||
FileVisibility visibility) {
|
|
||||||
// This will either ensure all files to be deleted are present.
|
// This will either ensure all files to be deleted are present.
|
||||||
boolean checkPassed =
|
boolean checkPassed =
|
||||||
jsc.parallelize(new ArrayList<>(groupByPartition.entrySet()), config.getFinalizeWriteParallelism())
|
jsc.parallelize(new ArrayList<>(groupByPartition.entrySet()), config.getFinalizeWriteParallelism())
|
||||||
@@ -486,8 +500,7 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean waitForCondition(String partitionPath, Stream<Pair<String, String>> partitionFilePaths,
|
private boolean waitForCondition(String partitionPath, Stream<Pair<String, String>> partitionFilePaths, FileVisibility visibility) {
|
||||||
FileVisibility visibility) {
|
|
||||||
final FileSystem fileSystem = metaClient.getRawFs();
|
final FileSystem fileSystem = metaClient.getRawFs();
|
||||||
List<String> fileList = partitionFilePaths.map(Pair::getValue).collect(Collectors.toList());
|
List<String> fileList = partitionFilePaths.map(Pair::getValue).collect(Collectors.toList());
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -57,6 +57,10 @@ public class CleanActionExecutor extends BaseActionExecutor<HoodieCleanMetadata>
|
|||||||
private static final long serialVersionUID = 1L;
|
private static final long serialVersionUID = 1L;
|
||||||
private static final Logger LOG = LogManager.getLogger(CleanActionExecutor.class);
|
private static final Logger LOG = LogManager.getLogger(CleanActionExecutor.class);
|
||||||
|
|
||||||
|
public CleanActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config, HoodieTable<?> table, String instantTime) {
|
||||||
|
super(jsc, config, table, instantTime);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates List of files to be cleaned.
|
* Generates List of files to be cleaned.
|
||||||
*
|
*
|
||||||
@@ -172,10 +176,6 @@ public class CleanActionExecutor extends BaseActionExecutor<HoodieCleanMetadata>
|
|||||||
}).collect(Collectors.toList());
|
}).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
public CleanActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config, HoodieTable<?> table, String instantTime) {
|
|
||||||
super(jsc, config, table, instantTime);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a Cleaner plan if there are files to be cleaned and stores them in instant file.
|
* Creates a Cleaner plan if there are files to be cleaned and stores them in instant file.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -0,0 +1,111 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.restore;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||||
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.exception.HoodieRollbackException;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public abstract class BaseRestoreActionExecutor extends BaseActionExecutor<HoodieRestoreMetadata> {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(BaseRestoreActionExecutor.class);
|
||||||
|
|
||||||
|
private final String restoreInstantTime;
|
||||||
|
|
||||||
|
public BaseRestoreActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
String restoreInstantTime) {
|
||||||
|
super(jsc, config, table, instantTime);
|
||||||
|
this.restoreInstantTime = restoreInstantTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieRestoreMetadata execute() {
|
||||||
|
HoodieTimer restoreTimer = new HoodieTimer();
|
||||||
|
restoreTimer.startTimer();
|
||||||
|
|
||||||
|
// Get all the commits on the timeline after the provided commit time
|
||||||
|
List<HoodieInstant> instantsToRollback = table.getActiveTimeline().getCommitsAndCompactionTimeline()
|
||||||
|
.getReverseOrderedInstants()
|
||||||
|
.filter(instant -> HoodieActiveTimeline.GREATER.test(instant.getTimestamp(), restoreInstantTime))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
Map<String, List<HoodieRollbackMetadata>> instantToMetadata = new HashMap<>();
|
||||||
|
table.getActiveTimeline().createNewInstant(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, instantTime));
|
||||||
|
instantsToRollback.forEach(instant -> {
|
||||||
|
instantToMetadata.put(instant.getTimestamp(), Collections.singletonList(rollbackInstant(instant)));
|
||||||
|
LOG.info("Deleted instant " + instant);
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
return finishRestore(instantToMetadata,
|
||||||
|
instantsToRollback.stream().map(HoodieInstant::getTimestamp).collect(Collectors.toList()),
|
||||||
|
restoreTimer.endTimer()
|
||||||
|
);
|
||||||
|
} catch (IOException io) {
|
||||||
|
throw new HoodieRollbackException("unable to rollback instants " + instantsToRollback, io);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract HoodieRollbackMetadata rollbackInstant(HoodieInstant rollbackInstant);
|
||||||
|
|
||||||
|
private HoodieRestoreMetadata finishRestore(Map<String, List<HoodieRollbackMetadata>> instantToMetadata,
|
||||||
|
List<String> instantsRolledBack,
|
||||||
|
long durationInMs) throws IOException {
|
||||||
|
|
||||||
|
HoodieRestoreMetadata restoreMetadata = TimelineMetadataUtils.convertRestoreMetadata(
|
||||||
|
instantTime, durationInMs, instantsRolledBack, instantToMetadata);
|
||||||
|
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, instantTime),
|
||||||
|
TimelineMetadataUtils.serializeRestoreMetadata(restoreMetadata));
|
||||||
|
LOG.info("Commits " + instantsRolledBack + " rollback is complete. Restored table to " + restoreInstantTime);
|
||||||
|
|
||||||
|
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
||||||
|
LOG.info("Cleaning up older restore meta files");
|
||||||
|
// Cleanup of older cleaner meta files
|
||||||
|
// TODO - make the commit archival generic and archive rollback metadata
|
||||||
|
FSUtils.deleteOlderRollbackMetaFiles(
|
||||||
|
table.getMetaClient().getFs(),
|
||||||
|
table.getMetaClient().getMetaPath(),
|
||||||
|
table.getActiveTimeline().getRestoreTimeline().getInstants()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return restoreMetadata;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.restore;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.exception.HoodieRollbackException;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
public class CopyOnWriteRestoreActionExecutor extends BaseRestoreActionExecutor {
|
||||||
|
|
||||||
|
public CopyOnWriteRestoreActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
String restoreInstantTime) {
|
||||||
|
super(jsc, config, table, instantTime, restoreInstantTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected HoodieRollbackMetadata rollbackInstant(HoodieInstant instantToRollback) {
|
||||||
|
table.getMetaClient().reloadActiveTimeline();
|
||||||
|
CopyOnWriteRollbackActionExecutor rollbackActionExecutor = new CopyOnWriteRollbackActionExecutor(
|
||||||
|
jsc,
|
||||||
|
config,
|
||||||
|
table,
|
||||||
|
HoodieActiveTimeline.createNewInstantTime(),
|
||||||
|
instantToRollback,
|
||||||
|
true,
|
||||||
|
true);
|
||||||
|
if (!instantToRollback.getAction().equals(HoodieTimeline.COMMIT_ACTION)) {
|
||||||
|
throw new HoodieRollbackException("Unsupported action in rollback instant:" + instantToRollback);
|
||||||
|
}
|
||||||
|
return rollbackActionExecutor.execute();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.restore;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.hudi.table.action.rollback.MergeOnReadRollbackActionExecutor;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
public class MergeOnReadRestoreActionExecutor extends BaseRestoreActionExecutor {
|
||||||
|
|
||||||
|
public MergeOnReadRestoreActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
String restoreInstantTime) {
|
||||||
|
super(jsc, config, table, instantTime, restoreInstantTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected HoodieRollbackMetadata rollbackInstant(HoodieInstant instantToRollback) {
|
||||||
|
table.getMetaClient().reloadActiveTimeline();
|
||||||
|
MergeOnReadRollbackActionExecutor rollbackActionExecutor = new MergeOnReadRollbackActionExecutor(
|
||||||
|
jsc,
|
||||||
|
config,
|
||||||
|
table,
|
||||||
|
HoodieActiveTimeline.createNewInstantTime(),
|
||||||
|
instantToRollback,
|
||||||
|
true,
|
||||||
|
true);
|
||||||
|
|
||||||
|
switch (instantToRollback.getAction()) {
|
||||||
|
case HoodieTimeline.COMMIT_ACTION:
|
||||||
|
case HoodieTimeline.DELTA_COMMIT_ACTION:
|
||||||
|
case HoodieTimeline.COMPACTION_ACTION:
|
||||||
|
// TODO : Get file status and create a rollback stat and file
|
||||||
|
// TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
|
||||||
|
// delete these files when it does not see a corresponding instant file under .hoodie
|
||||||
|
return rollbackActionExecutor.execute();
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException("invalid action name " + instantToRollback.getAction());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,192 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.rollback;
|
||||||
|
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
|
||||||
|
import org.apache.hudi.common.util.HoodieTimer;
|
||||||
|
import org.apache.hudi.common.util.Option;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
import org.apache.hudi.exception.HoodieRollbackException;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public abstract class BaseRollbackActionExecutor extends BaseActionExecutor<HoodieRollbackMetadata> {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(BaseRollbackActionExecutor.class);
|
||||||
|
|
||||||
|
protected final HoodieInstant instantToRollback;
|
||||||
|
protected final boolean deleteInstants;
|
||||||
|
protected final boolean skipTimelinePublish;
|
||||||
|
|
||||||
|
public BaseRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant instantToRollback,
|
||||||
|
boolean deleteInstants) {
|
||||||
|
this(jsc, config, table, instantTime, instantToRollback, deleteInstants, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BaseRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant instantToRollback,
|
||||||
|
boolean deleteInstants,
|
||||||
|
boolean skipTimelinePublish) {
|
||||||
|
super(jsc, config, table, instantTime);
|
||||||
|
this.instantToRollback = instantToRollback;
|
||||||
|
this.deleteInstants = deleteInstants;
|
||||||
|
this.skipTimelinePublish = skipTimelinePublish;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract List<HoodieRollbackStat> executeRollback() throws IOException;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HoodieRollbackMetadata execute() {
|
||||||
|
HoodieTimer rollbackTimer = new HoodieTimer().startTimer();
|
||||||
|
HoodieRollbackMetadata rollbackMetadata = TimelineMetadataUtils.convertRollbackMetadata(
|
||||||
|
instantTime,
|
||||||
|
Option.of(rollbackTimer.endTimer()),
|
||||||
|
Collections.singletonList(instantToRollback.getTimestamp()),
|
||||||
|
doRollbackAndGetStats());
|
||||||
|
if (!skipTimelinePublish) {
|
||||||
|
finishRollback(rollbackMetadata);
|
||||||
|
}
|
||||||
|
return rollbackMetadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<HoodieRollbackStat> doRollbackAndGetStats() {
|
||||||
|
final String instantTimeToRollback = instantToRollback.getTimestamp();
|
||||||
|
final boolean isPendingCompaction = Objects.equals(HoodieTimeline.COMPACTION_ACTION, instantToRollback.getAction())
|
||||||
|
&& !instantToRollback.isCompleted();
|
||||||
|
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
|
||||||
|
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
|
||||||
|
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
||||||
|
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants()
|
||||||
|
.map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
savepoints.forEach(s -> {
|
||||||
|
if (s.contains(instantTimeToRollback)) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Could not rollback a savepointed commit. Delete savepoint first before rolling back" + s);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
|
||||||
|
LOG.info("No commits to rollback " + instantTimeToRollback);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure only the last n commits are being rolled back
|
||||||
|
// If there is a commit in-between or after that is not rolled back, then abort
|
||||||
|
if (!isPendingCompaction) {
|
||||||
|
if ((instantTimeToRollback != null) && !commitTimeline.empty()
|
||||||
|
&& !commitTimeline.findInstantsAfter(instantTimeToRollback, Integer.MAX_VALUE).empty()) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Found commits after time :" + instantTimeToRollback + ", please rollback greater commits first");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if ((instantTimeToRollback != null) && !inflights.isEmpty()
|
||||||
|
&& (inflights.indexOf(instantTimeToRollback) != inflights.size() - 1)) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Found in-flight commits after time :" + instantTimeToRollback + ", please rollback greater commits first");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<HoodieRollbackStat> stats = executeRollback();
|
||||||
|
LOG.info("Rolled back inflight instant " + instantTimeToRollback);
|
||||||
|
if (!isPendingCompaction) {
|
||||||
|
if (!table.getIndex().rollbackCommit(instantTimeToRollback)) {
|
||||||
|
throw new HoodieRollbackException("Rollback index changes failed, for time :" + instantTimeToRollback);
|
||||||
|
}
|
||||||
|
LOG.info("Index rolled back for commits " + instantTimeToRollback);
|
||||||
|
}
|
||||||
|
return stats;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Unable to execute rollback ", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void finishRollback(HoodieRollbackMetadata rollbackMetadata) throws HoodieIOException {
|
||||||
|
try {
|
||||||
|
table.getActiveTimeline().createNewInstant(
|
||||||
|
new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION, instantTime));
|
||||||
|
table.getActiveTimeline().saveAsComplete(
|
||||||
|
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, instantTime),
|
||||||
|
TimelineMetadataUtils.serializeRollbackMetadata(rollbackMetadata));
|
||||||
|
LOG.info("Rollback of Commits " + rollbackMetadata.getCommitsRollback() + " is complete");
|
||||||
|
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
||||||
|
LOG.info("Cleaning up older rollback meta files");
|
||||||
|
FSUtils.deleteOlderRollbackMetaFiles(table.getMetaClient().getFs(),
|
||||||
|
table.getMetaClient().getMetaPath(),
|
||||||
|
table.getActiveTimeline().getRollbackTimeline().getInstants());
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("Error executing rollback at instant " + instantTime, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete Inflight instant if enabled.
|
||||||
|
*
|
||||||
|
* @param deleteInstant Enable Deletion of Inflight instant
|
||||||
|
* @param activeTimeline Hoodie active timeline
|
||||||
|
* @param instantToBeDeleted Instant to be deleted
|
||||||
|
*/
|
||||||
|
protected void deleteInflightAndRequestedInstant(boolean deleteInstant,
|
||||||
|
HoodieActiveTimeline activeTimeline,
|
||||||
|
HoodieInstant instantToBeDeleted) {
|
||||||
|
// Remove marker files always on rollback
|
||||||
|
table.deleteMarkerDir(instantToBeDeleted.getTimestamp());
|
||||||
|
|
||||||
|
// Remove the rolled back inflight commits
|
||||||
|
if (deleteInstant) {
|
||||||
|
LOG.info("Deleting instant=" + instantToBeDeleted);
|
||||||
|
activeTimeline.deletePending(instantToBeDeleted);
|
||||||
|
if (instantToBeDeleted.isInflight() && !table.getMetaClient().getTimelineLayoutVersion().isNullVersion()) {
|
||||||
|
// Delete corresponding requested instant
|
||||||
|
instantToBeDeleted = new HoodieInstant(HoodieInstant.State.REQUESTED, instantToBeDeleted.getAction(),
|
||||||
|
instantToBeDeleted.getTimestamp());
|
||||||
|
activeTimeline.deletePending(instantToBeDeleted);
|
||||||
|
}
|
||||||
|
LOG.info("Deleted pending commit " + instantToBeDeleted);
|
||||||
|
} else {
|
||||||
|
LOG.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.rollback;
|
||||||
|
|
||||||
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class CopyOnWriteRollbackActionExecutor extends BaseRollbackActionExecutor {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(CopyOnWriteRollbackActionExecutor.class);
|
||||||
|
|
||||||
|
public CopyOnWriteRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants) {
|
||||||
|
super(jsc, config, table, instantTime, commitInstant, deleteInstants);
|
||||||
|
}
|
||||||
|
|
||||||
|
public CopyOnWriteRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants,
|
||||||
|
boolean skipTimelinePublish) {
|
||||||
|
super(jsc, config, table, instantTime, commitInstant, deleteInstants, skipTimelinePublish);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<HoodieRollbackStat> executeRollback() throws IOException {
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
List<HoodieRollbackStat> stats = new ArrayList<>();
|
||||||
|
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||||
|
HoodieInstant resolvedInstant = instantToRollback;
|
||||||
|
|
||||||
|
if (instantToRollback.isCompleted()) {
|
||||||
|
LOG.info("Unpublishing instant " + instantToRollback);
|
||||||
|
resolvedInstant = activeTimeline.revertToInflight(instantToRollback);
|
||||||
|
}
|
||||||
|
|
||||||
|
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
||||||
|
// deleting the timeline file
|
||||||
|
if (!resolvedInstant.isRequested()) {
|
||||||
|
// delete all the data files for this commit
|
||||||
|
LOG.info("Clean out all parquet files generated for commit: " + resolvedInstant);
|
||||||
|
List<RollbackRequest> rollbackRequests = generateRollbackRequests(resolvedInstant);
|
||||||
|
|
||||||
|
//TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||||
|
stats = new RollbackHelper(table.getMetaClient(), config).performRollback(jsc, resolvedInstant, rollbackRequests);
|
||||||
|
}
|
||||||
|
// Delete Inflight instant if enabled
|
||||||
|
deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, resolvedInstant);
|
||||||
|
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback)
|
||||||
|
throws IOException {
|
||||||
|
return FSUtils.getAllPartitionPaths(table.getMetaClient().getFs(), table.getMetaClient().getBasePath(),
|
||||||
|
config.shouldAssumeDatePartitioning()).stream()
|
||||||
|
.map(partitionPath -> RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,241 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi.table.action.rollback;
|
||||||
|
|
||||||
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
|
import org.apache.hudi.common.model.FileSlice;
|
||||||
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||||
|
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.util.ValidationUtils;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.UncheckedIOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
public class MergeOnReadRollbackActionExecutor extends BaseRollbackActionExecutor {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(MergeOnReadRollbackActionExecutor.class);
|
||||||
|
|
||||||
|
public MergeOnReadRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants) {
|
||||||
|
super(jsc, config, table, instantTime, commitInstant, deleteInstants);
|
||||||
|
}
|
||||||
|
|
||||||
|
public MergeOnReadRollbackActionExecutor(JavaSparkContext jsc,
|
||||||
|
HoodieWriteConfig config,
|
||||||
|
HoodieTable<?> table,
|
||||||
|
String instantTime,
|
||||||
|
HoodieInstant commitInstant,
|
||||||
|
boolean deleteInstants,
|
||||||
|
boolean skipTimelinePublish) {
|
||||||
|
super(jsc, config, table, instantTime, commitInstant, deleteInstants, skipTimelinePublish);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<HoodieRollbackStat> executeRollback() throws IOException {
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
LOG.error("Rolling back instant " + instantToRollback.getTimestamp());
|
||||||
|
|
||||||
|
HoodieInstant resolvedInstant = instantToRollback;
|
||||||
|
// Atomically un-publish all non-inflight commits
|
||||||
|
if (instantToRollback.isCompleted()) {
|
||||||
|
LOG.error("Un-publishing instant " + instantToRollback + ", deleteInstants=" + deleteInstants);
|
||||||
|
resolvedInstant = table.getActiveTimeline().revertToInflight(instantToRollback);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<HoodieRollbackStat> allRollbackStats = new ArrayList<>();
|
||||||
|
|
||||||
|
// At the moment, MOR table type does not support bulk nested rollbacks. Nested rollbacks is an experimental
|
||||||
|
// feature that is expensive. To perform nested rollbacks, initiate multiple requests of client.rollback
|
||||||
|
// (commitToRollback).
|
||||||
|
// NOTE {@link HoodieCompactionConfig#withCompactionLazyBlockReadEnabled} needs to be set to TRUE. This is
|
||||||
|
// required to avoid OOM when merging multiple LogBlocks performed during nested rollbacks.
|
||||||
|
// Atomically un-publish all non-inflight commits
|
||||||
|
// Atomically un-publish all non-inflight commits
|
||||||
|
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
|
||||||
|
// deleting the timeline file
|
||||||
|
if (!resolvedInstant.isRequested()) {
|
||||||
|
LOG.info("Unpublished " + resolvedInstant);
|
||||||
|
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, resolvedInstant);
|
||||||
|
// TODO: We need to persist this as rollback workload and use it in case of partial failures
|
||||||
|
allRollbackStats = new RollbackHelper(table.getMetaClient(), config).performRollback(jsc, resolvedInstant, rollbackRequests);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete Inflight instants if enabled
|
||||||
|
deleteInflightAndRequestedInstant(deleteInstants, table.getActiveTimeline(), resolvedInstant);
|
||||||
|
|
||||||
|
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
|
||||||
|
|
||||||
|
return allRollbackStats;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate all rollback requests that we need to perform for rolling back this action without actually performing
|
||||||
|
* rolling back.
|
||||||
|
*
|
||||||
|
* @param jsc JavaSparkContext
|
||||||
|
* @param instantToRollback Instant to Rollback
|
||||||
|
* @return list of rollback requests
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private List<RollbackRequest> generateRollbackRequests(JavaSparkContext jsc, HoodieInstant instantToRollback)
|
||||||
|
throws IOException {
|
||||||
|
String commit = instantToRollback.getTimestamp();
|
||||||
|
List<String> partitions = FSUtils.getAllPartitionPaths(table.getMetaClient().getFs(), table.getMetaClient().getBasePath(),
|
||||||
|
config.shouldAssumeDatePartitioning());
|
||||||
|
int sparkPartitions = Math.max(Math.min(partitions.size(), config.getRollbackParallelism()), 1);
|
||||||
|
return jsc.parallelize(partitions, Math.min(partitions.size(), sparkPartitions)).flatMap(partitionPath -> {
|
||||||
|
HoodieActiveTimeline activeTimeline = table.getActiveTimeline().reload();
|
||||||
|
List<RollbackRequest> partitionRollbackRequests = new ArrayList<>();
|
||||||
|
switch (instantToRollback.getAction()) {
|
||||||
|
case HoodieTimeline.COMMIT_ACTION:
|
||||||
|
LOG.info("Rolling back commit action. There are higher delta commits. So only rolling back this instant");
|
||||||
|
partitionRollbackRequests.add(
|
||||||
|
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
||||||
|
break;
|
||||||
|
case HoodieTimeline.COMPACTION_ACTION:
|
||||||
|
// If there is no delta commit present after the current commit (if compaction), no action, else we
|
||||||
|
// need to make sure that a compaction commit rollback also deletes any log files written as part of the
|
||||||
|
// succeeding deltacommit.
|
||||||
|
boolean higherDeltaCommits =
|
||||||
|
!activeTimeline.getDeltaCommitTimeline().filterCompletedInstants().findInstantsAfter(commit, 1).empty();
|
||||||
|
if (higherDeltaCommits) {
|
||||||
|
// Rollback of a compaction action with no higher deltacommit means that the compaction is scheduled
|
||||||
|
// and has not yet finished. In this scenario we should delete only the newly created parquet files
|
||||||
|
// and not corresponding base commit log files created with this as baseCommit since updates would
|
||||||
|
// have been written to the log files.
|
||||||
|
LOG.info("Rolling back compaction. There are higher delta commits. So only deleting data files");
|
||||||
|
partitionRollbackRequests.add(
|
||||||
|
RollbackRequest.createRollbackRequestWithDeleteDataFilesOnlyAction(partitionPath, instantToRollback));
|
||||||
|
} else {
|
||||||
|
// No deltacommits present after this compaction commit (inflight or requested). In this case, we
|
||||||
|
// can also delete any log files that were created with this compaction commit as base
|
||||||
|
// commit.
|
||||||
|
LOG.info("Rolling back compaction plan. There are NO higher delta commits. So deleting both data and"
|
||||||
|
+ " log files");
|
||||||
|
partitionRollbackRequests.add(
|
||||||
|
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HoodieTimeline.DELTA_COMMIT_ACTION:
|
||||||
|
// --------------------------------------------------------------------------------------------------
|
||||||
|
// (A) The following cases are possible if index.canIndexLogFiles and/or index.isGlobal
|
||||||
|
// --------------------------------------------------------------------------------------------------
|
||||||
|
// (A.1) Failed first commit - Inserts were written to log files and HoodieWriteStat has no entries. In
|
||||||
|
// this scenario we would want to delete these log files.
|
||||||
|
// (A.2) Failed recurring commit - Inserts/Updates written to log files. In this scenario,
|
||||||
|
// HoodieWriteStat will have the baseCommitTime for the first log file written, add rollback blocks.
|
||||||
|
// (A.3) Rollback triggered for first commit - Inserts were written to the log files but the commit is
|
||||||
|
// being reverted. In this scenario, HoodieWriteStat will be `null` for the attribute prevCommitTime and
|
||||||
|
// and hence will end up deleting these log files. This is done so there are no orphan log files
|
||||||
|
// lying around.
|
||||||
|
// (A.4) Rollback triggered for recurring commits - Inserts/Updates are being rolled back, the actions
|
||||||
|
// taken in this scenario is a combination of (A.2) and (A.3)
|
||||||
|
// ---------------------------------------------------------------------------------------------------
|
||||||
|
// (B) The following cases are possible if !index.canIndexLogFiles and/or !index.isGlobal
|
||||||
|
// ---------------------------------------------------------------------------------------------------
|
||||||
|
// (B.1) Failed first commit - Inserts were written to parquet files and HoodieWriteStat has no entries.
|
||||||
|
// In this scenario, we delete all the parquet files written for the failed commit.
|
||||||
|
// (B.2) Failed recurring commits - Inserts were written to parquet files and updates to log files. In
|
||||||
|
// this scenario, perform (A.1) and for updates written to log files, write rollback blocks.
|
||||||
|
// (B.3) Rollback triggered for first commit - Same as (B.1)
|
||||||
|
// (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files
|
||||||
|
// as well if the base parquet file gets deleted.
|
||||||
|
try {
|
||||||
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
||||||
|
table.getMetaClient().getCommitTimeline()
|
||||||
|
.getInstantDetails(
|
||||||
|
new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp()))
|
||||||
|
.get(),
|
||||||
|
HoodieCommitMetadata.class);
|
||||||
|
|
||||||
|
// In case all data was inserts and the commit failed, delete the file belonging to that commit
|
||||||
|
// We do not know fileIds for inserts (first inserts are either log files or parquet files),
|
||||||
|
// delete all files for the corresponding failed commit, if present (same as COW)
|
||||||
|
partitionRollbackRequests.add(
|
||||||
|
RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback));
|
||||||
|
|
||||||
|
// append rollback blocks for updates
|
||||||
|
if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) {
|
||||||
|
partitionRollbackRequests
|
||||||
|
.addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} catch (IOException io) {
|
||||||
|
throw new UncheckedIOException("Failed to collect rollback actions for commit " + commit, io);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return partitionRollbackRequests.iterator();
|
||||||
|
}).filter(Objects::nonNull).collect();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RollbackRequest> generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant,
|
||||||
|
HoodieCommitMetadata commitMetadata) {
|
||||||
|
ValidationUtils.checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION));
|
||||||
|
|
||||||
|
// wStat.getPrevCommit() might not give the right commit time in the following
|
||||||
|
// scenario : If a compaction was scheduled, the new commitTime associated with the requested compaction will be
|
||||||
|
// used to write the new log files. In this case, the commit time for the log file is the compaction requested time.
|
||||||
|
// But the index (global) might store the baseCommit of the parquet and not the requested, hence get the
|
||||||
|
// baseCommit always by listing the file slice
|
||||||
|
Map<String, String> fileIdToBaseCommitTimeForLogMap = table.getSliceView().getLatestFileSlices(partitionPath)
|
||||||
|
.collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime));
|
||||||
|
return commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> {
|
||||||
|
|
||||||
|
// Filter out stats without prevCommit since they are all inserts
|
||||||
|
boolean validForRollback = (wStat != null) && (!wStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT))
|
||||||
|
&& (wStat.getPrevCommit() != null) && fileIdToBaseCommitTimeForLogMap.containsKey(wStat.getFileId());
|
||||||
|
|
||||||
|
if (validForRollback) {
|
||||||
|
// For sanity, log instant time can never be less than base-commit on which we are rolling back
|
||||||
|
ValidationUtils
|
||||||
|
.checkArgument(HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId()),
|
||||||
|
rollbackInstant.getTimestamp(), HoodieTimeline.LESSER_OR_EQUAL));
|
||||||
|
}
|
||||||
|
|
||||||
|
return validForRollback && HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get(
|
||||||
|
// Base Ts should be strictly less. If equal (for inserts-to-logs), the caller employs another option
|
||||||
|
// to delete and we should not step on it
|
||||||
|
wStat.getFileId()), rollbackInstant.getTimestamp(), HoodieTimeline.LESSER);
|
||||||
|
}).map(wStat -> {
|
||||||
|
String baseCommitTime = fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId());
|
||||||
|
return RollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, wStat.getFileId(),
|
||||||
|
baseCommitTime, rollbackInstant);
|
||||||
|
}).collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hudi.table.rollback;
|
package org.apache.hudi.table.action.rollback;
|
||||||
|
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
import org.apache.hudi.common.fs.FSUtils;
|
import org.apache.hudi.common.fs.FSUtils;
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.hudi.table.rollback;
|
package org.apache.hudi.table.action.rollback;
|
||||||
|
|
||||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
import org.apache.hudi.common.util.Option;
|
import org.apache.hudi.common.util.Option;
|
||||||
@@ -132,7 +132,7 @@ public class TestClientRollback extends TestHoodieClientBase {
|
|||||||
|
|
||||||
// rolling back to a non existent savepoint must not succeed
|
// rolling back to a non existent savepoint must not succeed
|
||||||
try {
|
try {
|
||||||
client.rollbackToSavepoint("001");
|
client.restoreToSavepoint("001");
|
||||||
fail("Rolling back to non-existent savepoint should not be allowed");
|
fail("Rolling back to non-existent savepoint should not be allowed");
|
||||||
} catch (HoodieRollbackException e) {
|
} catch (HoodieRollbackException e) {
|
||||||
// this is good
|
// this is good
|
||||||
@@ -140,24 +140,18 @@ public class TestClientRollback extends TestHoodieClientBase {
|
|||||||
|
|
||||||
// rollback to savepoint 002
|
// rollback to savepoint 002
|
||||||
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
|
HoodieInstant savepoint = table.getCompletedSavepointTimeline().getInstants().findFirst().get();
|
||||||
client.rollbackToSavepoint(savepoint.getTimestamp());
|
client.restoreToSavepoint(savepoint.getTimestamp());
|
||||||
|
|
||||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||||
table = HoodieTable.create(metaClient, getConfig(), jsc);
|
table = HoodieTable.create(metaClient, getConfig(), jsc);
|
||||||
final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
|
final BaseFileOnlyView view3 = table.getBaseFileOnlyView();
|
||||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"))).collect(Collectors.toList());
|
||||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("002"));
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
assertEquals("The data files for commit 002 be available", 3, dataFiles.size());
|
assertEquals("The data files for commit 002 be available", 3, dataFiles.size());
|
||||||
|
|
||||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"))).collect(Collectors.toList());
|
||||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("003"));
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size());
|
assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size());
|
||||||
|
|
||||||
dataFiles = partitionPaths.stream().flatMap(s -> {
|
dataFiles = partitionPaths.stream().flatMap(s -> view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"))).collect(Collectors.toList());
|
||||||
return view3.getAllBaseFiles(s).filter(f -> f.getCommitTime().equals("004"));
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
|
assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -900,7 +900,7 @@ public class TestCleaner extends TestHoodieClientBase {
|
|||||||
table.getActiveTimeline().transitionRequestedToInflight(
|
table.getActiveTimeline().transitionRequestedToInflight(
|
||||||
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty());
|
new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "000"), Option.empty());
|
||||||
metaClient.reloadActiveTimeline();
|
metaClient.reloadActiveTimeline();
|
||||||
table.rollback(jsc, new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true);
|
table.rollback(jsc, "001", new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "000"), true);
|
||||||
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
|
assertEquals("All temp files are deleted.", 0, getTotalTempFiles());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -51,15 +51,12 @@ public class TimelineMetadataUtils {
|
|||||||
|
|
||||||
private static final Integer DEFAULT_VERSION = 1;
|
private static final Integer DEFAULT_VERSION = 1;
|
||||||
|
|
||||||
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime, Option<Long> durationInMs,
|
public static HoodieRestoreMetadata convertRestoreMetadata(String startRestoreTime,
|
||||||
List<String> commits, Map<String, List<HoodieRollbackStat>> commitToStats) {
|
long durationInMs,
|
||||||
Map<String, List<HoodieRollbackMetadata>> commitToStatsMap = new HashMap<>();
|
List<String> commits,
|
||||||
for (Map.Entry<String, List<HoodieRollbackStat>> commitToStat : commitToStats.entrySet()) {
|
Map<String, List<HoodieRollbackMetadata>> instantToRollbackMetadata) {
|
||||||
commitToStatsMap.put(commitToStat.getKey(),
|
return new HoodieRestoreMetadata(startRestoreTime, durationInMs, commits,
|
||||||
Collections.singletonList(convertRollbackMetadata(startRestoreTime, durationInMs, commits, commitToStat.getValue())));
|
Collections.unmodifiableMap(instantToRollbackMetadata), DEFAULT_VERSION);
|
||||||
}
|
|
||||||
return new HoodieRestoreMetadata(startRestoreTime, durationInMs.orElseGet(() -> -1L), commits,
|
|
||||||
Collections.unmodifiableMap(commitToStatsMap), DEFAULT_VERSION);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, Option<Long> durationInMs,
|
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime, Option<Long> durationInMs,
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileStatus;
|
|||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@@ -115,8 +116,8 @@ public class TestInLineFileSystem {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Test
|
@Test
|
||||||
// Disabling flaky test for now https://issues.apache.org/jira/browse/HUDI-786
|
@Ignore // Disabling flaky test for now https://issues.apache.org/jira/browse/HUDI-786
|
||||||
public void testFileSystemApis() throws IOException {
|
public void testFileSystemApis() throws IOException {
|
||||||
OuterPathInfo outerPathInfo = generateOuterFileAndGetInfo(1000);
|
OuterPathInfo outerPathInfo = generateOuterFileAndGetInfo(1000);
|
||||||
Path inlinePath = FileSystemTestUtils.getPhantomFile(outerPathInfo.outerPath, outerPathInfo.startOffset, outerPathInfo.length);
|
Path inlinePath = FileSystemTestUtils.getPhantomFile(outerPathInfo.outerPath, outerPathInfo.startOffset, outerPathInfo.length);
|
||||||
|
|||||||
Reference in New Issue
Block a user