1
0

[HUDI-308] Avoid Renames for tracking state transitions of all actions on dataset

This commit is contained in:
Balaji Varadarajan
2019-12-04 01:02:17 -08:00
committed by Balaji Varadarajan
parent 8963a68e6a
commit 9a1f698eef
47 changed files with 1121 additions and 403 deletions

View File

@@ -179,9 +179,10 @@ public class CompactionAdminClient extends AbstractHoodieClient {
// revert if in inflight state
metaClient.getActiveTimeline().revertCompactionInflightToRequested(inflight);
}
// Overwrite compaction plan with updated info
metaClient.getActiveTimeline().saveToCompactionRequested(
new HoodieInstant(State.REQUESTED, COMPACTION_ACTION, compactionOperationWithInstant.getLeft()),
AvroUtils.serializeCompactionPlan(newPlan));
AvroUtils.serializeCompactionPlan(newPlan), true);
}
return res;
}
@@ -218,8 +219,9 @@ public class CompactionAdminClient extends AbstractHoodieClient {
*/
private static HoodieCompactionPlan getCompactionPlan(HoodieTableMetaClient metaClient, String compactionInstant)
throws IOException {
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(metaClient.getActiveTimeline()
.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
metaClient.getActiveTimeline().readPlanAsBytes(
HoodieTimeline.getCompactionRequestedInstant(compactionInstant)).get());
return compactionPlan;
}

View File

@@ -67,7 +67,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
* cleaned)
*/
public void clean() throws HoodieIOException {
String startCleanTime = HoodieActiveTimeline.createNewCommitTime();
String startCleanTime = HoodieActiveTimeline.createNewInstantTime();
clean(startCleanTime);
}
@@ -86,7 +86,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
// If there are inflight(failed) or previously requested clean operation, first perform them
table.getCleanTimeline().filterInflightsAndRequested().getInstants().forEach(hoodieInstant -> {
LOG.info("There were previously unfinished cleaner operations. Finishing Instant=" + hoodieInstant);
runClean(table, hoodieInstant.getTimestamp());
runClean(table, hoodieInstant);
});
Option<HoodieCleanerPlan> cleanerPlanOpt = scheduleClean(startCleanTime);
@@ -96,7 +96,7 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
if ((cleanerPlan.getFilesToBeDeletedPerPartition() != null)
&& !cleanerPlan.getFilesToBeDeletedPerPartition().isEmpty()) {
final HoodieTable<T> hoodieTable = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
return runClean(hoodieTable, startCleanTime);
return runClean(hoodieTable, HoodieTimeline.getCleanRequestedInstant(startCleanTime), cleanerPlan);
}
}
return null;
@@ -136,13 +136,20 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
* Executes the Cleaner plan stored in the instant metadata.
*
* @param table Hoodie Table
* @param cleanInstantTs Cleaner Instant Timestamp
* @param cleanInstant Cleaner Instant
*/
@VisibleForTesting
protected HoodieCleanMetadata runClean(HoodieTable<T> table, String cleanInstantTs) {
HoodieInstant cleanInstant =
table.getCleanTimeline().getInstants().filter(x -> x.getTimestamp().equals(cleanInstantTs)).findFirst().get();
protected HoodieCleanMetadata runClean(HoodieTable<T> table, HoodieInstant cleanInstant) {
try {
HoodieCleanerPlan cleanerPlan = CleanerUtils.getCleanerPlan(table.getMetaClient(), cleanInstant);
return runClean(table, cleanInstant, cleanerPlan);
} catch (IOException e) {
throw new HoodieIOException(e.getMessage(), e);
}
}
private HoodieCleanMetadata runClean(HoodieTable<T> table, HoodieInstant cleanInstant,
HoodieCleanerPlan cleanerPlan) {
Preconditions.checkArgument(
cleanInstant.getState().equals(State.REQUESTED) || cleanInstant.getState().equals(State.INFLIGHT));
@@ -152,10 +159,11 @@ public class HoodieCleanClient<T extends HoodieRecordPayload> extends AbstractHo
if (!cleanInstant.isInflight()) {
// Mark as inflight first
cleanInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant);
cleanInstant = table.getActiveTimeline().transitionCleanRequestedToInflight(cleanInstant,
AvroUtils.serializeCleanerPlan(cleanerPlan));
}
List<HoodieCleanStat> cleanStats = table.clean(jsc, cleanInstant);
List<HoodieCleanStat> cleanStats = table.clean(jsc, cleanInstant, cleanerPlan);
if (cleanStats.isEmpty()) {
return HoodieCleanMetadata.newBuilder().build();

View File

@@ -80,8 +80,6 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -101,7 +99,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
private static final Logger LOG = LogManager.getLogger(HoodieWriteClient.class);
private static final String UPDATE_STR = "update";
private static final String LOOKUP_STR = "lookup";
private final boolean rollbackInFlight;
private final boolean rollbackPending;
private final transient HoodieMetrics metrics;
private final transient HoodieIndex<T> index;
private final transient HoodieCleanClient<T> cleanClient;
@@ -119,21 +117,21 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
/**
*
*/
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight) {
this(jsc, clientConfig, rollbackInFlight, HoodieIndex.createIndex(clientConfig, jsc));
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending) {
this(jsc, clientConfig, rollbackPending, HoodieIndex.createIndex(clientConfig, jsc));
}
@VisibleForTesting
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight, HoodieIndex index) {
this(jsc, clientConfig, rollbackInFlight, index, Option.empty());
HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending, HoodieIndex index) {
this(jsc, clientConfig, rollbackPending, index, Option.empty());
}
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackInFlight,
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending,
HoodieIndex index, Option<EmbeddedTimelineService> timelineService) {
super(jsc, clientConfig, timelineService);
this.index = index;
this.metrics = new HoodieMetrics(config, config.getTableName());
this.rollbackInFlight = rollbackInFlight;
this.rollbackPending = rollbackPending;
this.cleanClient = new HoodieCleanClient<>(jsc, config, metrics, timelineService);
}
@@ -356,7 +354,8 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
} else {
// if entire set of keys are non existent
JavaRDD<WriteStatus> writeStatusRDD = jsc.parallelize(Collections.EMPTY_LIST, 1);
saveWorkloadProfileMetadataToInflight(new WorkloadProfile(jsc.emptyRDD()), table, commitTime);
JavaRDD<WriteStatus> writeStatusRDD = jsc.emptyRDD();
commitOnAutoCommit(commitTime, writeStatusRDD, table.getMetaClient().getCommitActionType());
return writeStatusRDD;
}
@@ -388,6 +387,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
final List<String> fileIDPrefixes =
IntStream.range(0, parallelism).mapToObj(i -> FSUtils.createNewFileIdPfx()).collect(Collectors.toList());
table.getActiveTimeline().transitionRequestedToInflight(new HoodieInstant(State.REQUESTED,
table.getMetaClient().getCommitActionType(), commitTime), Option.empty());
JavaRDD<WriteStatus> writeStatusRDD = repartitionedRecords
.mapPartitionsWithIndex(new BulkInsertMapFunction<T>(commitTime, config, table, fileIDPrefixes), true)
.flatMap(writeStatuses -> writeStatuses.iterator());
@@ -435,9 +437,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
});
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
Option<HoodieInstant> instant =
activeTimeline.getCommitsTimeline().filterInflightsExcludingCompaction().lastInstant();
activeTimeline.saveToInflight(instant.get(), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
String commitActionType = table.getMetaClient().getCommitActionType();
HoodieInstant requested = new HoodieInstant(State.REQUESTED, commitActionType, commitTime);
activeTimeline.transitionRequestedToInflight(requested,
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
} catch (IOException io) {
throw new HoodieCommitException("Failed to commit " + commitTime + " unable to save inflight metadata ", io);
}
@@ -664,8 +667,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
HoodieSavepointMetadata metadata = AvroUtils.convertSavepointMetadata(user, comment, latestFilesMap);
// Nothing to save in the savepoint
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
AvroUtils.serializeSavepointMetadata(metadata));
table.getActiveTimeline().createNewInstant(
new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime));
table.getActiveTimeline()
.saveAsComplete(new HoodieInstant(true, HoodieTimeline.SAVEPOINT_ACTION, commitTime),
AvroUtils.serializeSavepointMetadata(metadata));
LOG.info("Savepoint " + commitTime + " created");
return true;
} catch (IOException e) {
@@ -792,23 +798,25 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
.filter(instant -> HoodieActiveTimeline.GREATER.test(instant.getTimestamp(), instantTime))
.collect(Collectors.toList());
// Start a rollback instant for all commits to be rolled back
String startRollbackInstant = startInstant();
String startRollbackInstant = HoodieActiveTimeline.createNewInstantTime();
// Start the timer
final Timer.Context context = startContext();
ImmutableMap.Builder<String, List<HoodieRollbackStat>> instantsToStats = ImmutableMap.builder();
table.getActiveTimeline().createNewInstant(
new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, startRollbackInstant));
instantsToRollback.stream().forEach(instant -> {
try {
switch (instant.getAction()) {
case HoodieTimeline.COMMIT_ACTION:
case HoodieTimeline.DELTA_COMMIT_ACTION:
List<HoodieRollbackStat> statsForInstant = doRollbackAndGetStats(instant.getTimestamp());
List<HoodieRollbackStat> statsForInstant = doRollbackAndGetStats(instant);
instantsToStats.put(instant.getTimestamp(), statsForInstant);
break;
case HoodieTimeline.COMPACTION_ACTION:
// TODO : Get file status and create a rollback stat and file
// TODO : Delete the .aux files along with the instant file, okay for now since the archival process will
// delete these files when it does not see a corresponding instant file under .hoodie
List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant.getTimestamp());
List<HoodieRollbackStat> statsForCompaction = doRollbackAndGetStats(instant);
instantsToStats.put(instant.getTimestamp(), statsForCompaction);
LOG.info("Deleted compaction instant " + instant);
break;
@@ -828,17 +836,16 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
}
}
private String startInstant() {
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
}
private Timer.Context startContext() {
return metrics.getRollbackCtx();
}
private List<HoodieRollbackStat> doRollbackAndGetStats(final String commitToRollback) throws IOException {
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
HoodieTimeline inflightCommitTimeline = table.getInflightCommitTimeline();
private List<HoodieRollbackStat> doRollbackAndGetStats(final HoodieInstant instantToRollback) throws
IOException {
final String commitToRollback = instantToRollback.getTimestamp();
HoodieTable<T> table = HoodieTable.getHoodieTable(
createMetaClient(true), config, jsc);
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
// Check if any of the commits is a savepoint - do not allow rollback on those commits
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
@@ -850,7 +857,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
}
});
if (commitTimeline.empty() && inflightCommitTimeline.empty()) {
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
// nothing to rollback
LOG.info("No commits to rollback " + commitToRollback);
}
@@ -865,14 +872,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
"Found commits after time :" + lastCommit + ", please rollback greater commits first");
}
List<String> inflights =
inflightCommitTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
if ((lastCommit != null) && !inflights.isEmpty() && (inflights.indexOf(lastCommit) != inflights.size() - 1)) {
throw new HoodieRollbackException(
"Found in-flight commits after time :" + lastCommit + ", please rollback greater commits first");
}
List<HoodieRollbackStat> stats = table.rollback(jsc, commitToRollback, true);
List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
LOG.info("Deleted inflight commits " + commitToRollback);
@@ -893,9 +900,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
}
HoodieRollbackMetadata rollbackMetadata =
AvroUtils.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
table.getActiveTimeline().saveAsComplete(new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
HoodieRollbackMetadata rollbackMetadata = AvroUtils
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
//TODO: varadarb - This will be fixed in subsequent PR when Rollback and Clean transition mimics that of commit
table.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION,
startRollbackTime));
table.getActiveTimeline().saveAsComplete(
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
AvroUtils.serializeRollbackMetadata(rollbackMetadata));
LOG.info("Commits " + commitsToRollback + " rollback is complete");
@@ -937,17 +948,22 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
}
private void rollbackInternal(String commitToRollback) {
if (commitToRollback.isEmpty()) {
LOG.info("List of commits to rollback is empty");
return;
}
final String startRollbackTime = startInstant();
final String startRollbackTime = HoodieActiveTimeline.createNewInstantTime();
final Timer.Context context = startContext();
// Create a Hoodie table which encapsulated the commits and files visible
try {
List<HoodieRollbackStat> stats = doRollbackAndGetStats(commitToRollback);
Map<String, List<HoodieRollbackStat>> statToCommit = new HashMap<>();
finishRollback(context, stats, Arrays.asList(commitToRollback), startRollbackTime);
// Create a Hoodie table which encapsulated the commits and files visible
HoodieTable<T> table = HoodieTable.getHoodieTable(
createMetaClient(true), config, jsc);
Option<HoodieInstant> rollbackInstantOpt =
Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitToRollback))
.findFirst());
if (rollbackInstantOpt.isPresent()) {
List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackInstantOpt.get());
finishRollback(context, stats, Arrays.asList(commitToRollback), startRollbackTime);
}
} catch (IOException e) {
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitToRollback,
e);
@@ -992,20 +1008,20 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
*/
public String startCommit() {
// NOTE : Need to ensure that rollback is done before a new commit is started
if (rollbackInFlight) {
// Only rollback inflight commit/delta-commits. Do not touch compaction commits
rollbackInflightCommits();
if (rollbackPending) {
// Only rollback pending commit/delta-commits. Do not touch compaction commits
rollbackPendingCommits();
}
String commitTime = HoodieActiveTimeline.createNewCommitTime();
String commitTime = HoodieActiveTimeline.createNewInstantTime();
startCommit(commitTime);
return commitTime;
}
public void startCommitWithTime(String instantTime) {
// NOTE : Need to ensure that rollback is done before a new commit is started
if (rollbackInFlight) {
if (rollbackPending) {
// Only rollback inflight commit/delta-commits. Do not touch compaction commits
rollbackInflightCommits();
rollbackPendingCommits();
}
startCommit(instantTime);
}
@@ -1023,14 +1039,14 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
String commitActionType = table.getMetaClient().getCommitActionType();
activeTimeline.createInflight(new HoodieInstant(true, commitActionType, instantTime));
activeTimeline.createNewInstant(new HoodieInstant(State.REQUESTED, commitActionType, instantTime));
}
/**
* Schedules a new compaction instant.
*/
public Option<String> scheduleCompaction(Option<Map<String, String>> extraMetadata) throws IOException {
String instantTime = HoodieActiveTimeline.createNewCommitTime();
String instantTime = HoodieActiveTimeline.createNewInstantTime();
LOG.info("Generate a new instant time " + instantTime);
boolean notEmpty = scheduleCompactionAtInstant(instantTime, extraMetadata);
return notEmpty ? Option.of(instantTime) : Option.empty();
@@ -1046,7 +1062,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
throws IOException {
HoodieTableMetaClient metaClient = createMetaClient(true);
// if there are inflight writes, their instantTime must not be less than that of compaction instant time
metaClient.getCommitsTimeline().filterInflightsExcludingCompaction().firstInstant().ifPresent(earliestInflight -> {
metaClient.getCommitsTimeline().filterPendingExcludingCompaction().firstInstant().ifPresent(earliestInflight -> {
Preconditions.checkArgument(
HoodieTimeline.compareTimestamps(earliestInflight.getTimestamp(), instantTime, HoodieTimeline.GREATER),
"Earliest write inflight instant time must be later than compaction time. Earliest :" + earliestInflight
@@ -1091,7 +1107,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
HoodieTable<T> table = HoodieTable.getHoodieTable(metaClient, config, jsc);
HoodieActiveTimeline timeline = metaClient.getActiveTimeline();
HoodieCompactionPlan compactionPlan = AvroUtils.deserializeCompactionPlan(
timeline.getInstantAuxiliaryDetails(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
timeline.readPlanAsBytes(HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime)).get());
// Merge extra meta-data passed by user with the one already in inflight compaction
Option<Map<String, String>> mergedMetaData = extraMetadata.map(m -> {
Map<String, String> merged = new HashMap<>();
@@ -1141,11 +1157,11 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
}
/**
* Cleanup all inflight commits.
* Cleanup all pending commits.
*/
private void rollbackInflightCommits() {
private void rollbackPendingCommits() {
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterInflightsExcludingCompaction();
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
List<String> commits = inflightTimeline.getReverseOrderedInstants().map(HoodieInstant::getTimestamp)
.collect(Collectors.toList());
for (String commit : commits) {
@@ -1238,7 +1254,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
HoodieTableMetaClient metaClient = createMetaClient(true);
HoodieCompactionPlan compactionPlan =
CompactionUtils.getCompactionPlan(metaClient, compactionInstant.getTimestamp());
// Mark instant as compaction inflight
activeTimeline.transitionCompactionRequestedToInflight(compactionInstant);
compactionTimer = metrics.getCompactionCtx();
@@ -1306,7 +1321,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
*/
@VisibleForTesting
void rollbackInflightCompaction(HoodieInstant inflightInstant, HoodieTable table) throws IOException {
table.rollback(jsc, inflightInstant.getTimestamp(), false);
table.rollback(jsc, inflightInstant, false);
// Revert instant state file
table.getActiveTimeline().revertCompactionInflightToRequested(inflightInstant);
}

View File

@@ -18,7 +18,9 @@
package org.apache.hudi.client.utils;
import org.apache.hudi.common.model.TimelineLayoutVersion;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.spark.api.java.JavaSparkContext;
@@ -35,6 +37,6 @@ public class ClientUtils {
public static HoodieTableMetaClient createMetaClient(JavaSparkContext jsc, HoodieWriteConfig config,
boolean loadActiveTimelineOnLoad) {
return new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), loadActiveTimelineOnLoad,
config.getConsistencyGuardConfig());
config.getConsistencyGuardConfig(), Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion())));
}
}

View File

@@ -21,6 +21,7 @@ package org.apache.hudi.config;
import org.apache.hudi.HoodieWriteClient;
import org.apache.hudi.WriteStatus;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.TimelineLayoutVersion;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.util.ConsistencyGuardConfig;
import org.apache.hudi.common.util.ReflectionUtils;
@@ -48,6 +49,7 @@ import java.util.Properties;
public class HoodieWriteConfig extends DefaultHoodieConfig {
public static final String TABLE_NAME = "hoodie.table.name";
private static final String TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
private static final String BASE_PATH_PROP = "hoodie.base.path";
private static final String AVRO_SCHEMA = "hoodie.avro.schema";
private static final String DEFAULT_PARALLELISM = "1500";
@@ -141,6 +143,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return Boolean.parseBoolean(props.getProperty(HOODIE_ASSUME_DATE_PARTITIONING_PROP));
}
public Integer getTimelineLayoutVersion() {
return Integer.parseInt(props.getProperty(TIMELINE_LAYOUT_VERSION));
}
public int getBulkInsertShuffleParallelism() {
return Integer.parseInt(props.getProperty(BULKINSERT_PARALLELISM));
}
@@ -587,6 +593,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return this;
}
public Builder withTimelineLayoutVersion(int version) {
props.setProperty(TIMELINE_LAYOUT_VERSION, String.valueOf(version));
return this;
}
public Builder withBulkInsertParallelism(int bulkInsertParallelism) {
props.setProperty(BULKINSERT_PARALLELISM, String.valueOf(bulkInsertParallelism));
return this;
@@ -693,7 +704,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(UPSERT_PARALLELISM), UPSERT_PARALLELISM, DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(DELETE_PARALLELISM), DELETE_PARALLELISM, DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM, DEFAULT_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM,
DEFAULT_ROLLBACK_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_INSERT_PROP), COMBINE_BEFORE_INSERT_PROP,
DEFAULT_COMBINE_BEFORE_INSERT);
setDefaultOnCondition(props, !props.containsKey(COMBINE_BEFORE_UPSERT_PROP), COMBINE_BEFORE_UPSERT_PROP,
@@ -733,6 +745,12 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
setDefaultOnCondition(props, !isConsistencyGuardSet,
ConsistencyGuardConfig.newBuilder().fromProperties(props).build());
setDefaultOnCondition(props, !props.containsKey(TIMELINE_LAYOUT_VERSION), TIMELINE_LAYOUT_VERSION,
String.valueOf(TimelineLayoutVersion.CURR_VERSION));
String layoutVersion = props.getProperty(TIMELINE_LAYOUT_VERSION);
// Ensure Layout Version is good
new TimelineLayoutVersion(Integer.parseInt(layoutVersion));
// Build WriteConfig at the end
HoodieWriteConfig config = new HoodieWriteConfig(props);
Preconditions.checkArgument(config.getBasePath() != null);

View File

@@ -19,7 +19,7 @@
package org.apache.hudi.io;
import org.apache.hudi.avro.model.HoodieArchivedMetaEntry;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieCompactionPlan;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.common.model.ActionType;
@@ -37,7 +37,10 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.AvroUtils;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.CompactionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieException;
@@ -111,15 +114,18 @@ public class HoodieCommitArchiveLog {
public boolean archiveIfRequired(final JavaSparkContext jsc) throws IOException {
try {
List<HoodieInstant> instantsToArchive = getInstantsToArchive(jsc).collect(Collectors.toList());
boolean success = true;
if (instantsToArchive.iterator().hasNext()) {
if (!instantsToArchive.isEmpty()) {
this.writer = openWriter();
LOG.info("Archiving instants " + instantsToArchive);
archive(instantsToArchive);
LOG.info("Deleting archived instants " + instantsToArchive);
success = deleteArchivedInstants(instantsToArchive);
} else {
LOG.info("No Instants to archive");
}
return success;
} finally {
close();
@@ -171,7 +177,15 @@ public class HoodieCommitArchiveLog {
}).limit(commitTimeline.countInstants() - minCommitsToKeep));
}
return instants;
// For archiving and cleaning instants, we need to include intermediate state files if they exist
HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false);
Map<Pair<String, String>, List<HoodieInstant>> groupByTsAction = rawActiveTimeline.getInstants()
.collect(Collectors.groupingBy(i -> Pair.of(i.getTimestamp(),
HoodieInstant.getComparableAction(i.getAction()))));
return instants.flatMap(hoodieInstant ->
groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(),
HoodieInstant.getComparableAction(hoodieInstant.getAction()))).stream());
}
private boolean deleteArchivedInstants(List<HoodieInstant> archivedInstants) throws IOException {
@@ -194,6 +208,7 @@ public class HoodieCommitArchiveLog {
return i.isCompleted() && (i.getAction().equals(HoodieTimeline.COMMIT_ACTION)
|| (i.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)));
}).max(Comparator.comparing(HoodieInstant::getTimestamp)));
LOG.info("Latest Committed Instant=" + latestCommitted);
if (latestCommitted.isPresent()) {
success &= deleteAllInstantsOlderorEqualsInAuxMetaFolder(latestCommitted.get());
}
@@ -208,8 +223,8 @@ public class HoodieCommitArchiveLog {
* @throws IOException in case of error
*/
private boolean deleteAllInstantsOlderorEqualsInAuxMetaFolder(HoodieInstant thresholdInstant) throws IOException {
List<HoodieInstant> instants = HoodieTableMetaClient.scanHoodieInstantsFromFileSystem(metaClient.getFs(),
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE);
List<HoodieInstant> instants = metaClient.scanHoodieInstantsFromFileSystem(
new Path(metaClient.getMetaAuxiliaryPath()), HoodieActiveTimeline.VALID_EXTENSIONS_IN_ACTIVE_TIMELINE, false);
List<HoodieInstant> instantsToBeDeleted =
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
@@ -270,10 +285,14 @@ public class HoodieCommitArchiveLog {
throws IOException {
HoodieArchivedMetaEntry archivedMetaWrapper = new HoodieArchivedMetaEntry();
archivedMetaWrapper.setCommitTime(hoodieInstant.getTimestamp());
archivedMetaWrapper.setActionState(hoodieInstant.getState().name());
switch (hoodieInstant.getAction()) {
case HoodieTimeline.CLEAN_ACTION: {
archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils
.deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class));
if (hoodieInstant.isCompleted()) {
archivedMetaWrapper.setHoodieCleanMetadata(CleanerUtils.getCleanerMetadata(metaClient, hoodieInstant));
} else {
archivedMetaWrapper.setHoodieCleanerPlan(CleanerUtils.getCleanerPlan(metaClient, hoodieInstant));
}
archivedMetaWrapper.setActionType(ActionType.clean.name());
break;
}
@@ -303,8 +322,15 @@ public class HoodieCommitArchiveLog {
archivedMetaWrapper.setActionType(ActionType.commit.name());
break;
}
default:
case HoodieTimeline.COMPACTION_ACTION: {
HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, hoodieInstant.getTimestamp());
archivedMetaWrapper.setHoodieCompactionPlan(plan);
archivedMetaWrapper.setActionType(ActionType.compaction.name());
break;
}
default: {
throw new UnsupportedOperationException("Action not fully supported yet");
}
}
return archivedMetaWrapper;
}

View File

@@ -35,7 +35,7 @@ import org.apache.hudi.common.model.HoodieRollingStatMetadata;
import org.apache.hudi.common.table.HoodieTimeline;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.AvroUtils;
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
import org.apache.hudi.common.util.FSUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
@@ -109,14 +109,15 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
Tuple2<String, String> partitionDelFileTuple = iter.next();
String partitionPath = partitionDelFileTuple._1();
String delFileName = partitionDelFileTuple._2();
String deletePathStr = new Path(new Path(basePath, partitionPath), delFileName).toString();
Path deletePath = new Path(new Path(basePath, partitionPath), delFileName);
String deletePathStr = deletePath.toString();
Boolean deletedFileResult = deleteFileAndGetResult(fs, deletePathStr);
if (!partitionCleanStatMap.containsKey(partitionPath)) {
partitionCleanStatMap.put(partitionPath, new PartitionCleanStat(partitionPath));
}
PartitionCleanStat partitionCleanStat = partitionCleanStatMap.get(partitionPath);
partitionCleanStat.addDeleteFilePatterns(deletePathStr);
partitionCleanStat.addDeletedFileResult(deletePathStr, deletedFileResult);
partitionCleanStat.addDeleteFilePatterns(deletePath.getName());
partitionCleanStat.addDeletedFileResult(deletePath.getName(), deletedFileResult);
}
return partitionCleanStatMap.entrySet().stream().map(e -> new Tuple2<>(e.getKey(), e.getValue()))
.collect(Collectors.toList()).iterator();
@@ -312,82 +313,78 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
* @throws IllegalArgumentException if unknown cleaning policy is provided
*/
@Override
public List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant) {
try {
HoodieCleanerPlan cleanerPlan = AvroUtils.deserializeCleanerPlan(getActiveTimeline()
.getInstantAuxiliaryDetails(HoodieTimeline.getCleanRequestedInstant(cleanInstant.getTimestamp())).get());
public List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant, HoodieCleanerPlan cleanerPlan) {
int cleanerParallelism = Math.min(
(int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()),
config.getCleanerParallelism());
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
.parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream()
.flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y)))
.collect(Collectors.toList()), cleanerParallelism)
.mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey((e1, e2) -> e1.merge(e2)).collect();
int cleanerParallelism = Math.min(
(int) (cleanerPlan.getFilesToBeDeletedPerPartition().values().stream().mapToInt(x -> x.size()).count()),
config.getCleanerParallelism());
LOG.info("Using cleanerParallelism: " + cleanerParallelism);
List<Tuple2<String, PartitionCleanStat>> partitionCleanStats = jsc
.parallelize(cleanerPlan.getFilesToBeDeletedPerPartition().entrySet().stream()
.flatMap(x -> x.getValue().stream().map(y -> new Tuple2<String, String>(x.getKey(), y)))
.collect(Collectors.toList()), cleanerParallelism)
.mapPartitionsToPair(deleteFilesFunc(this)).reduceByKey((e1, e2) -> e1.merge(e2)).collect();
Map<String, PartitionCleanStat> partitionCleanStatsMap =
partitionCleanStats.stream().collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
Map<String, PartitionCleanStat> partitionCleanStatsMap =
partitionCleanStats.stream().collect(Collectors.toMap(Tuple2::_1, Tuple2::_2));
// Return PartitionCleanStat for each partition passed.
return cleanerPlan.getFilesToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
PartitionCleanStat partitionCleanStat =
(partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap.get(partitionPath)
: new PartitionCleanStat(partitionPath);
HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();
return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy()).withPartitionPath(partitionPath)
.withEarliestCommitRetained(Option.ofNullable(
actionInstant != null
? new HoodieInstant(HoodieInstant.State.valueOf(actionInstant.getState()),
actionInstant.getAction(), actionInstant.getTimestamp())
: null))
.withDeletePathPattern(partitionCleanStat.deletePathPatterns)
.withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
.withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
}).collect(Collectors.toList());
} catch (IOException e) {
throw new HoodieIOException("Failed to clean up after commit", e);
}
// Return PartitionCleanStat for each partition passed.
return cleanerPlan.getFilesToBeDeletedPerPartition().keySet().stream().map(partitionPath -> {
PartitionCleanStat partitionCleanStat =
(partitionCleanStatsMap.containsKey(partitionPath)) ? partitionCleanStatsMap.get(partitionPath)
: new PartitionCleanStat(partitionPath);
HoodieActionInstant actionInstant = cleanerPlan.getEarliestInstantToRetain();
return HoodieCleanStat.newBuilder().withPolicy(config.getCleanerPolicy()).withPartitionPath(partitionPath)
.withEarliestCommitRetained(Option.ofNullable(
actionInstant != null
? new HoodieInstant(State.valueOf(actionInstant.getState()),
actionInstant.getAction(), actionInstant.getTimestamp())
: null))
.withDeletePathPattern(partitionCleanStat.deletePathPatterns)
.withSuccessfulDeletes(partitionCleanStat.successDeleteFiles)
.withFailedDeletes(partitionCleanStat.failedDeleteFiles).build();
}).collect(Collectors.toList());
}
@Override
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
throws IOException {
Long startTime = System.currentTimeMillis();
List<HoodieRollbackStat> stats = new ArrayList<>();
String actionType = metaClient.getCommitActionType();
HoodieActiveTimeline activeTimeline = this.getActiveTimeline();
List<String> inflights =
this.getInflightCommitTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
// Atomically unpublish the commits
if (!inflights.contains(commit)) {
LOG.info("Unpublishing " + commit);
activeTimeline.revertToInflight(new HoodieInstant(false, actionType, commit));
if (instant.isCompleted()) {
LOG.info("Unpublishing instant " + instant);
instant = activeTimeline.revertToInflight(instant);
}
HoodieInstant instantToRollback = new HoodieInstant(false, actionType, commit);
Long startTime = System.currentTimeMillis();
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
// deleting the timeline file
if (!instant.isRequested()) {
String commit = instant.getTimestamp();
// delete all the data files for this commit
LOG.info("Clean out all parquet files generated for commit: " + commit);
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instantToRollback);
// TODO: We need to persist this as rollback workload and use it in case of partial failures
List<HoodieRollbackStat> stats =
new RollbackExecutor(metaClient, config).performRollback(jsc, instantToRollback, rollbackRequests);
// delete all the data files for this commit
LOG.info("Clean out all parquet files generated for commit: " + commit);
List<RollbackRequest> rollbackRequests = generateRollbackRequests(instant);
//TODO: We need to persist this as rollback workload and use it in case of partial failures
stats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests);
}
// Delete Inflight instant if enabled
deleteInflightInstant(deleteInstants, activeTimeline, new HoodieInstant(true, actionType, commit));
deleteInflightAndRequestedInstant(deleteInstants, activeTimeline, instant);
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));
return stats;
}
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback) throws IOException {
private List<RollbackRequest> generateRollbackRequests(HoodieInstant instantToRollback)
throws IOException {
return FSUtils.getAllPartitionPaths(this.metaClient.getFs(), this.getMetaClient().getBasePath(),
config.shouldAssumeDatePartitioning()).stream().map(partitionPath -> {
return RollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath, instantToRollback);
}).collect(Collectors.toList());
}
/**
* Delete Inflight instant if enabled.
*
@@ -395,15 +392,22 @@ public class HoodieCopyOnWriteTable<T extends HoodieRecordPayload> extends Hoodi
* @param activeTimeline Hoodie active timeline
* @param instantToBeDeleted Instant to be deleted
*/
protected void deleteInflightInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
protected void deleteInflightAndRequestedInstant(boolean deleteInstant, HoodieActiveTimeline activeTimeline,
HoodieInstant instantToBeDeleted) {
// Remove marker files always on rollback
deleteMarkerDir(instantToBeDeleted.getTimestamp());
// Remove the rolled back inflight commits
if (deleteInstant) {
activeTimeline.deleteInflight(instantToBeDeleted);
LOG.info("Deleted inflight commit " + instantToBeDeleted);
LOG.info("Deleting instant=" + instantToBeDeleted);
activeTimeline.deletePending(instantToBeDeleted);
if (instantToBeDeleted.isInflight() && !metaClient.getTimelineLayoutVersion().isNullVersion()) {
// Delete corresponding requested instant
instantToBeDeleted = new HoodieInstant(State.REQUESTED, instantToBeDeleted.getAction(),
instantToBeDeleted.getTimestamp());
activeTimeline.deletePending(instantToBeDeleted);
}
LOG.info("Deleted pending commit " + instantToBeDeleted);
} else {
LOG.warn("Rollback finished without deleting inflight instant file. Instant=" + instantToBeDeleted);
}

View File

@@ -44,7 +44,6 @@ import org.apache.hudi.io.compact.HoodieRealtimeTableCompactor;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Partitioner;
@@ -167,32 +166,39 @@ public class HoodieMergeOnReadTable<T extends HoodieRecordPayload> extends Hoodi
}
@Override
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
throws IOException {
public List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant,
boolean deleteInstants) throws IOException {
Long startTime = System.currentTimeMillis();
String commit = instant.getTimestamp();
LOG.error("Rolling back instant " + instant);
// Atomically un-publish all non-inflight commits
if (instant.isCompleted()) {
LOG.error("Un-publishing instant " + instant + ", deleteInstants=" + deleteInstants);
instant = this.getActiveTimeline().revertToInflight(instant);
}
List<HoodieRollbackStat> allRollbackStats = new ArrayList<>();
// At the moment, MOR table type does not support bulk nested rollbacks. Nested rollbacks is an experimental
// feature that is expensive. To perform nested rollbacks, initiate multiple requests of client.rollback
// (commitToRollback).
// NOTE {@link HoodieCompactionConfig#withCompactionLazyBlockReadEnabled} needs to be set to TRUE. This is
// required to avoid OOM when merging multiple LogBlocks performed during nested rollbacks.
// Atomically un-publish all non-inflight commits
Option<HoodieInstant> commitOrCompactionOption = Option.fromJavaOptional(this.getActiveTimeline()
.getTimelineOfActions(Sets.newHashSet(HoodieActiveTimeline.COMMIT_ACTION,
HoodieActiveTimeline.DELTA_COMMIT_ACTION, HoodieActiveTimeline.COMPACTION_ACTION))
.getInstants().filter(i -> commit.equals(i.getTimestamp())).findFirst());
HoodieInstant instantToRollback = commitOrCompactionOption.get();
// Atomically un-publish all non-inflight commits
if (!instantToRollback.isInflight()) {
this.getActiveTimeline().revertToInflight(instantToRollback);
// For Requested State (like failure during index lookup), there is nothing to do rollback other than
// deleting the timeline file
if (!instant.isRequested()) {
LOG.info("Unpublished " + commit);
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instant);
// TODO: We need to persist this as rollback workload and use it in case of partial failures
allRollbackStats = new RollbackExecutor(metaClient, config).performRollback(jsc, instant, rollbackRequests);
}
LOG.info("Unpublished " + commit);
Long startTime = System.currentTimeMillis();
List<RollbackRequest> rollbackRequests = generateRollbackRequests(jsc, instantToRollback);
// TODO: We need to persist this as rollback workload and use it in case of partial failures
List<HoodieRollbackStat> allRollbackStats =
new RollbackExecutor(metaClient, config).performRollback(jsc, instantToRollback, rollbackRequests);
// Delete Inflight instants if enabled
deleteInflightInstant(deleteInstants, this.getActiveTimeline(),
new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp()));
deleteInflightAndRequestedInstant(deleteInstants, this.getActiveTimeline(), instant);
LOG.info("Time(in ms) taken to finish rollback " + (System.currentTimeMillis() - startTime));

View File

@@ -182,8 +182,8 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
/**
* Get only the inflights (no-completed) commit timeline.
*/
public HoodieTimeline getInflightCommitTimeline() {
return metaClient.getCommitsTimeline().filterInflightsExcludingCompaction();
public HoodieTimeline getPendingCommitTimeline() {
return metaClient.getCommitsTimeline().filterPendingExcludingCompaction();
}
/**
@@ -287,16 +287,18 @@ public abstract class HoodieTable<T extends HoodieRecordPayload> implements Seri
*
* @param jsc Java Spark Context
* @param cleanInstant Clean Instant
* @param cleanerPlan Cleaner Plan
* @return list of Clean Stats
*/
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant);
public abstract List<HoodieCleanStat> clean(JavaSparkContext jsc, HoodieInstant cleanInstant,
HoodieCleanerPlan cleanerPlan);
/**
* Rollback the (inflight/committed) record changes with the given commit time. Four steps: (1) Atomically unpublish
* this commit (2) clean indexing data (3) clean new generated parquet files / log blocks (4) Finally, delete
* .<action>.commit or .<action>.inflight file if deleteInstants = true
*/
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, String commit, boolean deleteInstants)
public abstract List<HoodieRollbackStat> rollback(JavaSparkContext jsc, HoodieInstant instant, boolean deleteInstants)
throws IOException;
/**