[HUDI-417] Refactor HoodieWriteClient so that commit logic can be shareable by both bootstrap and normal write operations (#1166)
This commit is contained in:
committed by
vinoth chandar
parent
b5df6723a2
commit
8306f749a2
@@ -0,0 +1,421 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hudi;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||||
|
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||||
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
|
import org.apache.hudi.common.model.HoodieCommitMetadata;
|
||||||
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
|
import org.apache.hudi.common.model.HoodieRollingStat;
|
||||||
|
import org.apache.hudi.common.model.HoodieRollingStatMetadata;
|
||||||
|
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||||
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
|
import org.apache.hudi.common.table.HoodieTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||||
|
import org.apache.hudi.common.table.timeline.HoodieInstant.State;
|
||||||
|
import org.apache.hudi.common.util.AvroUtils;
|
||||||
|
import org.apache.hudi.common.util.FSUtils;
|
||||||
|
import org.apache.hudi.common.util.Option;
|
||||||
|
import org.apache.hudi.config.HoodieWriteConfig;
|
||||||
|
import org.apache.hudi.exception.HoodieCommitException;
|
||||||
|
import org.apache.hudi.exception.HoodieIOException;
|
||||||
|
import org.apache.hudi.exception.HoodieRollbackException;
|
||||||
|
import org.apache.hudi.index.HoodieIndex;
|
||||||
|
import org.apache.hudi.metrics.HoodieMetrics;
|
||||||
|
import org.apache.hudi.table.HoodieTable;
|
||||||
|
|
||||||
|
import com.codahale.metrics.Timer;
|
||||||
|
import org.apache.log4j.LogManager;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.apache.spark.api.java.JavaRDD;
|
||||||
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract Write Client providing functionality for performing commit, index updates and rollback
|
||||||
|
* Reused for regular write operations like upsert/insert/bulk-insert.. as well as bootstrap
|
||||||
|
* @param <T> Sub type of HoodieRecordPayload
|
||||||
|
*/
|
||||||
|
public abstract class AbstractHoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieClient {
|
||||||
|
|
||||||
|
private static final Logger LOG = LogManager.getLogger(AbstractHoodieWriteClient.class);
|
||||||
|
private static final String UPDATE_STR = "update";
|
||||||
|
|
||||||
|
private final transient HoodieMetrics metrics;
|
||||||
|
private final transient HoodieIndex<T> index;
|
||||||
|
|
||||||
|
private transient Timer.Context writeContext = null;
|
||||||
|
|
||||||
|
protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig) {
|
||||||
|
super(jsc, clientConfig);
|
||||||
|
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||||
|
this.index = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AbstractHoodieWriteClient(JavaSparkContext jsc, HoodieIndex index, HoodieWriteConfig clientConfig,
|
||||||
|
Option<EmbeddedTimelineService> timelineServer) {
|
||||||
|
super(jsc, clientConfig, timelineServer);
|
||||||
|
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||||
|
this.index = index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Commit changes performed at the given commitTime marker.
|
||||||
|
*/
|
||||||
|
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
|
||||||
|
return commit(commitTime, writeStatuses, Option.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Commit changes performed at the given commitTime marker.
|
||||||
|
*/
|
||||||
|
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
||||||
|
Option<Map<String, String>> extraMetadata) {
|
||||||
|
HoodieTableMetaClient metaClient = createMetaClient(false);
|
||||||
|
return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
|
||||||
|
String commitTime) {
|
||||||
|
// cache writeStatusRDD before updating index, so that all actions before this are not triggered again for future
|
||||||
|
// RDD actions that are performed after updating the index.
|
||||||
|
writeStatusRDD = writeStatusRDD.persist(config.getWriteStatusStorageLevel());
|
||||||
|
Timer.Context indexTimer = metrics.getIndexCtx();
|
||||||
|
// Update the index back
|
||||||
|
JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
|
||||||
|
metrics.updateIndexMetrics(UPDATE_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
|
// Trigger the insert and collect statuses
|
||||||
|
commitOnAutoCommit(commitTime, statuses, table.getMetaClient().getCommitActionType());
|
||||||
|
return statuses;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
|
||||||
|
if (config.shouldAutoCommit()) {
|
||||||
|
LOG.info("Auto commit enabled: Committing " + commitTime);
|
||||||
|
boolean commitResult = commit(commitTime, resultRDD, Option.empty(), actionType);
|
||||||
|
if (!commitResult) {
|
||||||
|
throw new HoodieCommitException("Failed to commit " + commitTime);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG.info("Auto commit disabled for " + commitTime);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
||||||
|
Option<Map<String, String>> extraMetadata, String actionType) {
|
||||||
|
|
||||||
|
LOG.info("Commiting " + commitTime);
|
||||||
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
|
|
||||||
|
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
||||||
|
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
|
||||||
|
|
||||||
|
List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
|
||||||
|
|
||||||
|
updateMetadataAndRollingStats(actionType, metadata, stats);
|
||||||
|
|
||||||
|
// Finalize write
|
||||||
|
finalizeWrite(table, commitTime, stats);
|
||||||
|
|
||||||
|
// add in extra metadata
|
||||||
|
if (extraMetadata.isPresent()) {
|
||||||
|
extraMetadata.get().forEach(metadata::addMetadata);
|
||||||
|
}
|
||||||
|
metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, config.getSchema());
|
||||||
|
|
||||||
|
try {
|
||||||
|
activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, commitTime),
|
||||||
|
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
||||||
|
|
||||||
|
postCommit(metadata, commitTime, extraMetadata);
|
||||||
|
|
||||||
|
if (writeContext != null) {
|
||||||
|
long durationInMs = metrics.getDurationInMs(writeContext.stop());
|
||||||
|
metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(commitTime).getTime(), durationInMs,
|
||||||
|
metadata, actionType);
|
||||||
|
writeContext = null;
|
||||||
|
}
|
||||||
|
LOG.info("Committed " + commitTime);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime,
|
||||||
|
e);
|
||||||
|
} catch (ParseException e) {
|
||||||
|
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime
|
||||||
|
+ "Instant time is not of valid format", e);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Post Commit Hook. Derived classes use this method to perform post-commit processing
|
||||||
|
* @param metadata Commit Metadata corresponding to committed instant
|
||||||
|
* @param instantTime Instant Time
|
||||||
|
* @param extraMetadata Additional Metadata passed by user
|
||||||
|
* @throws IOException in case of error
|
||||||
|
*/
|
||||||
|
protected abstract void postCommit(HoodieCommitMetadata metadata, String instantTime,
|
||||||
|
Option<Map<String, String>> extraMetadata) throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finalize Write operation.
|
||||||
|
* @param table HoodieTable
|
||||||
|
* @param instantTime Instant Time
|
||||||
|
* @param stats Hoodie Write Stat
|
||||||
|
*/
|
||||||
|
protected void finalizeWrite(HoodieTable<T> table, String instantTime, List<HoodieWriteStat> stats) {
|
||||||
|
try {
|
||||||
|
final Timer.Context finalizeCtx = metrics.getFinalizeCtx();
|
||||||
|
table.finalizeWrite(jsc, instantTime, stats);
|
||||||
|
if (finalizeCtx != null) {
|
||||||
|
Option<Long> durationInMs = Option.of(metrics.getDurationInMs(finalizeCtx.stop()));
|
||||||
|
durationInMs.ifPresent(duration -> {
|
||||||
|
LOG.info("Finalize write elapsed time (milliseconds): " + duration);
|
||||||
|
metrics.updateFinalizeWriteMetrics(duration, stats.size());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} catch (HoodieIOException ioe) {
|
||||||
|
throw new HoodieCommitException("Failed to complete commit " + instantTime + " due to finalize errors.", ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateMetadataAndRollingStats(String actionType, HoodieCommitMetadata metadata,
|
||||||
|
List<HoodieWriteStat> writeStats) {
|
||||||
|
// TODO : make sure we cannot rollback / archive last commit file
|
||||||
|
try {
|
||||||
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
HoodieTable table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
|
// 0. All of the rolling stat management is only done by the DELTA commit for MOR and COMMIT for COW other wise
|
||||||
|
// there may be race conditions
|
||||||
|
HoodieRollingStatMetadata rollingStatMetadata = new HoodieRollingStatMetadata(actionType);
|
||||||
|
// 1. Look up the previous compaction/commit and get the HoodieCommitMetadata from there.
|
||||||
|
// 2. Now, first read the existing rolling stats and merge with the result of current metadata.
|
||||||
|
|
||||||
|
// Need to do this on every commit (delta or commit) to support COW and MOR.
|
||||||
|
|
||||||
|
for (HoodieWriteStat stat : writeStats) {
|
||||||
|
String partitionPath = stat.getPartitionPath();
|
||||||
|
// TODO: why is stat.getPartitionPath() null at times here.
|
||||||
|
metadata.addWriteStat(partitionPath, stat);
|
||||||
|
HoodieRollingStat hoodieRollingStat = new HoodieRollingStat(stat.getFileId(),
|
||||||
|
stat.getNumWrites() - (stat.getNumUpdateWrites() - stat.getNumDeletes()), stat.getNumUpdateWrites(),
|
||||||
|
stat.getNumDeletes(), stat.getTotalWriteBytes());
|
||||||
|
rollingStatMetadata.addRollingStat(partitionPath, hoodieRollingStat);
|
||||||
|
}
|
||||||
|
// The last rolling stat should be present in the completed timeline
|
||||||
|
Option<HoodieInstant> lastInstant =
|
||||||
|
table.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
|
||||||
|
if (lastInstant.isPresent()) {
|
||||||
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
||||||
|
table.getActiveTimeline().getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
|
||||||
|
Option<String> lastRollingStat = Option
|
||||||
|
.ofNullable(commitMetadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY));
|
||||||
|
if (lastRollingStat.isPresent()) {
|
||||||
|
rollingStatMetadata = rollingStatMetadata
|
||||||
|
.merge(HoodieCommitMetadata.fromBytes(lastRollingStat.get().getBytes(), HoodieRollingStatMetadata.class));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
metadata.addMetadata(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY, rollingStatMetadata.toJsonString());
|
||||||
|
} catch (IOException io) {
|
||||||
|
throw new HoodieCommitException("Unable to save rolling stats");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieMetrics getMetrics() {
|
||||||
|
return metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HoodieIndex<T> getIndex() {
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HoodieTable getTableAndInitCtx(OperationType operationType) {
|
||||||
|
HoodieTableMetaClient metaClient = createMetaClient(true);
|
||||||
|
if (operationType == OperationType.DELETE) {
|
||||||
|
setWriteSchemaFromLastInstant(metaClient);
|
||||||
|
}
|
||||||
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
||||||
|
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
|
||||||
|
writeContext = metrics.getCommitCtx();
|
||||||
|
} else {
|
||||||
|
writeContext = metrics.getDeltaCommitCtx();
|
||||||
|
}
|
||||||
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets write schema from last instant since deletes may not have schema set in the config.
|
||||||
|
*/
|
||||||
|
private void setWriteSchemaFromLastInstant(HoodieTableMetaClient metaClient) {
|
||||||
|
try {
|
||||||
|
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
||||||
|
Option<HoodieInstant> lastInstant =
|
||||||
|
activeTimeline.filterCompletedInstants().filter(s -> s.getAction().equals(metaClient.getCommitActionType()))
|
||||||
|
.lastInstant();
|
||||||
|
if (lastInstant.isPresent()) {
|
||||||
|
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
||||||
|
activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
|
||||||
|
if (commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)) {
|
||||||
|
config.setSchema(commitMetadata.getExtraMetadata().get(HoodieCommitMetadata.SCHEMA_KEY));
|
||||||
|
} else {
|
||||||
|
throw new HoodieIOException("Latest commit does not have any schema in commit metadata");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new HoodieIOException("Deletes issued without any prior commits");
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieIOException("IOException thrown while reading last commit metadata", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
// Stop timeline-server if running
|
||||||
|
super.close();
|
||||||
|
// Calling this here releases any resources used by your index, so make sure to finish any related operations
|
||||||
|
// before this point
|
||||||
|
this.index.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void rollbackInternal(String commitToRollback) {
|
||||||
|
final String startRollbackTime = HoodieActiveTimeline.createNewInstantTime();
|
||||||
|
final Timer.Context context = metrics.getRollbackCtx();
|
||||||
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
try {
|
||||||
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
|
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||||
|
createMetaClient(true), config, jsc);
|
||||||
|
Option<HoodieInstant> rollbackInstantOpt =
|
||||||
|
Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||||
|
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitToRollback))
|
||||||
|
.findFirst());
|
||||||
|
|
||||||
|
if (rollbackInstantOpt.isPresent()) {
|
||||||
|
List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackInstantOpt.get());
|
||||||
|
finishRollback(context, stats, Collections.singletonList(commitToRollback), startRollbackTime);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitToRollback,
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected List<HoodieRollbackStat> doRollbackAndGetStats(final HoodieInstant instantToRollback) throws
|
||||||
|
IOException {
|
||||||
|
final String commitToRollback = instantToRollback.getTimestamp();
|
||||||
|
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
||||||
|
createMetaClient(true), config, jsc);
|
||||||
|
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
|
||||||
|
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
|
||||||
|
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
||||||
|
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
savepoints.forEach(s -> {
|
||||||
|
if (s.contains(commitToRollback)) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Could not rollback a savepointed commit. Delete savepoint first before rolling back" + s);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
|
||||||
|
// nothing to rollback
|
||||||
|
LOG.info("No commits to rollback " + commitToRollback);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure only the last n commits are being rolled back
|
||||||
|
// If there is a commit in-between or after that is not rolled back, then abort
|
||||||
|
|
||||||
|
if ((commitToRollback != null) && !commitTimeline.empty()
|
||||||
|
&& !commitTimeline.findInstantsAfter(commitToRollback, Integer.MAX_VALUE).empty()) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Found commits after time :" + commitToRollback + ", please rollback greater commits first");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
if ((commitToRollback != null) && !inflights.isEmpty()
|
||||||
|
&& (inflights.indexOf(commitToRollback) != inflights.size() - 1)) {
|
||||||
|
throw new HoodieRollbackException(
|
||||||
|
"Found in-flight commits after time :" + commitToRollback + ", please rollback greater commits first");
|
||||||
|
}
|
||||||
|
|
||||||
|
List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
|
||||||
|
|
||||||
|
LOG.info("Deleted inflight commits " + commitToRollback);
|
||||||
|
|
||||||
|
// cleanup index entries
|
||||||
|
if (!getIndex().rollbackCommit(commitToRollback)) {
|
||||||
|
throw new HoodieRollbackException("Rollback index changes failed, for time :" + commitToRollback);
|
||||||
|
}
|
||||||
|
LOG.info("Index rolled back for commits " + commitToRollback);
|
||||||
|
return stats;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void finishRollback(final Timer.Context context, List<HoodieRollbackStat> rollbackStats,
|
||||||
|
List<String> commitsToRollback, final String startRollbackTime) throws IOException {
|
||||||
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
|
Option<Long> durationInMs = Option.empty();
|
||||||
|
long numFilesDeleted = rollbackStats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum();
|
||||||
|
if (context != null) {
|
||||||
|
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
|
||||||
|
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
||||||
|
}
|
||||||
|
HoodieRollbackMetadata rollbackMetadata = AvroUtils
|
||||||
|
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
|
||||||
|
//TODO: varadarb - This will be fixed when Rollback transition mimics that of commit
|
||||||
|
table.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION,
|
||||||
|
startRollbackTime));
|
||||||
|
table.getActiveTimeline().saveAsComplete(
|
||||||
|
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
||||||
|
AvroUtils.serializeRollbackMetadata(rollbackMetadata));
|
||||||
|
LOG.info("Rollback of Commits " + commitsToRollback + " is complete");
|
||||||
|
|
||||||
|
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
||||||
|
LOG.info("Cleaning up older rollback meta files");
|
||||||
|
// Cleanup of older cleaner meta files
|
||||||
|
// TODO - make the commit archival generic and archive rollback metadata
|
||||||
|
FSUtils.deleteOlderRollbackMetaFiles(fs, table.getMetaClient().getMetaPath(),
|
||||||
|
table.getActiveTimeline().getRollbackTimeline().getInstants());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Refers to different operation types.
|
||||||
|
*/
|
||||||
|
enum OperationType {
|
||||||
|
INSERT,
|
||||||
|
INSERT_PREPPED,
|
||||||
|
UPSERT,
|
||||||
|
UPSERT_PREPPED,
|
||||||
|
DELETE,
|
||||||
|
BULK_INSERT,
|
||||||
|
BULK_INSERT_PREPPED,
|
||||||
|
BOOTSTRAP
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -21,7 +21,6 @@ package org.apache.hudi;
|
|||||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
import org.apache.hudi.avro.model.HoodieCompactionPlan;
|
||||||
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
|
||||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
|
||||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||||
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
import org.apache.hudi.client.embedded.EmbeddedTimelineService;
|
||||||
import org.apache.hudi.common.HoodieRollbackStat;
|
import org.apache.hudi.common.HoodieRollbackStat;
|
||||||
@@ -31,8 +30,6 @@ import org.apache.hudi.common.model.HoodieDataFile;
|
|||||||
import org.apache.hudi.common.model.HoodieKey;
|
import org.apache.hudi.common.model.HoodieKey;
|
||||||
import org.apache.hudi.common.model.HoodieRecord;
|
import org.apache.hudi.common.model.HoodieRecord;
|
||||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||||
import org.apache.hudi.common.model.HoodieRollingStat;
|
|
||||||
import org.apache.hudi.common.model.HoodieRollingStatMetadata;
|
|
||||||
import org.apache.hudi.common.model.HoodieTableType;
|
import org.apache.hudi.common.model.HoodieTableType;
|
||||||
import org.apache.hudi.common.model.HoodieWriteStat;
|
import org.apache.hudi.common.model.HoodieWriteStat;
|
||||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||||
@@ -79,7 +76,6 @@ import org.apache.spark.storage.StorageLevel;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
@@ -94,18 +90,15 @@ import scala.Tuple2;
|
|||||||
* <p>
|
* <p>
|
||||||
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie dataset.
|
* Note that, at any given time, there can only be one Spark job performing these operations on a Hoodie dataset.
|
||||||
*/
|
*/
|
||||||
public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieClient {
|
public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHoodieWriteClient<T> {
|
||||||
|
|
||||||
private static final Logger LOG = LogManager.getLogger(HoodieWriteClient.class);
|
private static final Logger LOG = LogManager.getLogger(HoodieWriteClient.class);
|
||||||
private static final String UPDATE_STR = "update";
|
|
||||||
private static final String LOOKUP_STR = "lookup";
|
private static final String LOOKUP_STR = "lookup";
|
||||||
private final boolean rollbackPending;
|
private final boolean rollbackPending;
|
||||||
private final transient HoodieMetrics metrics;
|
private final transient HoodieMetrics metrics;
|
||||||
private final transient HoodieIndex<T> index;
|
|
||||||
private final transient HoodieCleanClient<T> cleanClient;
|
private final transient HoodieCleanClient<T> cleanClient;
|
||||||
private transient Timer.Context writeContext = null;
|
|
||||||
private transient Timer.Context compactionTimer;
|
private transient Timer.Context compactionTimer;
|
||||||
private transient Timer.Context indexTimer = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -128,8 +121,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
|
|
||||||
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending,
|
public HoodieWriteClient(JavaSparkContext jsc, HoodieWriteConfig clientConfig, boolean rollbackPending,
|
||||||
HoodieIndex index, Option<EmbeddedTimelineService> timelineService) {
|
HoodieIndex index, Option<EmbeddedTimelineService> timelineService) {
|
||||||
super(jsc, clientConfig, timelineService);
|
super(jsc, index, clientConfig, timelineService);
|
||||||
this.index = index;
|
|
||||||
this.metrics = new HoodieMetrics(config, config.getTableName());
|
this.metrics = new HoodieMetrics(config, config.getTableName());
|
||||||
this.rollbackPending = rollbackPending;
|
this.rollbackPending = rollbackPending;
|
||||||
this.cleanClient = new HoodieCleanClient<>(jsc, config, metrics, timelineService);
|
this.cleanClient = new HoodieCleanClient<>(jsc, config, metrics, timelineService);
|
||||||
@@ -149,10 +141,9 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
public JavaRDD<HoodieRecord<T>> filterExists(JavaRDD<HoodieRecord<T>> hoodieRecords) {
|
public JavaRDD<HoodieRecord<T>> filterExists(JavaRDD<HoodieRecord<T>> hoodieRecords) {
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
// Create a Hoodie table which encapsulated the commits and files visible
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
indexTimer = metrics.getIndexCtx();
|
Timer.Context indexTimer = metrics.getIndexCtx();
|
||||||
JavaRDD<HoodieRecord<T>> recordsWithLocation = index.tagLocation(hoodieRecords, jsc, table);
|
JavaRDD<HoodieRecord<T>> recordsWithLocation = getIndex().tagLocation(hoodieRecords, jsc, table);
|
||||||
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
indexTimer = null;
|
|
||||||
return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown());
|
return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -166,11 +157,10 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
||||||
combineOnCondition(config.shouldCombineBeforeUpsert(), records, config.getUpsertShuffleParallelism());
|
combineOnCondition(config.shouldCombineBeforeUpsert(), records, config.getUpsertShuffleParallelism());
|
||||||
|
|
||||||
indexTimer = metrics.getIndexCtx();
|
Timer.Context indexTimer = metrics.getIndexCtx();
|
||||||
// perform index loop up to get existing location of records
|
// perform index loop up to get existing location of records
|
||||||
JavaRDD<HoodieRecord<T>> taggedRecords = index.tagLocation(dedupedRecords, jsc, table);
|
JavaRDD<HoodieRecord<T>> taggedRecords = getIndex().tagLocation(dedupedRecords, jsc, table);
|
||||||
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
indexTimer = null;
|
|
||||||
return upsertRecordsInternal(taggedRecords, commitTime, table, true);
|
return upsertRecordsInternal(taggedRecords, commitTime, table, true);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (e instanceof HoodieUpsertException) {
|
if (e instanceof HoodieUpsertException) {
|
||||||
@@ -343,14 +333,13 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
|
|
||||||
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
JavaRDD<HoodieRecord<T>> dedupedRecords =
|
||||||
dedupedKeys.map(key -> new HoodieRecord(key, new EmptyHoodieRecordPayload()));
|
dedupedKeys.map(key -> new HoodieRecord(key, new EmptyHoodieRecordPayload()));
|
||||||
indexTimer = metrics.getIndexCtx();
|
Timer.Context indexTimer = metrics.getIndexCtx();
|
||||||
// perform index loop up to get existing location of records
|
// perform index loop up to get existing location of records
|
||||||
JavaRDD<HoodieRecord<T>> taggedRecords = index.tagLocation(dedupedRecords, jsc, table);
|
JavaRDD<HoodieRecord<T>> taggedRecords = getIndex().tagLocation(dedupedRecords, jsc, table);
|
||||||
// filter out non existant keys/records
|
// filter out non existant keys/records
|
||||||
JavaRDD<HoodieRecord<T>> taggedValidRecords = taggedRecords.filter(HoodieRecord::isCurrentLocationKnown);
|
JavaRDD<HoodieRecord<T>> taggedValidRecords = taggedRecords.filter(HoodieRecord::isCurrentLocationKnown);
|
||||||
if (!taggedValidRecords.isEmpty()) {
|
if (!taggedValidRecords.isEmpty()) {
|
||||||
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
metrics.updateIndexMetrics(LOOKUP_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
||||||
indexTimer = null;
|
|
||||||
return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
|
return upsertRecordsInternal(taggedValidRecords, commitTime, table, true);
|
||||||
} else {
|
} else {
|
||||||
// if entire set of keys are non existent
|
// if entire set of keys are non existent
|
||||||
@@ -397,18 +386,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
|
return updateIndexAndCommitIfNeeded(writeStatusRDD, table, commitTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void commitOnAutoCommit(String commitTime, JavaRDD<WriteStatus> resultRDD, String actionType) {
|
|
||||||
if (config.shouldAutoCommit()) {
|
|
||||||
LOG.info("Auto commit enabled: Committing " + commitTime);
|
|
||||||
boolean commitResult = commit(commitTime, resultRDD, Option.empty(), actionType);
|
|
||||||
if (!commitResult) {
|
|
||||||
throw new HoodieCommitException("Failed to commit " + commitTime);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG.info("Auto commit disabled for " + commitTime);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<HoodieRecord<T>> records,
|
private JavaRDD<HoodieRecord<T>> combineOnCondition(boolean condition, JavaRDD<HoodieRecord<T>> records,
|
||||||
int parallelism) {
|
int parallelism) {
|
||||||
return condition ? deduplicateRecords(records, parallelism) : records;
|
return condition ? deduplicateRecords(records, parallelism) : records;
|
||||||
@@ -486,102 +463,33 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table,
|
|
||||||
String commitTime) {
|
|
||||||
// cache writeStatusRDD before updating index, so that all actions before this are not triggered again for future
|
|
||||||
// RDD actions that are performed after updating the index.
|
|
||||||
writeStatusRDD = writeStatusRDD.persist(config.getWriteStatusStorageLevel());
|
|
||||||
indexTimer = metrics.getIndexCtx();
|
|
||||||
// Update the index back
|
|
||||||
JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table);
|
|
||||||
metrics.updateIndexMetrics(UPDATE_STR, metrics.getDurationInMs(indexTimer == null ? 0L : indexTimer.stop()));
|
|
||||||
indexTimer = null;
|
|
||||||
// Trigger the insert and collect statuses
|
|
||||||
commitOnAutoCommit(commitTime, statuses, table.getMetaClient().getCommitActionType());
|
|
||||||
return statuses;
|
|
||||||
}
|
|
||||||
|
|
||||||
private JavaRDD<HoodieRecord<T>> partition(JavaRDD<HoodieRecord<T>> dedupedRecords, Partitioner partitioner) {
|
private JavaRDD<HoodieRecord<T>> partition(JavaRDD<HoodieRecord<T>> dedupedRecords, Partitioner partitioner) {
|
||||||
return dedupedRecords.mapToPair(
|
return dedupedRecords.mapToPair(
|
||||||
record -> new Tuple2<>(new Tuple2<>(record.getKey(), Option.ofNullable(record.getCurrentLocation())), record))
|
record -> new Tuple2<>(new Tuple2<>(record.getKey(), Option.ofNullable(record.getCurrentLocation())), record))
|
||||||
.partitionBy(partitioner).map(Tuple2::_2);
|
.partitionBy(partitioner).map(Tuple2::_2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
@Override
|
||||||
* Commit changes performed at the given commitTime marker.
|
protected void postCommit(HoodieCommitMetadata metadata, String instantTime,
|
||||||
*/
|
Option<Map<String, String>> extraMetadata) throws IOException {
|
||||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses) {
|
|
||||||
return commit(commitTime, writeStatuses, Option.empty());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
// Do an inline compaction if enabled
|
||||||
* Commit changes performed at the given commitTime marker.
|
if (config.isInlineCompaction()) {
|
||||||
*/
|
metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT_PROP, "true");
|
||||||
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
forceCompact(extraMetadata);
|
||||||
Option<Map<String, String>> extraMetadata) {
|
} else {
|
||||||
HoodieTableMetaClient metaClient = createMetaClient(false);
|
metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT_PROP, "false");
|
||||||
return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
|
|
||||||
Option<Map<String, String>> extraMetadata, String actionType) {
|
|
||||||
|
|
||||||
LOG.info("Commiting " + commitTime);
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
|
||||||
|
|
||||||
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
|
|
||||||
HoodieCommitMetadata metadata = new HoodieCommitMetadata();
|
|
||||||
|
|
||||||
List<HoodieWriteStat> stats = writeStatuses.map(WriteStatus::getStat).collect();
|
|
||||||
|
|
||||||
updateMetadataAndRollingStats(actionType, metadata, stats);
|
|
||||||
|
|
||||||
// Finalize write
|
|
||||||
finalizeWrite(table, commitTime, stats);
|
|
||||||
|
|
||||||
// add in extra metadata
|
|
||||||
if (extraMetadata.isPresent()) {
|
|
||||||
extraMetadata.get().forEach(metadata::addMetadata);
|
|
||||||
}
|
}
|
||||||
metadata.addMetadata(HoodieCommitMetadata.SCHEMA_KEY, config.getSchema());
|
// We cannot have unbounded commit files. Archive commits if we have to archive
|
||||||
|
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(config, createMetaClient(true));
|
||||||
try {
|
archiveLog.archiveIfRequired(jsc);
|
||||||
activeTimeline.saveAsComplete(new HoodieInstant(true, actionType, commitTime),
|
if (config.isAutoClean()) {
|
||||||
Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8)));
|
// Call clean to cleanup if there is anything to cleanup after the commit,
|
||||||
// Save was a success & Do a inline compaction if enabled
|
LOG.info("Auto cleaning is enabled. Running cleaner now");
|
||||||
if (config.isInlineCompaction()) {
|
clean(instantTime);
|
||||||
metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT_PROP, "true");
|
} else {
|
||||||
forceCompact(extraMetadata);
|
LOG.info("Auto cleaning is not enabled. Not running cleaner now");
|
||||||
} else {
|
|
||||||
metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT_PROP, "false");
|
|
||||||
}
|
|
||||||
|
|
||||||
// We cannot have unbounded commit files. Archive commits if we have to archive
|
|
||||||
HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(config, createMetaClient(true));
|
|
||||||
archiveLog.archiveIfRequired(jsc);
|
|
||||||
if (config.isAutoClean()) {
|
|
||||||
// Call clean to cleanup if there is anything to cleanup after the commit,
|
|
||||||
LOG.info("Auto cleaning is enabled. Running cleaner now");
|
|
||||||
clean(commitTime);
|
|
||||||
} else {
|
|
||||||
LOG.info("Auto cleaning is not enabled. Not running cleaner now");
|
|
||||||
}
|
|
||||||
if (writeContext != null) {
|
|
||||||
long durationInMs = metrics.getDurationInMs(writeContext.stop());
|
|
||||||
metrics.updateCommitMetrics(HoodieActiveTimeline.COMMIT_FORMATTER.parse(commitTime).getTime(), durationInMs,
|
|
||||||
metadata, actionType);
|
|
||||||
writeContext = null;
|
|
||||||
}
|
|
||||||
LOG.info("Committed " + commitTime);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime,
|
|
||||||
e);
|
|
||||||
} catch (ParseException e) {
|
|
||||||
throw new HoodieCommitException("Failed to complete commit " + config.getBasePath() + " at time " + commitTime
|
|
||||||
+ "Instant time is not of valid format", e);
|
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -840,84 +748,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
return metrics.getRollbackCtx();
|
return metrics.getRollbackCtx();
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<HoodieRollbackStat> doRollbackAndGetStats(final HoodieInstant instantToRollback) throws
|
|
||||||
IOException {
|
|
||||||
final String commitToRollback = instantToRollback.getTimestamp();
|
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
|
||||||
createMetaClient(true), config, jsc);
|
|
||||||
HoodieTimeline inflightAndRequestedCommitTimeline = table.getPendingCommitTimeline();
|
|
||||||
HoodieTimeline commitTimeline = table.getCompletedCommitsTimeline();
|
|
||||||
// Check if any of the commits is a savepoint - do not allow rollback on those commits
|
|
||||||
List<String> savepoints = table.getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
savepoints.forEach(s -> {
|
|
||||||
if (s.contains(commitToRollback)) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Could not rollback a savepointed commit. Delete savepoint first before rolling back" + s);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if (commitTimeline.empty() && inflightAndRequestedCommitTimeline.empty()) {
|
|
||||||
// nothing to rollback
|
|
||||||
LOG.info("No commits to rollback " + commitToRollback);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure only the last n commits are being rolled back
|
|
||||||
// If there is a commit in-between or after that is not rolled back, then abort
|
|
||||||
|
|
||||||
if ((commitToRollback != null) && !commitTimeline.empty()
|
|
||||||
&& !commitTimeline.findInstantsAfter(commitToRollback, Integer.MAX_VALUE).empty()) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Found commits after time :" + commitToRollback + ", please rollback greater commits first");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> inflights = inflightAndRequestedCommitTimeline.getInstants().map(HoodieInstant::getTimestamp)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
if ((commitToRollback != null) && !inflights.isEmpty() && (inflights.indexOf(commitToRollback) != inflights.size() - 1)) {
|
|
||||||
throw new HoodieRollbackException(
|
|
||||||
"Found in-flight commits after time :" + commitToRollback + ", please rollback greater commits first");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<HoodieRollbackStat> stats = table.rollback(jsc, instantToRollback, true);
|
|
||||||
|
|
||||||
LOG.info("Deleted inflight commits " + commitToRollback);
|
|
||||||
|
|
||||||
// cleanup index entries
|
|
||||||
if (!index.rollbackCommit(commitToRollback)) {
|
|
||||||
throw new HoodieRollbackException("Rollback index changes failed, for time :" + commitToRollback);
|
|
||||||
}
|
|
||||||
LOG.info("Index rolled back for commits " + commitToRollback);
|
|
||||||
return stats;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void finishRollback(final Timer.Context context, List<HoodieRollbackStat> rollbackStats,
|
|
||||||
List<String> commitsToRollback, final String startRollbackTime) throws IOException {
|
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
|
||||||
Option<Long> durationInMs = Option.empty();
|
|
||||||
long numFilesDeleted = rollbackStats.stream().mapToLong(stat -> stat.getSuccessDeleteFiles().size()).sum();
|
|
||||||
if (context != null) {
|
|
||||||
durationInMs = Option.of(metrics.getDurationInMs(context.stop()));
|
|
||||||
metrics.updateRollbackMetrics(durationInMs.get(), numFilesDeleted);
|
|
||||||
}
|
|
||||||
HoodieRollbackMetadata rollbackMetadata = AvroUtils
|
|
||||||
.convertRollbackMetadata(startRollbackTime, durationInMs, commitsToRollback, rollbackStats);
|
|
||||||
//TODO: varadarb - This will be fixed in subsequent PR when Rollback and Clean transition mimics that of commit
|
|
||||||
table.getActiveTimeline().createNewInstant(new HoodieInstant(State.INFLIGHT, HoodieTimeline.ROLLBACK_ACTION,
|
|
||||||
startRollbackTime));
|
|
||||||
table.getActiveTimeline().saveAsComplete(
|
|
||||||
new HoodieInstant(true, HoodieTimeline.ROLLBACK_ACTION, startRollbackTime),
|
|
||||||
AvroUtils.serializeRollbackMetadata(rollbackMetadata));
|
|
||||||
LOG.info("Commits " + commitsToRollback + " rollback is complete");
|
|
||||||
|
|
||||||
if (!table.getActiveTimeline().getCleanerTimeline().empty()) {
|
|
||||||
LOG.info("Cleaning up older rollback meta files");
|
|
||||||
// Cleanup of older cleaner meta files
|
|
||||||
// TODO - make the commit archival generic and archive rollback metadata
|
|
||||||
FSUtils.deleteOlderRollbackMetaFiles(fs, table.getMetaClient().getMetaPath(),
|
|
||||||
table.getActiveTimeline().getRollbackTimeline().getInstants());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void finishRestore(final Timer.Context context, Map<String, List<HoodieRollbackStat>> commitToStats,
|
private void finishRestore(final Timer.Context context, Map<String, List<HoodieRollbackStat>> commitToStats,
|
||||||
List<String> commitsToRollback, final String startRestoreTime, final String restoreToInstant) throws IOException {
|
List<String> commitsToRollback, final String startRestoreTime, final String restoreToInstant) throws IOException {
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
HoodieTable<T> table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
||||||
@@ -946,29 +776,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void rollbackInternal(String commitToRollback) {
|
|
||||||
final String startRollbackTime = HoodieActiveTimeline.createNewInstantTime();
|
|
||||||
final Timer.Context context = startContext();
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
try {
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
HoodieTable<T> table = HoodieTable.getHoodieTable(
|
|
||||||
createMetaClient(true), config, jsc);
|
|
||||||
Option<HoodieInstant> rollbackInstantOpt =
|
|
||||||
Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
|
||||||
.filter(instant -> HoodieActiveTimeline.EQUAL.test(instant.getTimestamp(), commitToRollback))
|
|
||||||
.findFirst());
|
|
||||||
|
|
||||||
if (rollbackInstantOpt.isPresent()) {
|
|
||||||
List<HoodieRollbackStat> stats = doRollbackAndGetStats(rollbackInstantOpt.get());
|
|
||||||
finishRollback(context, stats, Collections.singletonList(commitToRollback), startRollbackTime);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieRollbackException("Failed to rollback " + config.getBasePath() + " commits " + commitToRollback,
|
|
||||||
e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Releases any resources used by the client.
|
* Releases any resources used by the client.
|
||||||
*/
|
*/
|
||||||
@@ -977,9 +784,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
// Stop timeline-server if running
|
// Stop timeline-server if running
|
||||||
super.close();
|
super.close();
|
||||||
this.cleanClient.close();
|
this.cleanClient.close();
|
||||||
// Calling this here releases any resources used by your index, so make sure to finish any related operations
|
|
||||||
// before this point
|
|
||||||
this.index.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -1126,7 +930,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* Deduplicate Hoodie records, using the given deduplication function.
|
* Deduplicate Hoodie records, using the given deduplication function.
|
||||||
*/
|
*/
|
||||||
JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records, int parallelism) {
|
JavaRDD<HoodieRecord<T>> deduplicateRecords(JavaRDD<HoodieRecord<T>> records, int parallelism) {
|
||||||
boolean isIndexingGlobal = index.isGlobal();
|
boolean isIndexingGlobal = getIndex().isGlobal();
|
||||||
return records.mapToPair(record -> {
|
return records.mapToPair(record -> {
|
||||||
HoodieKey hoodieKey = record.getKey();
|
HoodieKey hoodieKey = record.getKey();
|
||||||
// If index used is global, then records are expected to differ in their partitionPath
|
// If index used is global, then records are expected to differ in their partitionPath
|
||||||
@@ -1146,7 +950,7 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
* Deduplicate Hoodie records, using the given deduplication function.
|
* Deduplicate Hoodie records, using the given deduplication function.
|
||||||
*/
|
*/
|
||||||
JavaRDD<HoodieKey> deduplicateKeys(JavaRDD<HoodieKey> keys, int parallelism) {
|
JavaRDD<HoodieKey> deduplicateKeys(JavaRDD<HoodieKey> keys, int parallelism) {
|
||||||
boolean isIndexingGlobal = index.isGlobal();
|
boolean isIndexingGlobal = getIndex().isGlobal();
|
||||||
if (isIndexingGlobal) {
|
if (isIndexingGlobal) {
|
||||||
return keys.keyBy(HoodieKey::getRecordKey)
|
return keys.keyBy(HoodieKey::getRecordKey)
|
||||||
.reduceByKey((key1, key2) -> key1)
|
.reduceByKey((key1, key2) -> key1)
|
||||||
@@ -1169,45 +973,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private HoodieTable getTableAndInitCtx(OperationType operationType) {
|
|
||||||
HoodieTableMetaClient metaClient = createMetaClient(true);
|
|
||||||
if (operationType == OperationType.DELETE) {
|
|
||||||
setWriteSchemaFromLastInstant(metaClient);
|
|
||||||
}
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
|
|
||||||
if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
|
|
||||||
writeContext = metrics.getCommitCtx();
|
|
||||||
} else {
|
|
||||||
writeContext = metrics.getDeltaCommitCtx();
|
|
||||||
}
|
|
||||||
return table;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets write schema from last instant since deletes may not have schema set in the config.
|
|
||||||
*/
|
|
||||||
private void setWriteSchemaFromLastInstant(HoodieTableMetaClient metaClient) {
|
|
||||||
try {
|
|
||||||
HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
|
|
||||||
Option<HoodieInstant> lastInstant =
|
|
||||||
activeTimeline.filterCompletedInstants().filter(s -> s.getAction().equals(metaClient.getCommitActionType()))
|
|
||||||
.lastInstant();
|
|
||||||
if (lastInstant.isPresent()) {
|
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
|
||||||
activeTimeline.getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
|
|
||||||
if (commitMetadata.getExtraMetadata().containsKey(HoodieCommitMetadata.SCHEMA_KEY)) {
|
|
||||||
config.setSchema(commitMetadata.getExtraMetadata().get(HoodieCommitMetadata.SCHEMA_KEY));
|
|
||||||
} else {
|
|
||||||
throw new HoodieIOException("Latest commit does not have any schema in commit metadata");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw new HoodieIOException("Deletes issued without any prior commits");
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new HoodieIOException("IOException thrown while reading last commit metadata", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Compaction specific private methods
|
* Compaction specific private methods
|
||||||
*/
|
*/
|
||||||
@@ -1297,22 +1062,6 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void finalizeWrite(HoodieTable<T> table, String instantTime, List<HoodieWriteStat> stats) {
|
|
||||||
try {
|
|
||||||
final Timer.Context finalizeCtx = metrics.getFinalizeCtx();
|
|
||||||
table.finalizeWrite(jsc, instantTime, stats);
|
|
||||||
if (finalizeCtx != null) {
|
|
||||||
Option<Long> durationInMs = Option.of(metrics.getDurationInMs(finalizeCtx.stop()));
|
|
||||||
durationInMs.ifPresent(duration -> {
|
|
||||||
LOG.info("Finalize write elapsed time (milliseconds): " + duration);
|
|
||||||
metrics.updateFinalizeWriteMetrics(duration, stats.size());
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} catch (HoodieIOException ioe) {
|
|
||||||
throw new HoodieCommitException("Failed to complete commit " + instantTime + " due to finalize errors.", ioe);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file to the .requested file
|
* Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file to the .requested file
|
||||||
*
|
*
|
||||||
@@ -1373,59 +1122,4 @@ public class HoodieWriteClient<T extends HoodieRecordPayload> extends AbstractHo
|
|||||||
});
|
});
|
||||||
return compactionInstantTimeOpt;
|
return compactionInstantTimeOpt;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
private void updateMetadataAndRollingStats(String actionType, HoodieCommitMetadata metadata,
|
|
||||||
List<HoodieWriteStat> writeStats) {
|
|
||||||
// TODO : make sure we cannot rollback / archive last commit file
|
|
||||||
try {
|
|
||||||
// Create a Hoodie table which encapsulated the commits and files visible
|
|
||||||
HoodieTable table = HoodieTable.getHoodieTable(createMetaClient(true), config, jsc);
|
|
||||||
// 0. All of the rolling stat management is only done by the DELTA commit for MOR and COMMIT for COW other wise
|
|
||||||
// there may be race conditions
|
|
||||||
HoodieRollingStatMetadata rollingStatMetadata = new HoodieRollingStatMetadata(actionType);
|
|
||||||
// 1. Look up the previous compaction/commit and get the HoodieCommitMetadata from there.
|
|
||||||
// 2. Now, first read the existing rolling stats and merge with the result of current metadata.
|
|
||||||
|
|
||||||
// Need to do this on every commit (delta or commit) to support COW and MOR.
|
|
||||||
|
|
||||||
for (HoodieWriteStat stat : writeStats) {
|
|
||||||
String partitionPath = stat.getPartitionPath();
|
|
||||||
// TODO: why is stat.getPartitionPath() null at times here.
|
|
||||||
metadata.addWriteStat(partitionPath, stat);
|
|
||||||
HoodieRollingStat hoodieRollingStat = new HoodieRollingStat(stat.getFileId(),
|
|
||||||
stat.getNumWrites() - (stat.getNumUpdateWrites() - stat.getNumDeletes()), stat.getNumUpdateWrites(),
|
|
||||||
stat.getNumDeletes(), stat.getTotalWriteBytes());
|
|
||||||
rollingStatMetadata.addRollingStat(partitionPath, hoodieRollingStat);
|
|
||||||
}
|
|
||||||
// The last rolling stat should be present in the completed timeline
|
|
||||||
Option<HoodieInstant> lastInstant =
|
|
||||||
table.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant();
|
|
||||||
if (lastInstant.isPresent()) {
|
|
||||||
HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
|
|
||||||
table.getActiveTimeline().getInstantDetails(lastInstant.get()).get(), HoodieCommitMetadata.class);
|
|
||||||
Option<String> lastRollingStat = Option
|
|
||||||
.ofNullable(commitMetadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY));
|
|
||||||
if (lastRollingStat.isPresent()) {
|
|
||||||
rollingStatMetadata = rollingStatMetadata
|
|
||||||
.merge(HoodieCommitMetadata.fromBytes(lastRollingStat.get().getBytes(), HoodieRollingStatMetadata.class));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
metadata.addMetadata(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY, rollingStatMetadata.toJsonString());
|
|
||||||
} catch (IOException io) {
|
|
||||||
throw new HoodieCommitException("Unable to save rolling stats");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Refers to different operation types.
|
|
||||||
*/
|
|
||||||
enum OperationType {
|
|
||||||
INSERT,
|
|
||||||
INSERT_PREPPED,
|
|
||||||
UPSERT,
|
|
||||||
UPSERT_PREPPED,
|
|
||||||
DELETE,
|
|
||||||
BULK_INSERT,
|
|
||||||
BULK_INSERT_PREPPED
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user