1
0

[HUDI-2422] Adding rollback plan and rollback requested instant (#3651)

- This patch introduces a rollback plan and a rollback.requested instant. Rollback is now done in two phases: rollback planning and rollback action. During planning, we prepare the rollback plan and serialize it to rollback.requested. During the rollback action phase, we fetch the details from the plan and simply delete the files it lists. This ensures that the final rollback commit metadata contains all files that were rolled back, even if the rollback failed midway and was retried.
This commit is contained in:
Sivabalan Narayanan
2021-09-16 11:16:06 -04:00
committed by GitHub
parent 4deaa30c8d
commit b8dad628e5
35 changed files with 1048 additions and 419 deletions

View File

@@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
{
"namespace": "org.apache.hudi.avro.model",
"type": "record",
"name": "HoodieRollbackPlan",
"fields": [
{
"name": "instantToRollback",
"doc": "Hoodie instant that needs to be rolled back",
"type": ["null", "HoodieInstantInfo"],
"default": null
},
{
"name": "RollbackRequests",
"type":["null", {
"type":"array",
"items":{
"type": "record",
"name": "HoodieRollbackRequest",
"fields": [
{"name": "partitionPath", "type": "string"},
{"name": "fileId",
"type":["null", "string"],
"default": null
},
{"name": "latestBaseInstant",
"type":["null", "string"],
"default": null
},
{"name": "filesToBeDeleted",
"default": [],
"type": {
"type": "array",
"default": [],
"items": "string"
}
},
{"name": "logBlocksToBeDeleted",
"type": ["null", {
"type": "map",
"doc": "Log blocks that need to be deleted as part of the rollback",
"values": {
"type": "long",
"doc": "Size of this file/block in bytes"
}
}],
"default":null
}
]
}
}],
"default" : null
},
{
"name":"version",
"type":["int", "null"],
"default": 1
}
]
}

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.common.function.SerializableConsumer;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import java.util.List;
import java.util.Map;
@@ -60,6 +61,9 @@ public abstract class HoodieEngineContext {
/**
 * Maps each input element to a key/value pair using {@code mapToPairFunc} and combines values using
 * {@code reduceFunc}.
 *
 * <p>NOTE(review): presumably values sharing the same key are reduced together and one value per key is
 * returned; the exact semantics are defined by the engine-specific implementations - confirm there.
 *
 * @param data input elements
 * @param mapToPairFunc function turning an input element into a key/value pair
 * @param reduceFunc function combining two values into one
 * @param parallelism parallelism hint; how it is honored depends on the implementation
 * @return list of reduced values
 */
public abstract <I, K, V> List<V> mapToPairAndReduceByKey(
List<I> data, SerializablePairFunction<I, K, V> mapToPairFunc, SerializableBiFunction<V, V, V> reduceFunc, int parallelism);
/**
 * Reduces data that is already expressed as key/value pairs, combining values with {@code reduceFunc}.
 *
 * <p>NOTE(review): type parameter {@code I} does not appear in the parameter or return types of this
 * signature; it is kept because overriding classes declare the same type parameters.
 *
 * @param data key/value pairs to reduce
 * @param reduceFunc function combining two values into one
 * @param parallelism parallelism hint; how it is honored depends on the implementation
 * @return list of reduced values (presumably one per distinct key - confirm against implementations)
 */
public abstract <I, K, V> List<V> reduceByKey(
List<Pair<K, V>> data, SerializableBiFunction<V, V, V> reduceFunc, int parallelism);
/**
 * Applies {@code func}, which turns one input element into a stream of outputs, to the input list and
 * returns the outputs as a list.
 *
 * @param data input elements
 * @param func function producing a stream of outputs for an input element
 * @param parallelism parallelism hint; how it is honored depends on the implementation
 * @return list of output elements
 */
public abstract <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism);
/**
 * Runs {@code consumer} over the input list; no result is returned.
 *
 * <p>NOTE(review): presumably the consumer is invoked once per element - confirm against implementations.
 *
 * @param data input elements
 * @param consumer action receiving an input element
 * @param parallelism parallelism hint; how it is honored depends on the implementation
 */
public abstract <I> void foreach(List<I> data, SerializableConsumer<I> consumer, int parallelism);

View File

@@ -30,6 +30,7 @@ import org.apache.hudi.common.util.collection.Pair;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -67,6 +68,16 @@ public final class HoodieLocalEngineContext extends HoodieEngineContext {
.collect(Collectors.toList());
}
/**
 * Groups the given pairs by key and folds the values of every group into a single value using
 * {@code reduceFunc}.
 *
 * @param data key/value pairs to reduce
 * @param reduceFunc function combining two values of the same key into one
 * @param parallelism not referenced by this implementation
 * @return the non-null reduced values, one per group that produced a value
 */
@Override
public <I, K, V> List<V> reduceByKey(
    List<Pair<K, V>> data, SerializableBiFunction<V, V, V> reduceFunc, int parallelism) {
  // First pass: bucket the pairs by their key using a parallel stream.
  Map<K, List<Pair<K, V>>> pairsByKey = data.stream().parallel()
      .collect(Collectors.groupingBy(pair -> pair.getKey()));

  // Second pass: fold each bucket's values; a bucket without a reduced value maps to null and is dropped.
  return pairsByKey.values().stream()
      .map(bucket -> bucket.stream()
          .map(pair -> pair.getValue())
          .reduce(throwingReduceWrapper(reduceFunc))
          .orElse(null))
      .filter(Objects::nonNull)
      .collect(Collectors.toList());
}
@Override
public <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism) {
return data.stream().parallel().flatMap(throwingFlatMapWrapper(func)).collect(toList());

View File

@@ -67,7 +67,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
CLEAN_EXTENSION, REQUESTED_CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION,
INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION,
ROLLBACK_EXTENSION, INFLIGHT_ROLLBACK_EXTENSION,
ROLLBACK_EXTENSION, REQUESTED_ROLLBACK_EXTENSION, INFLIGHT_ROLLBACK_EXTENSION,
REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION));
private static final Logger LOG = LogManager.getLogger(HoodieActiveTimeline.class);
protected HoodieTableMetaClient metaClient;
@@ -229,6 +229,11 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
return readDataFromPath(new Path(metaClient.getMetaPath(), instant.getFileName()));
}
/**
 * Reads the content of the file that backs the given rollback instant.
 *
 * @param instant instant whose file name is resolved under the meta path
 * @return whatever {@code readDataFromPath} yields for that file
 */
public Option<byte[]> readRollbackInfoAsBytes(HoodieInstant instant) {
  // Rollback metadata are always stored only in timeline .hoodie
  final Path rollbackInfoPath = new Path(metaClient.getMetaPath(), instant.getFileName());
  return readDataFromPath(rollbackInfoPath);
}
//-----------------------------------------------------------------
// BEGIN - COMPACTION RELATED META-DATA MANAGEMENT.
//-----------------------------------------------------------------
@@ -339,6 +344,37 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
return inflight;
}
/**
 * Moves a rollback instant from the inflight state to the completed state.
 *
 * <p>The argument is validated with {@code ValidationUtils.checkArgument}: it must carry the rollback
 * action and must be inflight.
 *
 * @param inflightInstant rollback instant that is currently inflight
 * @param data optional payload passed along to {@code transitionState}
 * @return the completed rollback instant, carrying the same timestamp as {@code inflightInstant}
 */
public HoodieInstant transitionRollbackInflightToComplete(HoodieInstant inflightInstant, Option<byte[]> data) {
  final boolean isRollbackAction = inflightInstant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION);
  ValidationUtils.checkArgument(isRollbackAction);
  ValidationUtils.checkArgument(inflightInstant.isInflight());

  final String timestamp = inflightInstant.getTimestamp();
  final HoodieInstant completedInstant = new HoodieInstant(State.COMPLETED, ROLLBACK_ACTION, timestamp);
  // Record the state change on the timeline before handing the new instant back.
  transitionState(inflightInstant, completedInstant, data);
  return completedInstant;
}
/**
 * Moves a rollback instant from the requested state to the inflight state.
 *
 * <p>The argument is validated with {@code ValidationUtils.checkArgument}: it must carry the rollback
 * action and must be in the requested state.
 *
 * @param requestedInstant rollback instant that is currently requested
 * @param data optional payload passed along to {@code transitionState}
 * @return the inflight rollback instant, carrying the same timestamp as {@code requestedInstant}
 */
public HoodieInstant transitionRollbackRequestedToInflight(HoodieInstant requestedInstant, Option<byte[]> data) {
  final boolean isRollbackAction = requestedInstant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION);
  ValidationUtils.checkArgument(isRollbackAction);
  ValidationUtils.checkArgument(requestedInstant.isRequested());

  final String timestamp = requestedInstant.getTimestamp();
  final HoodieInstant inflightInstant = new HoodieInstant(State.INFLIGHT, ROLLBACK_ACTION, timestamp);
  // Record the state change on the timeline before handing the new instant back.
  transitionState(requestedInstant, inflightInstant, data);
  return inflightInstant;
}
/**
* Transition replace requested file to replace inflight.
*
@@ -497,6 +533,13 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
createFileInMetaPath(instant.getFileName(), content, false);
}
/**
 * Writes the given content to the file of a requested rollback instant.
 *
 * <p>The instant is validated with {@code ValidationUtils.checkArgument}: it must carry the rollback
 * action and be in the {@code REQUESTED} state. The file is created without allowing overwrite.
 *
 * @param instant requested rollback instant whose file is created
 * @param content optional bytes to store in that file
 */
public void saveToRollbackRequested(HoodieInstant instant, Option<byte[]> content) {
  final boolean isRollbackAction = instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION);
  ValidationUtils.checkArgument(isRollbackAction);
  final boolean isRequestedState = instant.getState().equals(State.REQUESTED);
  ValidationUtils.checkArgument(isRequestedState);

  // Plan is stored in meta path
  final String requestedFileName = instant.getFileName();
  createFileInMetaPath(requestedFileName, content, false);
}
private void createFileInMetaPath(String filename, Option<byte[]> content, boolean allowOverwrite) {
Path fullPath = new Path(metaClient.getMetaPath(), filename);
if (allowOverwrite || metaClient.getTimelineLayoutVersion().isNullVersion()) {

View File

@@ -147,7 +147,8 @@ public class HoodieInstant implements Serializable, Comparable<HoodieInstant> {
: HoodieTimeline.makeCleanerFileName(timestamp);
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
return isInflight() ? HoodieTimeline.makeInflightRollbackFileName(timestamp)
: HoodieTimeline.makeRollbackFileName(timestamp);
: isRequested() ? HoodieTimeline.makeRequestedRollbackFileName(timestamp)
: HoodieTimeline.makeRollbackFileName(timestamp);
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
return isInflight() ? HoodieTimeline.makeInflightSavePointFileName(timestamp)
: HoodieTimeline.makeSavePointFileName(timestamp);

View File

@@ -73,6 +73,7 @@ public interface HoodieTimeline extends Serializable {
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String REQUESTED_CLEAN_EXTENSION = "." + CLEAN_ACTION + REQUESTED_EXTENSION;
// File-name extension for a rollback instant in the inflight state: "." + action + inflight extension.
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
// File-name extension for a rollback instant in the requested state: "." + action + requested extension.
String REQUESTED_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + REQUESTED_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String REQUESTED_COMPACTION_SUFFIX = StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION);
String REQUESTED_COMPACTION_EXTENSION = StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX);
@@ -363,6 +364,10 @@ public interface HoodieTimeline extends Serializable {
return StringUtils.join(instant, HoodieTimeline.ROLLBACK_EXTENSION);
}
/**
 * Builds the file name of a requested rollback by joining the given value with
 * {@code REQUESTED_ROLLBACK_EXTENSION}.
 *
 * @param instant leading part of the file name (callers visible in this change pass the instant timestamp)
 * @return the joined file name
 */
static String makeRequestedRollbackFileName(String instant) {
  final String requestedRollbackFileName = StringUtils.join(instant, REQUESTED_ROLLBACK_EXTENSION);
  return requestedRollbackFileName;
}
/**
 * Builds the file name of an inflight rollback by joining the given value with
 * {@code INFLIGHT_ROLLBACK_EXTENSION}.
 *
 * @param instant leading part of the file name (callers visible in this change pass the instant timestamp)
 * @return the joined file name
 */
static String makeInflightRollbackFileName(String instant) {
  final String inflightRollbackFileName = StringUtils.join(instant, INFLIGHT_ROLLBACK_EXTENSION);
  return inflightRollbackFileName;
}

View File

@@ -28,6 +28,7 @@ import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata;
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata;
import org.apache.hudi.avro.model.HoodieRollbackPlan;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.avro.model.HoodieSavepointPartitionMetadata;
import org.apache.hudi.common.HoodieRollbackStat;
@@ -109,6 +110,10 @@ public class TimelineMetadataUtils {
return serializeAvroMetadata(cleanPlan, HoodieCleanerPlan.class);
}
/**
 * Serializes a rollback plan by delegating to {@code serializeAvroMetadata} with the
 * {@code HoodieRollbackPlan} class.
 *
 * @param rollbackPlan plan to serialize
 * @return the result of {@code serializeAvroMetadata} for the plan
 * @throws IOException declared by this method; propagated from the delegate
 */
public static Option<byte[]> serializeRollbackPlan(HoodieRollbackPlan rollbackPlan) throws IOException {
  final Option<byte[]> serializedPlan = serializeAvroMetadata(rollbackPlan, HoodieRollbackPlan.class);
  return serializedPlan;
}
/**
 * Serializes clean metadata by delegating to {@code serializeAvroMetadata} with the
 * {@code HoodieCleanMetadata} class.
 *
 * @param metadata clean metadata to serialize
 * @return the result of {@code serializeAvroMetadata} for the metadata
 * @throws IOException declared by this method; propagated from the delegate
 */
public static Option<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata) throws IOException {
  final Option<byte[]> serializedMetadata = serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
  return serializedMetadata;
}