1
0

Implement Savepoints and required metadata timeline (#86)

- Introduce avro to save clean metadata with details about the last commit that was retained
- Save rollback metadata in the meta timeline
- Create savepoint metadata and add API to createSavepoint, deleteSavepoint and rollbackToSavepoint
- Savepointed commit should not be rolledback or cleaned or archived
- introduce cli commands to show, create and rollback to savepoints
- Write unit tests to test savepoints and rollbackToSavepoints
This commit is contained in:
prazanna
2017-03-13 15:12:03 -07:00
committed by GitHub
parent 69d3950a32
commit 6f36e1eaaf
27 changed files with 1423 additions and 130 deletions

View File

@@ -0,0 +1,24 @@
{"namespace": "com.uber.hoodie.avro.model",
"type": "record",
"name": "HoodieCleanMetadata",
"fields": [
{"name": "startCleanTime", "type": "string"},
{"name": "timeTakenInMillis", "type": "long"},
{"name": "totalFilesDeleted", "type": "int"},
{"name": "earliestCommitToRetain", "type": "string"},
{"name": "partitionMetadata", "type": {
"type" : "map", "values" : {
"type": "record",
"name": "HoodieCleanPartitionMetadata",
"fields": [
{"name": "partitionPath", "type": "string"},
{"name": "policy", "type": "string"},
{"name": "deletePathPatterns", "type": {"type": "array", "items": "string"}},
{"name": "successDeleteFiles", "type": {"type": "array", "items": "string"}},
{"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}}
]
}
}
}
]
}

View File

@@ -0,0 +1,22 @@
{"namespace": "com.uber.hoodie.avro.model",
"type": "record",
"name": "HoodieRollbackMetadata",
"fields": [
{"name": "startRollbackTime", "type": "string"},
{"name": "timeTakenInMillis", "type": "long"},
{"name": "totalFilesDeleted", "type": "int"},
{"name": "commitsRollback", "type": {"type": "array", "items": "string"}},
{"name": "partitionMetadata", "type": {
"type" : "map", "values" : {
"type": "record",
"name": "HoodieRollbackPartitionMetadata",
"fields": [
{"name": "partitionPath", "type": "string"},
{"name": "successDeleteFiles", "type": {"type": "array", "items": "string"}},
{"name": "failedDeleteFiles", "type": {"type": "array", "items": "string"}}
]
}
}
}
]
}

View File

@@ -0,0 +1,9 @@
{"namespace": "com.uber.hoodie.avro.model",
"type": "record",
"name": "HoodieSavepointMetadata",
"fields": [
{"name": "savepointedBy", "type": "string"},
{"name": "savepointedAt", "type": "string"},
{"name": "comments", "type": "string"}
]
}

View File

@@ -0,0 +1,128 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common;
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import org.apache.hadoop.fs.FileStatus;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
/**
* Collects stats about a single partition clean operation
*/
public class HoodieCleanStat implements Serializable {
// Policy used
private final HoodieCleaningPolicy policy;
// Partition path cleaned
private final String partitionPath;
// The patterns that were generated for the delete operation
private final List<String> deletePathPatterns;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
// Earliest commit that was retained in this clean
private final String earliestCommitToRetain;
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
List<String> deletePathPatterns, List<String> successDeleteFiles,
List<String> failedDeleteFiles, String earliestCommitToRetain) {
this.policy = policy;
this.partitionPath = partitionPath;
this.deletePathPatterns = deletePathPatterns;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
this.earliestCommitToRetain = earliestCommitToRetain;
}
public HoodieCleaningPolicy getPolicy() {
return policy;
}
public String getPartitionPath() {
return partitionPath;
}
public List<String> getDeletePathPatterns() {
return deletePathPatterns;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
}
public String getEarliestCommitToRetain() {
return earliestCommitToRetain;
}
public static HoodieCleanStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private HoodieCleaningPolicy policy;
private List<String> deletePathPatterns;
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private String partitionPath;
private String earliestCommitToRetain;
public Builder withPolicy(HoodieCleaningPolicy policy) {
this.policy = policy;
return this;
}
public Builder withDeletePathPattern(List<String> deletePathPatterns) {
this.deletePathPatterns = deletePathPatterns;
return this;
}
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
//noinspection Convert2MethodRef
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
return this;
}
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public Builder withEarliestCommitRetained(Optional<HoodieInstant> earliestCommitToRetain) {
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ?
earliestCommitToRetain.get().getTimestamp() :
"-1";
return this;
}
public HoodieCleanStat build() {
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
}
}
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common;
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import org.apache.hadoop.fs.FileStatus;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Collects stats about a single partition clean operation
*/
public class HoodieRollbackStat implements Serializable {
// Partition path
private final String partitionPath;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
List<String> failedDeleteFiles) {
this.partitionPath = partitionPath;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
}
public String getPartitionPath() {
return partitionPath;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
}
public static HoodieRollbackStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private String partitionPath;
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
//noinspection Convert2MethodRef
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
return this;
}
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public HoodieRollbackStat build() {
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles);
}
}
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
public enum HoodieCleaningPolicy {
KEEP_LATEST_FILE_VERSIONS,
KEEP_LATEST_COMMITS
}

View File

@@ -41,6 +41,7 @@ public interface HoodieTimeline extends Serializable {
String COMMIT_ACTION = "commit";
String DELTA_COMMIT_ACTION = "deltacommit";
String CLEAN_ACTION = "clean";
String ROLLBACK_ACTION = "rollback";
String SAVEPOINT_ACTION = "savepoint";
String COMPACTION_ACTION = "compaction";
String INFLIGHT_EXTENSION = ".inflight";
@@ -48,12 +49,14 @@ public interface HoodieTimeline extends Serializable {
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
//this is to preserve backwards compatibility on commit in-flight filenames
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
@@ -191,6 +194,14 @@ public interface HoodieTimeline extends Serializable {
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
}
static String makeRollbackFileName(String instant) {
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
}
static String makeInflightRollbackFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
}
static String makeInflightSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
}

View File

@@ -87,8 +87,9 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
this(fs, metaPath,
new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION, COMPACTION_EXTENSION});
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
}
/**
@@ -160,6 +161,16 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline {
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the rollback action (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getRollbackTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the save point action (inflight and completed) in the active timeline
*

View File

@@ -89,6 +89,10 @@ public class HoodieInstant implements Serializable {
return isInflight ?
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
HoodieTimeline.makeCleanerFileName(timestamp);
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightRollbackFileName(timestamp) :
HoodieTimeline.makeRollbackFileName(timestamp);
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightSavePointFileName(timestamp) :

View File

@@ -16,24 +16,45 @@
package com.uber.hoodie.common.util;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata;
import com.uber.hoodie.avro.model.HoodieRollbackMetadata;
import com.uber.hoodie.avro.model.HoodieRollbackPartitionMetadata;
import com.uber.hoodie.avro.model.HoodieSavepointMetadata;
import com.uber.hoodie.common.HoodieCleanStat;
import com.uber.hoodie.common.HoodieRollbackStat;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.file.SeekableInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hadoop.fs.AvroFSInput;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.ByteArrayOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
@@ -68,4 +89,85 @@ public class AvroUtils {
});
return loadedRecords;
}
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieCleanPartitionMetadata>builder();
int totalDeleted = 0;
String earliestCommitToRetain = null;
for (HoodieCleanStat stat : cleanStats) {
HoodieCleanPartitionMetadata metadata =
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
stat.getDeletePathPatterns());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
if (earliestCommitToRetain == null) {
// This will be the same for all partitions
earliestCommitToRetain = stat.getEarliestCommitToRetain();
}
}
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
}
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieRollbackPartitionMetadata>builder();
int totalDeleted = 0;
for (HoodieRollbackStat stat : stats) {
HoodieRollbackPartitionMetadata metadata =
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
}
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, commits, partitionMetadataBuilder.build());
}
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
}
public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
}
public static Optional<byte[]> serializeRollbackMetadata(
HoodieRollbackMetadata rollbackMetadata) throws IOException {
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
}
public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
Class<T> clazz) throws IOException {
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
fileWriter.create(metadata.getSchema(), baos);
fileWriter.append(metadata);
fileWriter.flush();
return Optional.of(baos.toByteArray());
}
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
throws IOException {
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
}
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
Class<T> clazz) throws IOException {
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
FileReader<T> fileReader =
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
Preconditions
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
return fileReader.next();
}
}

View File

@@ -19,6 +19,7 @@ package com.uber.hoodie.common.util;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.log.HoodieLogFile;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.InvalidHoodiePathException;
import org.apache.hadoop.conf.Configuration;
@@ -49,6 +50,8 @@ public class FSUtils {
// Log files are of this pattern - b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.avro.delta.1
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("(.*)_(.*)\\.(.*)\\.(.*)\\.([0-9]*)");
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
private static final long MIN_ROLLBACK_TO_KEEP = 10;
public static FileSystem getFs() {
Configuration conf = new Configuration();
@@ -305,4 +308,31 @@ public class FSUtils {
}
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
// skip MIN_CLEAN_TO_KEEP and delete rest
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
e);
}
});
}
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
// skip MIN_ROLLBACK_TO_KEEP and delete rest
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException(
"Could not delete rollback meta files " + s.getFileName(), e);
}
});
}
}