1
0

[HUDI-839] Introducing support for rollbacks using marker files (#1756)

* [HUDI-839] Introducing rollback strategy using marker files

 - Adds a new mechanism for rollbacks where it's based on the marker files generated during the write
 - Consequently, marker file/dir deletion now happens post commit, instead of during finalize 
 - Marker files are also generated for AppendHandle, making it consistent throughout the write path 
 - Until upgrade-downgrade mechanism can upgrade non-marker based inflight writes to marker based, this should only be turned on for new datasets.
 - Added marker dir deletion after successful commit/rollback, individual files are not deleted during finalize
 - Fail safe for deleting marker directories, now during timeline archival process
 - Added a check to ensure completed instants are not rolled back using the marker-based strategy, since doing so would be incorrect
 - Reworked tests to rollback inflight instants, instead of completed instants whenever necessary
 - Added a unit test for MarkerBasedRollbackStrategy


Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
lw0090
2020-07-21 13:41:42 +08:00
committed by GitHub
parent b71f25f210
commit 1ec89e9a94
43 changed files with 1947 additions and 512 deletions

View File

@@ -21,6 +21,7 @@ package org.apache.hudi.common;
import org.apache.hadoop.fs.FileStatus;
import java.io.Serializable;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@@ -85,6 +86,15 @@ public class HoodieRollbackStat implements Serializable {
return this;
}
/**
 * Records the outcome of deleting one file.
 *
 * <p>The file name is stored as a single-element list, replacing whatever list was previously held in the
 * corresponding field: the success list when {@code isDeleted} is true, the failure list otherwise.
 *
 * @param fileName  name of the file whose deletion was attempted
 * @param isDeleted whether the deletion succeeded
 * @return this builder
 */
public Builder withDeletedFileResult(String fileName, boolean isDeleted) {
  if (!isDeleted) {
    failedDeleteFiles = Collections.singletonList(fileName);
    return this;
  }
  successDeleteFiles = Collections.singletonList(fileName);
  return this;
}
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
this.commandBlocksCount = commandBlocksCount;
return this;
@@ -96,6 +106,15 @@ public class HoodieRollbackStat implements Serializable {
}
/**
 * Creates the {@link HoodieRollbackStat} from the values collected by this builder.
 *
 * <p>Any of the success list, failure list or command-block map that was never set is replaced by an empty
 * collection, so the constructor never receives {@code null} for them. The type-safe
 * {@code Collections.emptyList()} / {@code Collections.emptyMap()} factories are used instead of the raw
 * {@code EMPTY_LIST} / {@code EMPTY_MAP} constants to avoid unchecked assignments.
 *
 * @return a new rollback stat for the configured partition path
 */
public HoodieRollbackStat build() {
  if (successDeleteFiles == null) {
    successDeleteFiles = Collections.emptyList();
  }
  if (failedDeleteFiles == null) {
    failedDeleteFiles = Collections.emptyList();
  }
  if (commandBlocksCount == null) {
    commandBlocksCount = Collections.emptyMap();
  }
  return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
}
}

View File

@@ -24,7 +24,6 @@ import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
@@ -46,7 +45,6 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
@@ -116,22 +114,6 @@ public class FSUtils {
return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, fileExtension);
}
/**
 * Builds a marker file name of the form {@code <fileId>_<writeToken>_<instantTime>} followed by
 * {@code HoodieTableMetaClient.MARKER_EXTN}.
 *
 * @param instantTime instant time placed last in the name
 * @param writeToken  write token placed in the middle of the name
 * @param fileId      file id placed first in the name
 * @return the marker file name
 */
public static String makeMarkerFile(String instantTime, String writeToken, String fileId) {
  return fileId + "_" + writeToken + "_" + instantTime + HoodieTableMetaClient.MARKER_EXTN;
}
/**
 * Translates the path of a marker file into the path of the data file it corresponds to.
 *
 * <p>The marker root is {@code <basePath>/<TEMPFOLDER_NAME>/<instantTs>} with scheme and authority removed.
 * The part of {@code markerPath} that follows this root is taken as the path relative to {@code basePath};
 * the trailing marker extension is dropped and {@code baseFileExtension} is appended.
 *
 * <p>Only the trailing marker extension is removed. The previous {@code String.replace} call removed every
 * occurrence, which corrupted the result whenever a directory or file name contained the extension text.
 *
 * @param basePath          table base path, used both to locate the marker root and as prefix of the result
 * @param markerPath        full marker file path; checked via {@code ValidationUtils.checkArgument} to end with
 *                          the marker extension and to contain the marker root
 * @param instantTs         instant time naming the marker directory
 * @param baseFileExtension extension appended to the resulting data file path
 * @return the data file path under {@code basePath}
 */
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs,
    String baseFileExtension) {
  ValidationUtils.checkArgument(markerPath.endsWith(HoodieTableMetaClient.MARKER_EXTN));
  String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
      new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
  int begin = markerPath.indexOf(markerRootPath);
  ValidationUtils.checkArgument(begin >= 0,
      "Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
  // +1 skips the separator that follows the marker root
  String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
  // Strip the extension only where it is a suffix; a relative path too short to hold it is left untouched
  String dataRelPath = rPath.endsWith(HoodieTableMetaClient.MARKER_EXTN)
      ? rPath.substring(0, rPath.length() - HoodieTableMetaClient.MARKER_EXTN.length())
      : rPath;
  return String.format("%s/%s%s", basePath, dataRelPath, baseFileExtension);
}
/**
 * Builds a file name mask of the form {@code *_<taskPartitionId>_<instantTime>} followed by the parquet file
 * extension, i.e. with a wildcard in the file id position.
 *
 * @param instantTime     instant time placed last in the mask
 * @param taskPartitionId task partition id placed after the wildcard
 * @return the mask string
 */
public static String maskWithoutFileId(String instantTime, int taskPartitionId) {
  final String parquetExtension = HoodieFileFormat.PARQUET.getFileExtension();
  return "*_" + taskPartitionId + "_" + instantTime + parquetExtension;
}
@@ -171,15 +153,15 @@ public class FSUtils {
/**
 * Given the table base path and a full partition path, returns the partition path relative to the base path.
 *
 * <p>Both paths are first stripped of scheme and authority. The last name component of the base path is then
 * searched for inside the full partition path string, starting at an offset equal to the length of the base
 * path's parent (0 when there is no parent). The text after that component and one separator character is
 * the result.
 *
 * <p>NOTE(review): the lines below contain two signatures and two variants of each statement; this looks like
 * the pre- and post-rename lines of a diff (partitionPath renamed to fullPartitionPath) — confirm which set is
 * current.
 *
 * @param basePath          base path of the table
 * @param fullPartitionPath full path of the partition (named partitionPath in the older signature line)
 * @return the relative partition path, or an empty string when the partition path ends at the base path name
 */
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
public static String getRelativePartitionPath(Path basePath, Path fullPartitionPath) {
basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
String partitionFullPath = partitionPath.toString();
int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
fullPartitionPath = Path.getPathWithoutSchemeAndAuthority(fullPartitionPath);
String fullPartitionPathStr = fullPartitionPath.toString();
// Start searching after the parent prefix so an earlier occurrence of the base name in the parent is skipped
int partitionStartIndex = fullPartitionPathStr.indexOf(basePath.getName(),
basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
// Partition-Path could be empty for non-partitioned tables
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
: partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
return partitionStartIndex + basePath.getName().length() == fullPartitionPathStr.length() ? ""
: fullPartitionPathStr.substring(partitionStartIndex + basePath.getName().length() + 1);
}
/**
@@ -199,19 +181,6 @@ public class FSUtils {
return partitions;
}
/**
 * Collects the data file paths that correspond to the marker files found under a marker directory.
 *
 * <p>Every file visited by {@code processFiles} under {@code markerDir} (meta folder not excluded) whose path
 * ends with the marker extension is translated with {@code translateMarkerToDataPath}; other files are ignored.
 *
 * @param fs                file system used for the traversal
 * @param basePath          table base path passed on to the translation
 * @param instantTs         instant time passed on to the translation
 * @param markerDir         directory whose files are visited
 * @param baseFileExtension extension given to the translated data file paths
 * @return the translated data file paths, in visiting order
 * @throws IOException if the traversal fails
 */
public static List<String> getAllDataFilesForMarkers(FileSystem fs, String basePath, String instantTs,
    String markerDir, String baseFileExtension) throws IOException {
  final List<String> translatedPaths = new LinkedList<>();
  processFiles(fs, markerDir, fileStatus -> {
    final String candidate = fileStatus.getPath().toString();
    if (!candidate.endsWith(HoodieTableMetaClient.MARKER_EXTN)) {
      return true;
    }
    translatedPaths.add(FSUtils.translateMarkerToDataPath(basePath, candidate, instantTs, baseFileExtension));
    return true;
  }, false);
  return translatedPaths;
}
/**
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
* are skipped
@@ -222,8 +191,8 @@ public class FSUtils {
* @param excludeMetaFolder Exclude .hoodie folder
* @throws IOException -
*/
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
boolean excludeMetaFolder) throws IOException {
public static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
boolean excludeMetaFolder) throws IOException {
PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
for (FileStatus child : topLevelStatuses) {
@@ -390,7 +359,7 @@ public class FSUtils {
/**
 * Tells whether the given path looks like a log file, judged by its file name only.
 *
 * @param logPath path whose last name component is tested against LOG_FILE_PATTERN
 * @return the outcome of the check performed by the return statement(s) below
 */
public static boolean isLogFile(Path logPath) {
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
// NOTE(review): two return statements appear here; this looks like the pre- and post-change line of a diff —
// confirm which one is current.
// First form: a match of LOG_FILE_PATTERN anywhere in the name is sufficient.
return matcher.find();
// Second form: additionally requires the name to contain the literal ".log".
return matcher.find() && logPath.getName().contains(".log");
}
/**
@@ -501,18 +470,6 @@ public class FSUtils {
}
}
/**
 * Deletes the restore meta files of all instants in the stream except the first MIN_ROLLBACK_TO_KEEP ones.
 *
 * <p>The stream is consumed with the terminal operation {@code forEach}. The earlier version ended in the
 * intermediate operation {@code map}, which is lazy, so no delete call was ever executed.
 *
 * @param fs       file system on which the meta files are deleted
 * @param metaPath directory containing the meta files
 * @param instants instants whose meta files are candidates for deletion; the leading MIN_ROLLBACK_TO_KEEP
 *                 elements are skipped
 * @throws HoodieIOException if deleting a meta file raises an {@link IOException}
 */
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
  // TODO - this should be archived when archival is made general for all meta-data
  // skip MIN_ROLLBACK_TO_KEEP and delete rest
  instants.skip(MIN_ROLLBACK_TO_KEEP).forEach(s -> {
    try {
      // non-recursive delete of the single meta file
      fs.delete(new Path(metaPath, s.getFileName()), false);
    } catch (IOException e) {
      throw new HoodieIOException("Could not delete restore meta files " + s.getFileName(), e);
    }
  });
}
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
if (!fs.exists(partitionPath)) {
fs.mkdirs(partitionPath);
@@ -535,8 +492,8 @@ public class FSUtils {
/**
 * Get DFS full partition path (e.g. hdfs://ip-address:8020/<absolute path>).
 *
 * <p>The result is the file system URI followed by the raw path component of the given partition path.
 *
 * <p>NOTE(review): two signature lines and two return lines appear below; this looks like the pre- and
 * post-rename lines of a diff (partitionPath renamed to fullPartitionPath) — confirm which pair is current.
 *
 * @param fs                file system whose URI forms the prefix
 * @param fullPartitionPath partition path whose raw URI path is appended (named partitionPath in the older line)
 * @return the concatenation of {@code fs.getUri()} and the raw path
 */
public static String getDFSFullPartitionPath(FileSystem fs, Path partitionPath) {
return fs.getUri() + partitionPath.toUri().getRawPath();
public static String getDFSFullPartitionPath(FileSystem fs, Path fullPartitionPath) {
return fs.getUri() + fullPartitionPath.toUri().getRawPath();
}
/**