[HUDI-839] Introducing support for rollbacks using marker files (#1756)
* [HUDI-839] Introducing rollback strategy using marker files - Adds a new mechanism for rollbacks that is based on the marker files generated during the write - Consequently, marker file/dir deletion now happens post commit, instead of during finalize - Marker files are also generated for AppendHandle, making it consistent throughout the write path - Until the upgrade-downgrade mechanism can upgrade non-marker-based inflight writes to marker-based ones, this should only be turned on for new datasets. - Added marker dir deletion after successful commit/rollback; individual files are not deleted during finalize - Fail-safe for deleting marker directories, now during the timeline archival process - Added a check to ensure completed instants are not rolled back using the marker-based strategy, as that would be incorrect - Reworked tests to roll back inflight instants, instead of completed instants, wherever necessary - Added a unit test for MarkerBasedRollbackStrategy Co-authored-by: Vinoth Chandar <vinoth@apache.org>
This commit is contained in:
@@ -21,6 +21,7 @@ package org.apache.hudi.common;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -85,6 +86,15 @@ public class HoodieRollbackStat implements Serializable {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withDeletedFileResult(String fileName, boolean isDeleted) {
|
||||
if (isDeleted) {
|
||||
successDeleteFiles = Collections.singletonList(fileName);
|
||||
} else {
|
||||
failedDeleteFiles = Collections.singletonList(fileName);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the per-log-file command block counts recorded while rolling back
 * (presumably the number of rollback command blocks appended to each log
 * file — confirm against the rollback executor that populates this map).
 *
 * @param commandBlocksCount mapping from log file status to the number of command blocks
 * @return this builder for chaining
 */
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
  this.commandBlocksCount = commandBlocksCount;
  return this;
}
|
||||
|
||||
public HoodieRollbackStat build() {
|
||||
if (successDeleteFiles == null) {
|
||||
successDeleteFiles = Collections.EMPTY_LIST;
|
||||
}
|
||||
if (failedDeleteFiles == null) {
|
||||
failedDeleteFiles = Collections.EMPTY_LIST;
|
||||
}
|
||||
if (commandBlocksCount == null) {
|
||||
commandBlocksCount = Collections.EMPTY_MAP;
|
||||
}
|
||||
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,7 +24,6 @@ import org.apache.hudi.common.model.HoodiePartitionMetadata;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
@@ -46,7 +45,6 @@ import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
@@ -116,22 +114,6 @@ public class FSUtils {
|
||||
return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, fileExtension);
|
||||
}
|
||||
|
||||
public static String makeMarkerFile(String instantTime, String writeToken, String fileId) {
|
||||
return String.format("%s_%s_%s%s", fileId, writeToken, instantTime, HoodieTableMetaClient.MARKER_EXTN);
|
||||
}
|
||||
|
||||
public static String translateMarkerToDataPath(String basePath, String markerPath, String instantTs,
|
||||
String baseFileExtension) {
|
||||
ValidationUtils.checkArgument(markerPath.endsWith(HoodieTableMetaClient.MARKER_EXTN));
|
||||
String markerRootPath = Path.getPathWithoutSchemeAndAuthority(
|
||||
new Path(String.format("%s/%s/%s", basePath, HoodieTableMetaClient.TEMPFOLDER_NAME, instantTs))).toString();
|
||||
int begin = markerPath.indexOf(markerRootPath);
|
||||
ValidationUtils.checkArgument(begin >= 0,
|
||||
"Not in marker dir. Marker Path=" + markerPath + ", Expected Marker Root=" + markerRootPath);
|
||||
String rPath = markerPath.substring(begin + markerRootPath.length() + 1);
|
||||
return String.format("%s/%s%s", basePath, rPath.replace(HoodieTableMetaClient.MARKER_EXTN, ""), baseFileExtension);
|
||||
}
|
||||
|
||||
public static String maskWithoutFileId(String instantTime, int taskPartitionId) {
|
||||
return String.format("*_%s_%s%s", taskPartitionId, instantTime, HoodieFileFormat.PARQUET.getFileExtension());
|
||||
}
|
||||
@@ -171,15 +153,15 @@ public class FSUtils {
|
||||
/**
|
||||
* Given a base partition and a partition path, return relative path of partition path to the base path.
|
||||
*/
|
||||
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
|
||||
public static String getRelativePartitionPath(Path basePath, Path fullPartitionPath) {
|
||||
basePath = Path.getPathWithoutSchemeAndAuthority(basePath);
|
||||
partitionPath = Path.getPathWithoutSchemeAndAuthority(partitionPath);
|
||||
String partitionFullPath = partitionPath.toString();
|
||||
int partitionStartIndex = partitionFullPath.indexOf(basePath.getName(),
|
||||
fullPartitionPath = Path.getPathWithoutSchemeAndAuthority(fullPartitionPath);
|
||||
String fullPartitionPathStr = fullPartitionPath.toString();
|
||||
int partitionStartIndex = fullPartitionPathStr.indexOf(basePath.getName(),
|
||||
basePath.getParent() == null ? 0 : basePath.getParent().toString().length());
|
||||
// Partition-Path could be empty for non-partitioned tables
|
||||
return partitionStartIndex + basePath.getName().length() == partitionFullPath.length() ? ""
|
||||
: partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
return partitionStartIndex + basePath.getName().length() == fullPartitionPathStr.length() ? ""
|
||||
: fullPartitionPathStr.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -199,19 +181,6 @@ public class FSUtils {
|
||||
return partitions;
|
||||
}
|
||||
|
||||
public static List<String> getAllDataFilesForMarkers(FileSystem fs, String basePath, String instantTs,
|
||||
String markerDir, String baseFileExtension) throws IOException {
|
||||
List<String> dataFiles = new LinkedList<>();
|
||||
processFiles(fs, markerDir, (status) -> {
|
||||
String pathStr = status.getPath().toString();
|
||||
if (pathStr.endsWith(HoodieTableMetaClient.MARKER_EXTN)) {
|
||||
dataFiles.add(FSUtils.translateMarkerToDataPath(basePath, pathStr, instantTs, baseFileExtension));
|
||||
}
|
||||
return true;
|
||||
}, false);
|
||||
return dataFiles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its subdirs
|
||||
* are skipped
|
||||
@@ -222,8 +191,8 @@ public class FSUtils {
|
||||
* @param excludeMetaFolder Exclude .hoodie folder
|
||||
* @throws IOException -
|
||||
*/
|
||||
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
|
||||
boolean excludeMetaFolder) throws IOException {
|
||||
public static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
|
||||
boolean excludeMetaFolder) throws IOException {
|
||||
PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
|
||||
FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
|
||||
for (FileStatus child : topLevelStatuses) {
|
||||
@@ -390,7 +359,7 @@ public class FSUtils {
|
||||
|
||||
public static boolean isLogFile(Path logPath) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
|
||||
return matcher.find();
|
||||
return matcher.find() && logPath.getName().contains(".log");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -501,18 +470,6 @@ public class FSUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static void deleteOlderRestoreMetaFiles(FileSystem fs, String metaPath, Stream<HoodieInstant> instants) {
|
||||
// TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_ROLLBACK_TO_KEEP and delete rest
|
||||
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not delete restore meta files " + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
|
||||
if (!fs.exists(partitionPath)) {
|
||||
fs.mkdirs(partitionPath);
|
||||
@@ -535,8 +492,8 @@ public class FSUtils {
|
||||
/**
|
||||
* Get DFS full partition path (e.g. hdfs://ip-address:8020:/<absolute path>)
|
||||
*/
|
||||
public static String getDFSFullPartitionPath(FileSystem fs, Path partitionPath) {
|
||||
return fs.getUri() + partitionPath.toUri().getRawPath();
|
||||
public static String getDFSFullPartitionPath(FileSystem fs, Path fullPartitionPath) {
|
||||
return fs.getUri() + fullPartitionPath.toUri().getRawPath();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -18,6 +18,10 @@
|
||||
|
||||
package org.apache.hudi.common.testutils;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hudi.common.fs.inline.InLineFSUtils;
|
||||
import org.apache.hudi.common.fs.inline.InLineFileSystem;
|
||||
import org.apache.hudi.common.fs.inline.InMemoryFileSystem;
|
||||
@@ -26,6 +30,8 @@ import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.UUID;
|
||||
|
||||
@@ -67,4 +73,13 @@ public class FileSystemTestUtils {
|
||||
throw new IOException(message);
|
||||
}
|
||||
}
|
||||
|
||||
public static List<FileStatus> listRecursive(FileSystem fs, Path path) throws IOException {
|
||||
RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, true);
|
||||
List<FileStatus> statuses = new ArrayList<>();
|
||||
while (itr.hasNext()) {
|
||||
statuses.add(itr.next());
|
||||
}
|
||||
return statuses;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
||||
import org.apache.hudi.common.table.view.SyncableFileSystemView;
|
||||
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -77,6 +78,17 @@ public class HoodieCommonTestHarness {
|
||||
return getFileSystemView(timeline, true);
|
||||
}
|
||||
|
||||
/**
 * Builds a file-system view backed by ALL data and log files found under the table
 * base path — by the method's name, this includes files written by uncommitted /
 * inflight instants, not just completed ones.
 *
 * @param metaClient meta client for the table whose files are listed
 * @return a {@link SyncableFileSystemView} over the listed files
 * @throws HoodieIOException if listing the files fails
 */
protected SyncableFileSystemView getFileSystemViewWithUnCommittedSlices(HoodieTableMetaClient metaClient) {
  try {
    return new HoodieTableFileSystemView(metaClient,
        metaClient.getActiveTimeline(),
        HoodieTestUtils.listAllDataFilesAndLogFilesInPath(metaClient.getFs(), metaClient.getBasePath())
    );
  } catch (IOException ioe) {
    throw new HoodieIOException("Error getting file system view", ioe);
  }
}
|
||||
|
||||
/**
|
||||
* Gets a default {@link HoodieTableType#COPY_ON_WRITE} table type. Sub-classes can override this method to specify a
|
||||
* new table type.
|
||||
|
||||
@@ -18,6 +18,12 @@
|
||||
|
||||
package org.apache.hudi.common.testutils;
|
||||
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.avro.model.HoodieActionInstant;
|
||||
import org.apache.hudi.avro.model.HoodieCleanMetadata;
|
||||
@@ -60,12 +66,6 @@ import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
@@ -268,12 +268,6 @@ public class HoodieTestUtils {
|
||||
return createDataFileFixLength(basePath, partitionPath, instantTime, fileID, length);
|
||||
}
|
||||
|
||||
public static String createNewMarkerFile(String basePath, String partitionPath, String instantTime)
|
||||
throws IOException {
|
||||
String fileID = UUID.randomUUID().toString();
|
||||
return createMarkerFile(basePath, partitionPath, instantTime, fileID);
|
||||
}
|
||||
|
||||
public static String createDataFile(String basePath, String partitionPath, String instantTime, String fileID)
|
||||
throws IOException {
|
||||
String folderPath = basePath + "/" + partitionPath + "/";
|
||||
@@ -294,16 +288,6 @@ public class HoodieTestUtils {
|
||||
return fileID;
|
||||
}
|
||||
|
||||
public static String createMarkerFile(String basePath, String partitionPath, String instantTime, String fileID)
|
||||
throws IOException {
|
||||
String folderPath =
|
||||
basePath + "/" + HoodieTableMetaClient.TEMPFOLDER_NAME + "/" + instantTime + "/" + partitionPath + "/";
|
||||
new File(folderPath).mkdirs();
|
||||
File f = new File(folderPath + FSUtils.makeMarkerFile(instantTime, DEFAULT_WRITE_TOKEN, fileID));
|
||||
f.createNewFile();
|
||||
return f.getAbsolutePath();
|
||||
}
|
||||
|
||||
public static String createNewLogFile(FileSystem fs, String basePath, String partitionPath, String instantTime,
|
||||
String fileID, Option<Integer> version) throws IOException {
|
||||
String folderPath = basePath + "/" + partitionPath + "/";
|
||||
@@ -465,7 +449,7 @@ public class HoodieTestUtils {
|
||||
|
||||
// TODO: should be removed
|
||||
public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath) throws IOException {
|
||||
return listAllDataFilesInPath(fs, basePath, ".parquet");
|
||||
return listAllDataFilesInPath(fs, basePath, HoodieFileFormat.PARQUET.getFileExtension());
|
||||
}
|
||||
|
||||
public static FileStatus[] listAllDataFilesInPath(FileSystem fs, String basePath, String datafileExtension)
|
||||
@@ -474,26 +458,31 @@ public class HoodieTestUtils {
|
||||
List<FileStatus> returns = new ArrayList<>();
|
||||
while (itr.hasNext()) {
|
||||
LocatedFileStatus status = itr.next();
|
||||
if (status.getPath().getName().contains(datafileExtension)) {
|
||||
if (status.getPath().getName().contains(datafileExtension) && !status.getPath().getName().contains(".marker")) {
|
||||
returns.add(status);
|
||||
}
|
||||
}
|
||||
return returns.toArray(new FileStatus[returns.size()]);
|
||||
}
|
||||
|
||||
public static FileStatus[] listAllLogFilesInPath(FileSystem fs, String basePath, String logfileExtension)
|
||||
public static FileStatus[] listAllLogFilesInPath(FileSystem fs, String basePath)
|
||||
throws IOException {
|
||||
RemoteIterator<LocatedFileStatus> itr = fs.listFiles(new Path(basePath), true);
|
||||
List<FileStatus> returns = new ArrayList<>();
|
||||
while (itr.hasNext()) {
|
||||
LocatedFileStatus status = itr.next();
|
||||
if (status.getPath().getName().contains(logfileExtension)) {
|
||||
if (status.getPath().getName().contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())) {
|
||||
returns.add(status);
|
||||
}
|
||||
}
|
||||
return returns.toArray(new FileStatus[returns.size()]);
|
||||
}
|
||||
|
||||
public static FileStatus[] listAllDataFilesAndLogFilesInPath(FileSystem fs, String basePath) throws IOException {
|
||||
return Stream.concat(Arrays.stream(listAllDataFilesInPath(fs, basePath)), Arrays.stream(listAllLogFilesInPath(fs, basePath)))
|
||||
.toArray(FileStatus[]::new);
|
||||
}
|
||||
|
||||
public static List<String> monotonicIncreasingCommitTimestamps(int numTimestamps, int startSecsDelta) {
|
||||
Calendar cal = Calendar.getInstance();
|
||||
cal.add(Calendar.SECOND, startSecsDelta);
|
||||
|
||||
Reference in New Issue
Block a user