1
0

[HUDI-2305] Add MARKERS.type and fix marker-based rollback (#3472)

- Rollback infers the directory structure and does rollback based on the strategy used while markers were written. "write markers type" in write config is used to determine marker strategy only for new writes.
This commit is contained in:
Y Ethan Guo
2021-08-14 05:18:49 -07:00
committed by GitHub
parent b7da6cb33d
commit 9056c68744
17 changed files with 403 additions and 143 deletions

View File

@@ -23,14 +23,15 @@ import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.marker.MarkerType;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.testutils.FileSystemTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hudi.timeline.service.TimelineService;
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
@@ -40,16 +41,15 @@ import org.apache.spark.api.java.JavaSparkContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
TimelineService timelineService;
@@ -94,26 +94,10 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
@Override
void verifyMarkersInFileSystem() throws IOException {
List<String> allMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
.stream().filter(status -> status.getPath().getName().contains(MarkerDirState.MARKERS_FILENAME_PREFIX))
.flatMap(status -> {
// Read all markers stored in each marker file maintained by the timeline service
FSDataInputStream fsDataInputStream = null;
BufferedReader bufferedReader = null;
List<String> markers = null;
try {
fsDataInputStream = fs.open(status.getPath());
bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8));
markers = bufferedReader.lines().collect(Collectors.toList());
} catch (IOException e) {
e.printStackTrace();
} finally {
closeQuietly(bufferedReader);
closeQuietly(fsDataInputStream);
}
return markers.stream();
})
.sorted()
// Verifies the markers
List<String> allMarkers = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
markerFolderPath.toString(), fs, context, 1)
.values().stream().flatMap(Collection::stream).sorted()
.collect(Collectors.toList());
assertEquals(3, allMarkers.size());
assertIterableEquals(CollectionUtils.createImmutableList(
@@ -121,6 +105,13 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
"2020/06/02/file2.marker.APPEND",
"2020/06/03/file3.marker.CREATE"),
allMarkers);
// Verifies the marker type file
Path markerTypeFilePath = new Path(markerFolderPath, MarkerUtils.MARKER_TYPE_FILENAME);
assertTrue(MarkerUtils.doesMarkerTypeFileExist(fs, markerFolderPath.toString()));
FSDataInputStream fsDataInputStream = fs.open(markerTypeFilePath);
assertEquals(MarkerType.TIMELINE_SERVER_BASED.toString(),
FileIOUtils.readAsUTFString(fsDataInputStream));
closeQuietly(fsDataInputStream);
}
/**

View File

@@ -24,6 +24,7 @@ import org.apache.hudi.common.model.IOType;
import org.apache.hudi.common.testutils.FileSystemTestUtils;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.MarkerUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hadoop.fs.FileSystem;
@@ -100,8 +101,10 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
createSomeMarkers();
// add invalid file
createInvalidFile("2020/06/01", "invalid_file3");
int fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).size();
assertEquals(fileSize,4);
long fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).stream()
.filter(fileStatus -> !fileStatus.getPath().getName().contains(MarkerUtils.MARKER_TYPE_FILENAME))
.count();
assertEquals(fileSize, 4);
// then
assertIterableEquals(CollectionUtils.createImmutableList(
@@ -118,7 +121,9 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
// then
assertIterableEquals(CollectionUtils.createImmutableList("2020/06/01/file1.marker.MERGE",
"2020/06/02/file2.marker.APPEND", "2020/06/03/file3.marker.CREATE"),
writeMarkers.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
writeMarkers.allMarkerFilePaths().stream()
.filter(path -> !path.contains(MarkerUtils.MARKER_TYPE_FILENAME))
.sorted().collect(Collectors.toList())
);
}