[HUDI-2305] Add MARKERS.type and fix marker-based rollback (#3472)
- Rollback infers the directory structure and does rollback based on the strategy used while markers were written. "write markers type" in write config is used to determine marker strategy only for new writes.
This commit is contained in:
@@ -23,14 +23,15 @@ import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewManager;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.FileIOUtils;
|
||||
import org.apache.hudi.common.util.MarkerUtils;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
import org.apache.hudi.timeline.service.TimelineService;
|
||||
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
@@ -40,16 +41,15 @@ import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
TimelineService timelineService;
|
||||
@@ -94,26 +94,10 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
|
||||
@Override
|
||||
void verifyMarkersInFileSystem() throws IOException {
|
||||
List<String> allMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
|
||||
.stream().filter(status -> status.getPath().getName().contains(MarkerDirState.MARKERS_FILENAME_PREFIX))
|
||||
.flatMap(status -> {
|
||||
// Read all markers stored in each marker file maintained by the timeline service
|
||||
FSDataInputStream fsDataInputStream = null;
|
||||
BufferedReader bufferedReader = null;
|
||||
List<String> markers = null;
|
||||
try {
|
||||
fsDataInputStream = fs.open(status.getPath());
|
||||
bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8));
|
||||
markers = bufferedReader.lines().collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
closeQuietly(bufferedReader);
|
||||
closeQuietly(fsDataInputStream);
|
||||
}
|
||||
return markers.stream();
|
||||
})
|
||||
.sorted()
|
||||
// Verifies the markers
|
||||
List<String> allMarkers = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
|
||||
markerFolderPath.toString(), fs, context, 1)
|
||||
.values().stream().flatMap(Collection::stream).sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(3, allMarkers.size());
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
@@ -121,6 +105,13 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
"2020/06/02/file2.marker.APPEND",
|
||||
"2020/06/03/file3.marker.CREATE"),
|
||||
allMarkers);
|
||||
// Verifies the marker type file
|
||||
Path markerTypeFilePath = new Path(markerFolderPath, MarkerUtils.MARKER_TYPE_FILENAME);
|
||||
assertTrue(MarkerUtils.doesMarkerTypeFileExist(fs, markerFolderPath.toString()));
|
||||
FSDataInputStream fsDataInputStream = fs.open(markerTypeFilePath);
|
||||
assertEquals(MarkerType.TIMELINE_SERVER_BASED.toString(),
|
||||
FileIOUtils.readAsUTFString(fsDataInputStream));
|
||||
closeQuietly(fsDataInputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -24,6 +24,7 @@ import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.MarkerUtils;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -100,8 +101,10 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
|
||||
createSomeMarkers();
|
||||
// add invalid file
|
||||
createInvalidFile("2020/06/01", "invalid_file3");
|
||||
int fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).size();
|
||||
assertEquals(fileSize,4);
|
||||
long fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).stream()
|
||||
.filter(fileStatus -> !fileStatus.getPath().getName().contains(MarkerUtils.MARKER_TYPE_FILENAME))
|
||||
.count();
|
||||
assertEquals(fileSize, 4);
|
||||
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
@@ -118,7 +121,9 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList("2020/06/01/file1.marker.MERGE",
|
||||
"2020/06/02/file2.marker.APPEND", "2020/06/03/file3.marker.CREATE"),
|
||||
writeMarkers.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
|
||||
writeMarkers.allMarkerFilePaths().stream()
|
||||
.filter(path -> !path.contains(MarkerUtils.MARKER_TYPE_FILENAME))
|
||||
.sorted().collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user