[HUDI-2305] Add MARKERS.type and fix marker-based rollback (#3472)
- Rollback infers the directory structure and performs the rollback based on the strategy that was in use when the markers were written. The "write markers type" setting in the write config determines the marker strategy only for new writes.
This commit is contained in:
@@ -33,15 +33,14 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.marker.MarkerBasedRollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -58,11 +57,11 @@ public class SparkMarkerBasedRollbackStrategy<T extends HoodieRecordPayload> ext
|
||||
public List<HoodieRollbackStat> execute(HoodieInstant instantToRollback) {
|
||||
JavaSparkContext jsc = HoodieSparkEngineContext.getSparkContext(context);
|
||||
try {
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, instantToRollback.getTimestamp());
|
||||
List<String> markerFilePaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
int parallelism = Math.max(Math.min(markerFilePaths.size(), config.getRollbackParallelism()), 1);
|
||||
List<String> markerPaths = MarkerBasedRollbackUtils.getAllMarkerPaths(
|
||||
table, context, instantToRollback.getTimestamp(), config.getRollbackParallelism());
|
||||
int parallelism = Math.max(Math.min(markerPaths.size(), config.getRollbackParallelism()), 1);
|
||||
jsc.setJobGroup(this.getClass().getSimpleName(), "Rolling back using marker files");
|
||||
return jsc.parallelize(markerFilePaths, parallelism)
|
||||
return jsc.parallelize(markerPaths, parallelism)
|
||||
.map(markerFilePath -> {
|
||||
String typeStr = markerFilePath.substring(markerFilePath.lastIndexOf(".") + 1);
|
||||
IOType type = IOType.valueOf(typeStr);
|
||||
|
||||
@@ -23,6 +23,7 @@ import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
@@ -33,13 +34,12 @@ import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.table.marker.MarkerType;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@@ -23,14 +23,15 @@ import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewManager;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.FileIOUtils;
|
||||
import org.apache.hudi.common.util.MarkerUtils;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
import org.apache.hudi.timeline.service.TimelineService;
|
||||
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
@@ -40,16 +41,15 @@ import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
TimelineService timelineService;
|
||||
@@ -94,26 +94,10 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
|
||||
@Override
|
||||
void verifyMarkersInFileSystem() throws IOException {
|
||||
List<String> allMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
|
||||
.stream().filter(status -> status.getPath().getName().contains(MarkerDirState.MARKERS_FILENAME_PREFIX))
|
||||
.flatMap(status -> {
|
||||
// Read all markers stored in each marker file maintained by the timeline service
|
||||
FSDataInputStream fsDataInputStream = null;
|
||||
BufferedReader bufferedReader = null;
|
||||
List<String> markers = null;
|
||||
try {
|
||||
fsDataInputStream = fs.open(status.getPath());
|
||||
bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8));
|
||||
markers = bufferedReader.lines().collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
closeQuietly(bufferedReader);
|
||||
closeQuietly(fsDataInputStream);
|
||||
}
|
||||
return markers.stream();
|
||||
})
|
||||
.sorted()
|
||||
// Verifies the markers
|
||||
List<String> allMarkers = MarkerUtils.readTimelineServerBasedMarkersFromFileSystem(
|
||||
markerFolderPath.toString(), fs, context, 1)
|
||||
.values().stream().flatMap(Collection::stream).sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(3, allMarkers.size());
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
@@ -121,6 +105,13 @@ public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
"2020/06/02/file2.marker.APPEND",
|
||||
"2020/06/03/file3.marker.CREATE"),
|
||||
allMarkers);
|
||||
// Verifies the marker type file
|
||||
Path markerTypeFilePath = new Path(markerFolderPath, MarkerUtils.MARKER_TYPE_FILENAME);
|
||||
assertTrue(MarkerUtils.doesMarkerTypeFileExist(fs, markerFolderPath.toString()));
|
||||
FSDataInputStream fsDataInputStream = fs.open(markerTypeFilePath);
|
||||
assertEquals(MarkerType.TIMELINE_SERVER_BASED.toString(),
|
||||
FileIOUtils.readAsUTFString(fsDataInputStream));
|
||||
closeQuietly(fsDataInputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -24,6 +24,7 @@ import org.apache.hudi.common.model.IOType;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.MarkerUtils;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -100,8 +101,10 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
|
||||
createSomeMarkers();
|
||||
// add invalid file
|
||||
createInvalidFile("2020/06/01", "invalid_file3");
|
||||
int fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).size();
|
||||
assertEquals(fileSize,4);
|
||||
long fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).stream()
|
||||
.filter(fileStatus -> !fileStatus.getPath().getName().contains(MarkerUtils.MARKER_TYPE_FILENAME))
|
||||
.count();
|
||||
assertEquals(fileSize, 4);
|
||||
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
@@ -118,7 +121,9 @@ public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList("2020/06/01/file1.marker.MERGE",
|
||||
"2020/06/02/file2.marker.APPEND", "2020/06/03/file3.marker.CREATE"),
|
||||
writeMarkers.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
|
||||
writeMarkers.allMarkerFilePaths().stream()
|
||||
.filter(path -> !path.contains(MarkerUtils.MARKER_TYPE_FILENAME))
|
||||
.sorted().collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user