1
0

[HUDI-1138] Add timeline-server-based marker file strategy for improving marker-related latency (#3233)

- Can be enabled for cloud stores like S3. Not yet supported for HDFS due to partial write failures.
This commit is contained in:
Y Ethan Guo
2021-08-11 08:48:13 -07:00
committed by GitHub
parent 29332498af
commit 4783176554
52 changed files with 2144 additions and 353 deletions

View File

@@ -87,9 +87,9 @@ import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
import org.apache.hudi.table.HoodieSparkCopyOnWriteTable;
import org.apache.hudi.table.HoodieSparkTable;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.MarkerFiles;
import org.apache.hudi.table.action.HoodieWriteMetadata;
import org.apache.hudi.table.action.commit.SparkWriteHelper;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hudi.testutils.HoodieClientTestBase;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hudi.testutils.HoodieSparkWriteableTestTable;
@@ -2220,11 +2220,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
path -> path.toString().contains(HoodieTableMetaClient.MARKER_EXTN)))
.limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
Path markerFilePath = new MarkerFiles(fs, basePath, metaClient.getMarkerFolderPath(instantTime), instantTime)
Option<Path> markerFilePath = WriteMarkersFactory.get(
cfg.getMarkersType(), getHoodieTable(metaClient, cfg), instantTime)
.create(partitionPath,
FSUtils.makeDataFileName(instantTime, "1-0-1", UUID.randomUUID().toString()),
IOType.MERGE);
LOG.info("Created a dummy marker path=" + markerFilePath);
LOG.info("Created a dummy marker path=" + markerFilePath.get());
if (!enableOptimisticConsistencyGuard) {
Exception e = assertThrows(HoodieCommitException.class, () -> {
@@ -2235,7 +2236,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
// with optimistic CG, commit should succeed
client.commit(instantTime, result);
}
return Pair.of(markerFilePath, result);
return Pair.of(markerFilePath.get(), result);
}
@ParameterizedTest

View File

@@ -27,7 +27,8 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.HoodieTestTable;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.MarkerFiles;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -213,6 +214,6 @@ public class TestCopyOnWriteRollbackActionExecutor extends HoodieClientRollbackT
String.format("%s:%s/%s", this.fs.getScheme(), basePath, rollbackMetadata.get(DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles().get(0)));
}
assertFalse(new MarkerFiles(table, commitInstant.getTimestamp()).doesMarkerDirExist());
assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, commitInstant.getTimestamp()).doesMarkerDirExist());
}
}

View File

@@ -29,7 +29,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.MarkerFiles;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
@@ -138,7 +138,7 @@ public class TestMergeOnReadRollbackActionExecutor extends HoodieClientRollbackT
secondPartitionRollBackLogFiles.removeAll(secondPartitionCommit2LogFiles);
assertEquals(1, secondPartitionRollBackLogFiles.size());
assertFalse(new MarkerFiles(table, "002").doesMarkerDirExist());
assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, "002").doesMarkerDirExist());
}
@Test

View File

@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.marker;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.testutils.FileSystemTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
/**
 * Runs the marker test cases inherited from {@link TestWriteMarkersBase} against
 * {@link DirectWriteMarkers}, and checks the individual marker files found under the
 * marker folder.
 */
public class TestDirectWriteMarkers extends TestWriteMarkersBase {

  /**
   * Prepares the test path, meta client, Spark context and file system, then creates the
   * {@link DirectWriteMarkers} under test for instant "000".
   *
   * @throws IOException propagated from the harness initialization
   */
  @BeforeEach
  public void setup() throws IOException {
    initPath();
    initMetaClient();

    JavaSparkContext sparkContext = new JavaSparkContext(
        HoodieClientTestUtils.getSparkConfForTest(TestDirectWriteMarkers.class.getName()));
    jsc = sparkContext;
    context = new HoodieSparkEngineContext(sparkContext);

    fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());

    Path markerDir = new Path(metaClient.getMarkerFolderPath("000"));
    markerFolderPath = markerDir;
    writeMarkers = new DirectWriteMarkers(fs, metaClient.getBasePath(), markerDir.toString(), "000");
  }

  /**
   * Stops the Spark context started in {@link #setup()} and drops the engine context reference.
   */
  @AfterEach
  public void cleanup() {
    jsc.stop();
    context = null;
  }

  /**
   * Asserts that exactly three files whose name contains ".marker" exist under the marker
   * folder and that, once sorted, their full paths match the expected MERGE / APPEND / CREATE
   * markers.
   *
   * @throws IOException if listing the marker folder fails
   */
  @Override
  void verifyMarkersInFileSystem() throws IOException {
    List<FileStatus> sortedMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
        .stream()
        .filter(fileStatus -> fileStatus.getPath().getName().contains(".marker"))
        .sorted()
        .collect(Collectors.toList());
    assertEquals(3, sortedMarkers.size());

    // Every expected path shares the same "file:"-prefixed marker folder location.
    String markerDirPrefix = "file:" + markerFolderPath.toString();
    assertIterableEquals(
        CollectionUtils.createImmutableList(
            markerDirPrefix + "/2020/06/01/file1.marker.MERGE",
            markerDirPrefix + "/2020/06/02/file2.marker.APPEND",
            markerDirPrefix + "/2020/06/03/file3.marker.CREATE"),
        sortedMarkers.stream()
            .map(fileStatus -> fileStatus.getPath().toString())
            .collect(Collectors.toList()));
  }
}

View File

@@ -0,0 +1,141 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.marker;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.config.HoodieCommonConfig;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.testutils.FileSystemTestUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hudi.timeline.service.TimelineService;
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
/**
 * Runs the marker test cases inherited from {@link TestWriteMarkersBase} against
 * {@link TimelineServerBasedWriteMarkers}, backed by a {@link TimelineService} that is started
 * before and closed after every test.
 */
public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
  TimelineService timelineService;

  /**
   * Prepares the test path, meta client, Spark context and file system, starts a
   * {@link TimelineService} with marker requests enabled, and creates the
   * {@link TimelineServerBasedWriteMarkers} under test for instant "000".
   *
   * @throws IOException propagated from the harness initialization
   */
  @BeforeEach
  public void setup() throws IOException {
    initPath();
    initMetaClient();
    this.jsc = new JavaSparkContext(
        HoodieClientTestUtils.getSparkConfForTest(TestTimelineServerBasedWriteMarkers.class.getName()));
    this.context = new HoodieSparkEngineContext(jsc);
    this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
    this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));

    FileSystemViewStorageConfig storageConf =
        FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build();
    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
    HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());

    try {
      // NOTE(review): serverPort(0) presumably asks for an ephemeral port; the actual port is
      // read back through getServerPort() below.
      timelineService = new TimelineService(localEngineContext, new Configuration(),
          TimelineService.Config.builder().serverPort(0).enableMarkerRequests(true).build(),
          FileSystem.get(new Configuration()),
          FileSystemViewManager.createViewManager(
              localEngineContext, metadataConfig, storageConf, HoodieCommonConfig.newBuilder().build()));
      timelineService.startService();
    } catch (Exception ex) {
      throw new RuntimeException(ex);
    }

    // NOTE(review): the trailing 300 looks like a request timeout — confirm against the constructor.
    this.writeMarkers = new TimelineServerBasedWriteMarkers(
        metaClient.getBasePath(), markerFolderPath.toString(), "000", "localhost", timelineService.getServerPort(), 300);
  }

  /**
   * Closes the timeline service if it was created, stops the Spark context and drops the
   * engine context reference.
   */
  @AfterEach
  public void cleanup() {
    if (timelineService != null) {
      timelineService.close();
    }
    jsc.stop();
    context = null;
  }

  /**
   * Collects every line of every file under the marker folder whose name contains
   * {@link MarkerDirState#MARKERS_FILENAME_PREFIX} and asserts that, once sorted, the lines are
   * exactly the three expected MERGE / APPEND / CREATE marker entries.
   *
   * @throws IOException if listing the marker folder fails
   */
  @Override
  void verifyMarkersInFileSystem() throws IOException {
    List<String> allMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
        .stream()
        .filter(status -> status.getPath().getName().contains(MarkerDirState.MARKERS_FILENAME_PREFIX))
        // Read all markers stored in each marker file maintained by the timeline service
        .flatMap(status -> readMarkers(status.getPath()).stream())
        .sorted()
        .collect(Collectors.toList());

    assertEquals(3, allMarkers.size());
    assertIterableEquals(CollectionUtils.createImmutableList(
        "2020/06/01/file1.marker.MERGE",
        "2020/06/02/file2.marker.APPEND",
        "2020/06/03/file3.marker.CREATE"),
        allMarkers);
  }

  /**
   * Reads all lines of one marker file as UTF-8 text.
   *
   * <p>A read failure is rethrown instead of being printed and ignored, so the test fails with
   * the real cause rather than with a {@code NullPointerException} on a missing result.
   *
   * @param markerFilePath path of the marker file to read
   * @return the lines of the file, in file order
   * @throws UncheckedIOException if the file cannot be opened
   */
  private List<String> readMarkers(Path markerFilePath) {
    FSDataInputStream fsDataInputStream = null;
    BufferedReader bufferedReader = null;
    try {
      fsDataInputStream = fs.open(markerFilePath);
      bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8));
      return bufferedReader.lines().collect(Collectors.toList());
    } catch (IOException e) {
      throw new UncheckedIOException("Failed to read markers from " + markerFilePath, e);
    } finally {
      closeQuietly(bufferedReader);
      closeQuietly(fsDataInputStream);
    }
  }

  /**
   * Closes {@code Closeable} quietly.
   *
   * @param closeable {@code Closeable} to close
   */
  private void closeQuietly(Closeable closeable) {
    if (closeable == null) {
      return;
    }
    try {
      closeable.close();
    } catch (IOException e) {
      // Ignore
    }
  }
}

View File

@@ -16,7 +16,7 @@
* limitations under the License.
*/
package org.apache.hudi.table;
package org.apache.hudi.table.marker;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.fs.FSUtils;
@@ -25,18 +25,13 @@ import org.apache.hudi.common.testutils.FileSystemTestUtils;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -44,35 +39,18 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class TestMarkerFiles extends HoodieCommonTestHarness {
public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
private MarkerFiles markerFiles;
private FileSystem fs;
private Path markerFolderPath;
private JavaSparkContext jsc;
private HoodieSparkEngineContext context;
protected WriteMarkers writeMarkers;
protected FileSystem fs;
protected Path markerFolderPath;
protected JavaSparkContext jsc;
protected HoodieSparkEngineContext context;
@BeforeEach
public void setup() throws IOException {
initPath();
initMetaClient();
this.jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(TestMarkerFiles.class.getName()));
this.context = new HoodieSparkEngineContext(jsc);
this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
this.markerFiles = new MarkerFiles(fs, metaClient.getBasePath(), markerFolderPath.toString(), "000");
}
@AfterEach
public void cleanup() {
jsc.stop();
context = null;
}
private void createSomeMarkerFiles() {
markerFiles.create("2020/06/01", "file1", IOType.MERGE);
markerFiles.create("2020/06/02", "file2", IOType.APPEND);
markerFiles.create("2020/06/03", "file3", IOType.CREATE);
private void createSomeMarkers() {
writeMarkers.create("2020/06/01", "file1", IOType.MERGE);
writeMarkers.create("2020/06/02", "file2", IOType.APPEND);
writeMarkers.create("2020/06/03", "file3", IOType.CREATE);
}
private void createInvalidFile(String partitionPath, String invalidFileName) {
@@ -85,48 +63,41 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
}
}
abstract void verifyMarkersInFileSystem() throws IOException;
@Test
public void testCreation() throws Exception {
// when
createSomeMarkerFiles();
createSomeMarkers();
// then
assertTrue(fs.exists(markerFolderPath));
List<FileStatus> markerFiles = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
.stream().filter(status -> status.getPath().getName().contains(".marker"))
.sorted().collect(Collectors.toList());
assertEquals(3, markerFiles.size());
assertIterableEquals(CollectionUtils.createImmutableList(
"file:" + markerFolderPath.toString() + "/2020/06/01/file1.marker.MERGE",
"file:" + markerFolderPath.toString() + "/2020/06/02/file2.marker.APPEND",
"file:" + markerFolderPath.toString() + "/2020/06/03/file3.marker.CREATE"),
markerFiles.stream().map(m -> m.getPath().toString()).collect(Collectors.toList())
);
verifyMarkersInFileSystem();
}
@Test
public void testDeletionWhenMarkerDirExists() throws IOException {
//when
markerFiles.create("2020/06/01", "file1", IOType.MERGE);
writeMarkers.create("2020/06/01", "file1", IOType.MERGE);
// then
assertTrue(markerFiles.doesMarkerDirExist());
assertTrue(markerFiles.deleteMarkerDir(context, 2));
assertFalse(markerFiles.doesMarkerDirExist());
assertTrue(writeMarkers.doesMarkerDirExist());
assertTrue(writeMarkers.deleteMarkerDir(context, 2));
assertFalse(writeMarkers.doesMarkerDirExist());
}
@Test
public void testDeletionWhenMarkerDirNotExists() throws IOException {
// then
assertFalse(markerFiles.doesMarkerDirExist());
assertTrue(markerFiles.allMarkerFilePaths().isEmpty());
assertFalse(markerFiles.deleteMarkerDir(context, 2));
assertFalse(writeMarkers.doesMarkerDirExist());
assertTrue(writeMarkers.allMarkerFilePaths().isEmpty());
assertFalse(writeMarkers.deleteMarkerDir(context, 2));
}
@Test
public void testDataPathsWhenCreatingOrMerging() throws IOException {
// add markfiles
createSomeMarkerFiles();
createSomeMarkers();
// add invalid file
createInvalidFile("2020/06/01", "invalid_file3");
int fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).size();
@@ -135,19 +106,19 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
// then
assertIterableEquals(CollectionUtils.createImmutableList(
"2020/06/01/file1", "2020/06/03/file3"),
markerFiles.createdAndMergedDataPaths(context, 2).stream().sorted().collect(Collectors.toList())
writeMarkers.createdAndMergedDataPaths(context, 2).stream().sorted().collect(Collectors.toList())
);
}
@Test
public void testAllMarkerPaths() throws IOException {
// given
createSomeMarkerFiles();
createSomeMarkers();
// then
assertIterableEquals(CollectionUtils.createImmutableList("2020/06/01/file1.marker.MERGE",
"2020/06/02/file2.marker.APPEND", "2020/06/03/file3.marker.CREATE"),
markerFiles.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
writeMarkers.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
);
}
@@ -158,6 +129,6 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
final String markerFilePath = pathPrefix + ".marker.APPEND";
// when-then
assertEquals(pathPrefix, MarkerFiles.stripMarkerSuffix(markerFilePath));
assertEquals(pathPrefix, WriteMarkers.stripMarkerSuffix(markerFilePath));
}
}

View File

@@ -39,7 +39,8 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.MarkerFiles;
import org.apache.hudi.table.marker.WriteMarkers;
import org.apache.hudi.table.marker.WriteMarkersFactory;
import org.apache.hudi.testutils.Assertions;
import org.apache.hudi.testutils.HoodieClientTestBase;
import org.apache.hudi.testutils.HoodieClientTestUtils;
@@ -58,6 +59,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -126,8 +128,9 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
// delete one of the marker files in 2nd commit if need be.
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
List<String> markerPaths = markerFiles.allMarkerFilePaths();
WriteMarkers writeMarkers =
WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
if (deletePartialMarkerFiles) {
String toDeleteMarkerFile = markerPaths.get(0);
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
@@ -182,8 +185,8 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
// delete one of the marker files in 2nd commit if need be.
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
List<String> markerPaths = markerFiles.allMarkerFilePaths();
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
if (deletePartialMarkerFiles) {
String toDeleteMarkerFile = markerPaths.get(0);
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
@@ -212,21 +215,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant) throws IOException {
// Verify recreated marker files are as expected
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
assertFalse(markerFiles.doesMarkerDirExist());
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
assertFalse(writeMarkers.doesMarkerDirExist());
}
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant, List<FileSlice> firstPartitionCommit2FileSlices,
List<FileSlice> secondPartitionCommit2FileSlices) throws IOException {
// Verify recreated marker files are as expected
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
assertTrue(markerFiles.doesMarkerDirExist());
List<String> files = markerFiles.allMarkerFilePaths();
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
assertTrue(writeMarkers.doesMarkerDirExist());
Set<String> files = writeMarkers.allMarkerFilePaths();
assertEquals(2, files.size());
List<String> actualFiles = new ArrayList<>();
for (String file : files) {
String fileName = MarkerFiles.stripMarkerSuffix(file);
String fileName = WriteMarkers.stripMarkerSuffix(file);
actualFiles.add(fileName);
}