[HUDI-1138] Add timeline-server-based marker file strategy for improving marker-related latency (#3233)
- Can be enabled for cloud stores like S3. Not supported for HDFS yet, due to partial write failures.
This commit is contained in:
@@ -51,9 +51,9 @@ import org.apache.hudi.metrics.DistributedRegistry;
|
||||
import org.apache.hudi.table.BulkInsertPartitioner;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.compact.SparkCompactHelpers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.table.upgrade.AbstractUpgradeDowngrade;
|
||||
import org.apache.hudi.table.upgrade.SparkUpgradeDowngrade;
|
||||
|
||||
@@ -375,7 +375,8 @@ public class SparkRDDWriteClient<T extends HoodieRecordPayload> extends
|
||||
} catch (IOException e) {
|
||||
throw new HoodieClusteringException("unable to transition clustering inflight to complete: " + clusteringCommitTime, e);
|
||||
}
|
||||
new MarkerFiles(table, clusteringCommitTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
WriteMarkersFactory.get(config.getMarkersType(), table, clusteringCommitTime)
|
||||
.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
if (clusteringTimer != null) {
|
||||
long durationInMs = metrics.getDurationInMs(clusteringTimer.stop());
|
||||
try {
|
||||
|
||||
@@ -31,7 +31,7 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.exception.HoodieInsertException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
@@ -187,8 +187,8 @@ public class HoodieRowCreateHandle implements Serializable {
|
||||
* @param partitionPath Partition path
|
||||
*/
|
||||
private void createMarkerFile(String partitionPath, String dataFileName) {
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, instantTime);
|
||||
markerFiles.create(partitionPath, dataFileName, IOType.CREATE);
|
||||
WriteMarkersFactory.get(writeConfig.getMarkersType(), table, instantTime)
|
||||
.create(partitionPath, dataFileName, IOType.CREATE);
|
||||
}
|
||||
|
||||
private String getWriteToken() {
|
||||
|
||||
@@ -33,13 +33,15 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -56,8 +58,8 @@ public class SparkMarkerBasedRollbackStrategy<T extends HoodieRecordPayload> ext
|
||||
public List<HoodieRollbackStat> execute(HoodieInstant instantToRollback) {
|
||||
JavaSparkContext jsc = HoodieSparkEngineContext.getSparkContext(context);
|
||||
try {
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, instantToRollback.getTimestamp());
|
||||
List<String> markerFilePaths = markerFiles.allMarkerFilePaths();
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, instantToRollback.getTimestamp());
|
||||
List<String> markerFilePaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
int parallelism = Math.max(Math.min(markerFilePaths.size(), config.getRollbackParallelism()), 1);
|
||||
jsc.setJobGroup(this.getClass().getSimpleName(), "Rolling back using marker files");
|
||||
return jsc.parallelize(markerFilePaths, parallelism)
|
||||
@@ -66,11 +68,11 @@ public class SparkMarkerBasedRollbackStrategy<T extends HoodieRecordPayload> ext
|
||||
IOType type = IOType.valueOf(typeStr);
|
||||
switch (type) {
|
||||
case MERGE:
|
||||
return undoMerge(MarkerFiles.stripMarkerSuffix(markerFilePath));
|
||||
return undoMerge(WriteMarkers.stripMarkerSuffix(markerFilePath));
|
||||
case APPEND:
|
||||
return undoAppend(MarkerFiles.stripMarkerSuffix(markerFilePath), instantToRollback);
|
||||
return undoAppend(WriteMarkers.stripMarkerSuffix(markerFilePath), instantToRollback);
|
||||
case CREATE:
|
||||
return undoCreate(MarkerFiles.stripMarkerSuffix(markerFilePath));
|
||||
return undoCreate(WriteMarkers.stripMarkerSuffix(markerFilePath));
|
||||
default:
|
||||
throw new HoodieRollbackException("Unknown marker type, during rollback of " + instantToRollback);
|
||||
}
|
||||
|
||||
@@ -23,7 +23,8 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -40,9 +41,9 @@ public class OneToZeroDowngradeHandler implements DowngradeHandler {
|
||||
HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction();
|
||||
List<HoodieInstant> commits = inflightTimeline.getReverseOrderedInstants().collect(Collectors.toList());
|
||||
for (HoodieInstant commitInstant : commits) {
|
||||
// delete existing marker files
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
|
||||
markerFiles.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(config.getMarkersType(), table, commitInstant.getTimestamp());
|
||||
writeMarkers.quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,13 +31,15 @@ import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieRollbackException;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackHelper;
|
||||
import org.apache.hudi.table.action.rollback.ListingBasedRollbackRequest;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.table.marker.MarkerType;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -59,35 +61,37 @@ public class ZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
commits.remove(instantTime);
|
||||
}
|
||||
for (String commit : commits) {
|
||||
// for every pending commit, delete old marker files and re-create marker files in new format
|
||||
recreateMarkerFiles(commit, table, context, config.getMarkersDeleteParallelism());
|
||||
// for every pending commit, delete old markers and re-create markers in new format
|
||||
recreateMarkers(commit, table, context, config.getMarkersType(), config.getMarkersDeleteParallelism());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recreate marker files in new format.
|
||||
* Step1: Delete existing marker files
|
||||
* Recreate markers in new format.
|
||||
* Step1: Delete existing markers
|
||||
* Step2: Collect all rollback file info.
|
||||
* Step3: recreate marker files for all interested files.
|
||||
* Step3: recreate markers for all interested files.
|
||||
*
|
||||
* @param commitInstantTime instant of interest for which marker files need to be recreated.
|
||||
* @param commitInstantTime instant of interest for which markers need to be recreated.
|
||||
* @param table instance of {@link HoodieSparkTable} to use
|
||||
* @param context instance of {@link HoodieEngineContext} to use
|
||||
* @param markerType marker type to use
|
||||
* @throws HoodieRollbackException on any exception during upgrade.
|
||||
*/
|
||||
private static void recreateMarkerFiles(final String commitInstantTime,
|
||||
HoodieSparkTable table,
|
||||
HoodieEngineContext context,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
private static void recreateMarkers(final String commitInstantTime,
|
||||
HoodieSparkTable table,
|
||||
HoodieEngineContext context,
|
||||
MarkerType markerType,
|
||||
int parallelism) throws HoodieRollbackException {
|
||||
try {
|
||||
// fetch hoodie instant
|
||||
Option<HoodieInstant> commitInstantOpt = Option.fromJavaOptional(table.getActiveTimeline().getCommitsTimeline().getInstants()
|
||||
.filter(instant -> HoodieActiveTimeline.EQUALS.test(instant.getTimestamp(), commitInstantTime))
|
||||
.findFirst());
|
||||
if (commitInstantOpt.isPresent()) {
|
||||
// delete existing marker files
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstantTime);
|
||||
markerFiles.quietDeleteMarkerDir(context, parallelism);
|
||||
// delete existing markers
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(markerType, table, commitInstantTime);
|
||||
writeMarkers.quietDeleteMarkerDir(context, parallelism);
|
||||
|
||||
// generate rollback stats
|
||||
List<ListingBasedRollbackRequest> rollbackRequests;
|
||||
@@ -99,15 +103,15 @@ public class ZeroToOneUpgradeHandler implements UpgradeHandler {
|
||||
List<HoodieRollbackStat> rollbackStats = new ListingBasedRollbackHelper(table.getMetaClient(), table.getConfig())
|
||||
.collectRollbackStats(context, commitInstantOpt.get(), rollbackRequests);
|
||||
|
||||
// recreate marker files adhering to marker based rollback
|
||||
// recreate markers adhering to marker based rollback
|
||||
for (HoodieRollbackStat rollbackStat : rollbackStats) {
|
||||
for (String path : rollbackStat.getSuccessDeleteFiles()) {
|
||||
String dataFileName = path.substring(path.lastIndexOf("/") + 1);
|
||||
// not feasible to differentiate MERGE from CREATE. hence creating with MERGE IOType for all base files.
|
||||
markerFiles.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), dataFileName, IOType.MERGE);
|
||||
}
|
||||
for (FileStatus fileStatus : rollbackStat.getCommandBlocksCount().keySet()) {
|
||||
markerFiles.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
writeMarkers.create(rollbackStat.getPartitionPath(), getFileNameForMarkerFromLogFile(fileStatus.getPath().toString(), table), IOType.APPEND);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,9 +87,9 @@ import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
|
||||
import org.apache.hudi.table.HoodieSparkCopyOnWriteTable;
|
||||
import org.apache.hudi.table.HoodieSparkTable;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.action.HoodieWriteMetadata;
|
||||
import org.apache.hudi.table.action.commit.SparkWriteHelper;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
import org.apache.hudi.testutils.HoodieSparkWriteableTestTable;
|
||||
@@ -2220,11 +2220,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
|
||||
path -> path.toString().contains(HoodieTableMetaClient.MARKER_EXTN)))
|
||||
.limit(1).map(status -> status.getPath().getParent().toString()).collect(Collectors.toList()).get(0);
|
||||
|
||||
Path markerFilePath = new MarkerFiles(fs, basePath, metaClient.getMarkerFolderPath(instantTime), instantTime)
|
||||
Option<Path> markerFilePath = WriteMarkersFactory.get(
|
||||
cfg.getMarkersType(), getHoodieTable(metaClient, cfg), instantTime)
|
||||
.create(partitionPath,
|
||||
FSUtils.makeDataFileName(instantTime, "1-0-1", UUID.randomUUID().toString()),
|
||||
IOType.MERGE);
|
||||
LOG.info("Created a dummy marker path=" + markerFilePath);
|
||||
LOG.info("Created a dummy marker path=" + markerFilePath.get());
|
||||
|
||||
if (!enableOptimisticConsistencyGuard) {
|
||||
Exception e = assertThrows(HoodieCommitException.class, () -> {
|
||||
@@ -2235,7 +2236,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
|
||||
// with optimistic CG, commit should succeed
|
||||
client.commit(instantTime, result);
|
||||
}
|
||||
return Pair.of(markerFilePath, result);
|
||||
return Pair.of(markerFilePath.get(), result);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
|
||||
@@ -27,7 +27,8 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.testutils.HoodieTestTable;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@@ -213,6 +214,6 @@ public class TestCopyOnWriteRollbackActionExecutor extends HoodieClientRollbackT
|
||||
String.format("%s:%s/%s", this.fs.getScheme(), basePath, rollbackMetadata.get(DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles().get(0)));
|
||||
}
|
||||
|
||||
assertFalse(new MarkerFiles(table, commitInstant.getTimestamp()).doesMarkerDirExist());
|
||||
assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, commitInstant.getTimestamp()).doesMarkerDirExist());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
@@ -138,7 +138,7 @@ public class TestMergeOnReadRollbackActionExecutor extends HoodieClientRollbackT
|
||||
secondPartitionRollBackLogFiles.removeAll(secondPartitionCommit2LogFiles);
|
||||
assertEquals(1, secondPartitionRollBackLogFiles.size());
|
||||
|
||||
assertFalse(new MarkerFiles(table, "002").doesMarkerDirExist());
|
||||
assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, "002").doesMarkerDirExist());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||
|
||||
public class TestDirectWriteMarkers extends TestWriteMarkersBase {
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws IOException {
|
||||
initPath();
|
||||
initMetaClient();
|
||||
this.jsc = new JavaSparkContext(
|
||||
HoodieClientTestUtils.getSparkConfForTest(TestDirectWriteMarkers.class.getName()));
|
||||
this.context = new HoodieSparkEngineContext(jsc);
|
||||
this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
|
||||
this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
|
||||
this.writeMarkers = new DirectWriteMarkers(
|
||||
fs, metaClient.getBasePath(), markerFolderPath.toString(), "000");
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void cleanup() {
|
||||
jsc.stop();
|
||||
context = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
void verifyMarkersInFileSystem() throws IOException {
|
||||
List<FileStatus> markerFiles = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
|
||||
.stream().filter(status -> status.getPath().getName().contains(".marker"))
|
||||
.sorted().collect(Collectors.toList());
|
||||
assertEquals(3, markerFiles.size());
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/01/file1.marker.MERGE",
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/02/file2.marker.APPEND",
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/03/file3.marker.CREATE"),
|
||||
markerFiles.stream().map(m -> m.getPath().toString()).collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.common.config.HoodieCommonConfig;
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewManager;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
|
||||
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
|
||||
import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
import org.apache.hudi.timeline.service.TimelineService;
|
||||
import org.apache.hudi.timeline.service.handlers.marker.MarkerDirState;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||
|
||||
public class TestTimelineServerBasedWriteMarkers extends TestWriteMarkersBase {
|
||||
TimelineService timelineService;
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws IOException {
|
||||
initPath();
|
||||
initMetaClient();
|
||||
this.jsc = new JavaSparkContext(
|
||||
HoodieClientTestUtils.getSparkConfForTest(TestTimelineServerBasedWriteMarkers.class.getName()));
|
||||
this.context = new HoodieSparkEngineContext(jsc);
|
||||
this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
|
||||
this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
|
||||
|
||||
FileSystemViewStorageConfig storageConf =
|
||||
FileSystemViewStorageConfig.newBuilder().withStorageType(FileSystemViewStorageType.SPILLABLE_DISK).build();
|
||||
HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
|
||||
HoodieLocalEngineContext localEngineContext = new HoodieLocalEngineContext(metaClient.getHadoopConf());
|
||||
|
||||
try {
|
||||
timelineService = new TimelineService(localEngineContext, new Configuration(),
|
||||
TimelineService.Config.builder().serverPort(0).enableMarkerRequests(true).build(),
|
||||
FileSystem.get(new Configuration()),
|
||||
FileSystemViewManager.createViewManager(
|
||||
localEngineContext, metadataConfig, storageConf, HoodieCommonConfig.newBuilder().build()));
|
||||
timelineService.startService();
|
||||
} catch (Exception ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
this.writeMarkers = new TimelineServerBasedWriteMarkers(
|
||||
metaClient.getBasePath(), markerFolderPath.toString(), "000", "localhost", timelineService.getServerPort(), 300);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void cleanup() {
|
||||
if (timelineService != null) {
|
||||
timelineService.close();
|
||||
}
|
||||
jsc.stop();
|
||||
context = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
void verifyMarkersInFileSystem() throws IOException {
|
||||
List<String> allMarkers = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
|
||||
.stream().filter(status -> status.getPath().getName().contains(MarkerDirState.MARKERS_FILENAME_PREFIX))
|
||||
.flatMap(status -> {
|
||||
// Read all markers stored in each marker file maintained by the timeline service
|
||||
FSDataInputStream fsDataInputStream = null;
|
||||
BufferedReader bufferedReader = null;
|
||||
List<String> markers = null;
|
||||
try {
|
||||
fsDataInputStream = fs.open(status.getPath());
|
||||
bufferedReader = new BufferedReader(new InputStreamReader(fsDataInputStream, StandardCharsets.UTF_8));
|
||||
markers = bufferedReader.lines().collect(Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
closeQuietly(bufferedReader);
|
||||
closeQuietly(fsDataInputStream);
|
||||
}
|
||||
return markers.stream();
|
||||
})
|
||||
.sorted()
|
||||
.collect(Collectors.toList());
|
||||
assertEquals(3, allMarkers.size());
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
"2020/06/01/file1.marker.MERGE",
|
||||
"2020/06/02/file2.marker.APPEND",
|
||||
"2020/06/03/file3.marker.CREATE"),
|
||||
allMarkers);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes {@code Closeable} quietly.
|
||||
*
|
||||
* @param closeable {@code Closeable} to close
|
||||
*/
|
||||
private void closeQuietly(Closeable closeable) {
|
||||
if (closeable == null) {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
closeable.close();
|
||||
} catch (IOException e) {
|
||||
// Ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -16,7 +16,7 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table;
|
||||
package org.apache.hudi.table.marker;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieSparkEngineContext;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
@@ -25,18 +25,13 @@ import org.apache.hudi.common.testutils.FileSystemTestUtils;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.spark.api.java.JavaSparkContext;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
@@ -44,35 +39,18 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestMarkerFiles extends HoodieCommonTestHarness {
|
||||
public abstract class TestWriteMarkersBase extends HoodieCommonTestHarness {
|
||||
|
||||
private MarkerFiles markerFiles;
|
||||
private FileSystem fs;
|
||||
private Path markerFolderPath;
|
||||
private JavaSparkContext jsc;
|
||||
private HoodieSparkEngineContext context;
|
||||
protected WriteMarkers writeMarkers;
|
||||
protected FileSystem fs;
|
||||
protected Path markerFolderPath;
|
||||
protected JavaSparkContext jsc;
|
||||
protected HoodieSparkEngineContext context;
|
||||
|
||||
@BeforeEach
|
||||
public void setup() throws IOException {
|
||||
initPath();
|
||||
initMetaClient();
|
||||
this.jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest(TestMarkerFiles.class.getName()));
|
||||
this.context = new HoodieSparkEngineContext(jsc);
|
||||
this.fs = FSUtils.getFs(metaClient.getBasePath(), metaClient.getHadoopConf());
|
||||
this.markerFolderPath = new Path(metaClient.getMarkerFolderPath("000"));
|
||||
this.markerFiles = new MarkerFiles(fs, metaClient.getBasePath(), markerFolderPath.toString(), "000");
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void cleanup() {
|
||||
jsc.stop();
|
||||
context = null;
|
||||
}
|
||||
|
||||
private void createSomeMarkerFiles() {
|
||||
markerFiles.create("2020/06/01", "file1", IOType.MERGE);
|
||||
markerFiles.create("2020/06/02", "file2", IOType.APPEND);
|
||||
markerFiles.create("2020/06/03", "file3", IOType.CREATE);
|
||||
private void createSomeMarkers() {
|
||||
writeMarkers.create("2020/06/01", "file1", IOType.MERGE);
|
||||
writeMarkers.create("2020/06/02", "file2", IOType.APPEND);
|
||||
writeMarkers.create("2020/06/03", "file3", IOType.CREATE);
|
||||
}
|
||||
|
||||
private void createInvalidFile(String partitionPath, String invalidFileName) {
|
||||
@@ -85,48 +63,41 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
abstract void verifyMarkersInFileSystem() throws IOException;
|
||||
|
||||
@Test
|
||||
public void testCreation() throws Exception {
|
||||
// when
|
||||
createSomeMarkerFiles();
|
||||
createSomeMarkers();
|
||||
|
||||
// then
|
||||
assertTrue(fs.exists(markerFolderPath));
|
||||
List<FileStatus> markerFiles = FileSystemTestUtils.listRecursive(fs, markerFolderPath)
|
||||
.stream().filter(status -> status.getPath().getName().contains(".marker"))
|
||||
.sorted().collect(Collectors.toList());
|
||||
assertEquals(3, markerFiles.size());
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/01/file1.marker.MERGE",
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/02/file2.marker.APPEND",
|
||||
"file:" + markerFolderPath.toString() + "/2020/06/03/file3.marker.CREATE"),
|
||||
markerFiles.stream().map(m -> m.getPath().toString()).collect(Collectors.toList())
|
||||
);
|
||||
verifyMarkersInFileSystem();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeletionWhenMarkerDirExists() throws IOException {
|
||||
//when
|
||||
markerFiles.create("2020/06/01", "file1", IOType.MERGE);
|
||||
writeMarkers.create("2020/06/01", "file1", IOType.MERGE);
|
||||
|
||||
// then
|
||||
assertTrue(markerFiles.doesMarkerDirExist());
|
||||
assertTrue(markerFiles.deleteMarkerDir(context, 2));
|
||||
assertFalse(markerFiles.doesMarkerDirExist());
|
||||
assertTrue(writeMarkers.doesMarkerDirExist());
|
||||
assertTrue(writeMarkers.deleteMarkerDir(context, 2));
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeletionWhenMarkerDirNotExists() throws IOException {
|
||||
// then
|
||||
assertFalse(markerFiles.doesMarkerDirExist());
|
||||
assertTrue(markerFiles.allMarkerFilePaths().isEmpty());
|
||||
assertFalse(markerFiles.deleteMarkerDir(context, 2));
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
assertTrue(writeMarkers.allMarkerFilePaths().isEmpty());
|
||||
assertFalse(writeMarkers.deleteMarkerDir(context, 2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDataPathsWhenCreatingOrMerging() throws IOException {
|
||||
// add markfiles
|
||||
createSomeMarkerFiles();
|
||||
createSomeMarkers();
|
||||
// add invalid file
|
||||
createInvalidFile("2020/06/01", "invalid_file3");
|
||||
int fileSize = FileSystemTestUtils.listRecursive(fs, markerFolderPath).size();
|
||||
@@ -135,19 +106,19 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList(
|
||||
"2020/06/01/file1", "2020/06/03/file3"),
|
||||
markerFiles.createdAndMergedDataPaths(context, 2).stream().sorted().collect(Collectors.toList())
|
||||
writeMarkers.createdAndMergedDataPaths(context, 2).stream().sorted().collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllMarkerPaths() throws IOException {
|
||||
// given
|
||||
createSomeMarkerFiles();
|
||||
createSomeMarkers();
|
||||
|
||||
// then
|
||||
assertIterableEquals(CollectionUtils.createImmutableList("2020/06/01/file1.marker.MERGE",
|
||||
"2020/06/02/file2.marker.APPEND", "2020/06/03/file3.marker.CREATE"),
|
||||
markerFiles.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
|
||||
writeMarkers.allMarkerFilePaths().stream().sorted().collect(Collectors.toList())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -158,6 +129,6 @@ public class TestMarkerFiles extends HoodieCommonTestHarness {
|
||||
final String markerFilePath = pathPrefix + ".marker.APPEND";
|
||||
|
||||
// when-then
|
||||
assertEquals(pathPrefix, MarkerFiles.stripMarkerSuffix(markerFilePath));
|
||||
assertEquals(pathPrefix, WriteMarkers.stripMarkerSuffix(markerFilePath));
|
||||
}
|
||||
}
|
||||
@@ -39,7 +39,8 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.MarkerFiles;
|
||||
import org.apache.hudi.table.marker.WriteMarkers;
|
||||
import org.apache.hudi.table.marker.WriteMarkersFactory;
|
||||
import org.apache.hudi.testutils.Assertions;
|
||||
import org.apache.hudi.testutils.HoodieClientTestBase;
|
||||
import org.apache.hudi.testutils.HoodieClientTestUtils;
|
||||
@@ -58,6 +59,7 @@ import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
@@ -126,8 +128,9 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
|
||||
|
||||
// delete one of the marker files in 2nd commit if need be.
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = markerFiles.allMarkerFilePaths();
|
||||
WriteMarkers writeMarkers =
|
||||
WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
if (deletePartialMarkerFiles) {
|
||||
String toDeleteMarkerFile = markerPaths.get(0);
|
||||
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
|
||||
@@ -182,8 +185,8 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
HoodieInstant commitInstant = table.getPendingCommitTimeline().lastInstant().get();
|
||||
|
||||
// delete one of the marker files in 2nd commit if need be.
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = markerFiles.allMarkerFilePaths();
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
List<String> markerPaths = new ArrayList<>(writeMarkers.allMarkerFilePaths());
|
||||
if (deletePartialMarkerFiles) {
|
||||
String toDeleteMarkerFile = markerPaths.get(0);
|
||||
table.getMetaClient().getFs().delete(new Path(table.getMetaClient().getTempFolderPath() + "/" + commitInstant.getTimestamp() + "/" + toDeleteMarkerFile));
|
||||
@@ -212,21 +215,21 @@ public class TestUpgradeDowngrade extends HoodieClientTestBase {
|
||||
|
||||
private void assertMarkerFilesForDowngrade(HoodieTable table, HoodieInstant commitInstant) throws IOException {
|
||||
// Verify recreated marker files are as expected
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
|
||||
assertFalse(markerFiles.doesMarkerDirExist());
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
assertFalse(writeMarkers.doesMarkerDirExist());
|
||||
}
|
||||
|
||||
private void assertMarkerFilesForUpgrade(HoodieTable table, HoodieInstant commitInstant, List<FileSlice> firstPartitionCommit2FileSlices,
|
||||
List<FileSlice> secondPartitionCommit2FileSlices) throws IOException {
|
||||
// Verify recreated marker files are as expected
|
||||
MarkerFiles markerFiles = new MarkerFiles(table, commitInstant.getTimestamp());
|
||||
assertTrue(markerFiles.doesMarkerDirExist());
|
||||
List<String> files = markerFiles.allMarkerFilePaths();
|
||||
WriteMarkers writeMarkers = WriteMarkersFactory.get(getConfig().getMarkersType(), table, commitInstant.getTimestamp());
|
||||
assertTrue(writeMarkers.doesMarkerDirExist());
|
||||
Set<String> files = writeMarkers.allMarkerFilePaths();
|
||||
|
||||
assertEquals(2, files.size());
|
||||
List<String> actualFiles = new ArrayList<>();
|
||||
for (String file : files) {
|
||||
String fileName = MarkerFiles.stripMarkerSuffix(file);
|
||||
String fileName = WriteMarkers.stripMarkerSuffix(file);
|
||||
actualFiles.add(fileName);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user