[HUDI-3884] Support archival beyond savepoint commits (#5837)
Co-authored-by: sivabalan <n.siva.b@gmail.com>
This commit is contained in:
@@ -64,6 +64,7 @@ import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -76,12 +77,14 @@ import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.GREATER_THAN;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.LESSER_THAN_OR_EQUALS;
|
||||
import static org.apache.hudi.common.table.timeline.HoodieTimeline.compareTimestamps;
|
||||
|
||||
/**
|
||||
* Archiver to bound the growth of files under .hoodie meta path.
|
||||
@@ -409,9 +412,11 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
||||
.getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION))
|
||||
.filterInflights().firstInstant();
|
||||
|
||||
// We cannot have any holes in the commit timeline. We cannot archive any commits which are
|
||||
// made after the first savepoint present.
|
||||
// NOTE: We cannot have any holes in the commit timeline.
|
||||
// We cannot archive any commits which are made after the first savepoint present,
|
||||
// unless HoodieArchivalConfig#ARCHIVE_BEYOND_SAVEPOINT is enabled.
|
||||
Option<HoodieInstant> firstSavepoint = table.getCompletedSavepointTimeline().firstInstant();
|
||||
Set<String> savepointTimestamps = table.getSavepointTimestamps();
|
||||
if (!commitTimeline.empty() && commitTimeline.countInstants() > maxInstantsToKeep) {
|
||||
// For Merge-On-Read table, inline or async compaction is enabled
|
||||
// We need to make sure that there are enough delta commits in the active timeline
|
||||
@@ -428,28 +433,33 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
||||
// Actually do the commits
|
||||
Stream<HoodieInstant> instantToArchiveStream = commitTimeline.getInstants()
|
||||
.filter(s -> {
|
||||
// if no savepoint present, then don't filter
|
||||
return !(firstSavepoint.isPresent() && HoodieTimeline.compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
|
||||
if (config.shouldArchiveBeyondSavepoint()) {
|
||||
// skip savepoint commits and proceed further
|
||||
return !savepointTimestamps.contains(s.getTimestamp());
|
||||
} else {
|
||||
// if no savepoint present, then don't filter
|
||||
// stop at first savepoint commit
|
||||
return !(firstSavepoint.isPresent() && compareTimestamps(firstSavepoint.get().getTimestamp(), LESSER_THAN_OR_EQUALS, s.getTimestamp()));
|
||||
}
|
||||
}).filter(s -> {
|
||||
// Ensure commits >= oldest pending compaction commit is retained
|
||||
return oldestPendingCompactionAndReplaceInstant
|
||||
.map(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
|
||||
.map(instant -> compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
|
||||
.orElse(true);
|
||||
}).filter(s -> {
|
||||
// We need this to ensure that when multiple writers are performing conflict resolution, eligible instants don't
|
||||
// get archived, i.e, instants after the oldestInflight are retained on the timeline
|
||||
if (config.getFailedWritesCleanPolicy() == HoodieFailedWritesCleaningPolicy.LAZY) {
|
||||
return oldestInflightCommitInstant.map(instant ->
|
||||
HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
|
||||
compareTimestamps(instant.getTimestamp(), GREATER_THAN, s.getTimestamp()))
|
||||
.orElse(true);
|
||||
}
|
||||
return true;
|
||||
}).filter(s ->
|
||||
oldestInstantToRetainForCompaction.map(instantToRetain ->
|
||||
HoodieTimeline.compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
|
||||
compareTimestamps(s.getTimestamp(), LESSER_THAN, instantToRetain.getTimestamp()))
|
||||
.orElse(true)
|
||||
);
|
||||
|
||||
return instantToArchiveStream.limit(commitTimeline.countInstants() - minInstantsToKeep);
|
||||
} else {
|
||||
return Stream.empty();
|
||||
@@ -479,7 +489,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
||||
instants = Stream.empty();
|
||||
} else {
|
||||
LOG.info("Limiting archiving of instants to latest compaction on metadata table at " + latestCompactionTime.get());
|
||||
instants = instants.filter(instant -> HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN,
|
||||
instants = instants.filter(instant -> compareTimestamps(instant.getTimestamp(), LESSER_THAN,
|
||||
latestCompactionTime.get()));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
@@ -487,18 +497,29 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a metadata table, do not archive the commits that live in data set
|
||||
// active timeline. This is required by metadata table,
|
||||
// see HoodieTableMetadataUtil#processRollbackMetadata for details.
|
||||
if (HoodieTableMetadata.isMetadataTable(config.getBasePath())) {
|
||||
HoodieTableMetaClient dataMetaClient = HoodieTableMetaClient.builder()
|
||||
.setBasePath(HoodieTableMetadata.getDatasetBasePath(config.getBasePath()))
|
||||
.setConf(metaClient.getHadoopConf())
|
||||
.build();
|
||||
Option<String> earliestActiveDatasetCommit = dataMetaClient.getActiveTimeline().firstInstant().map(HoodieInstant::getTimestamp);
|
||||
if (earliestActiveDatasetCommit.isPresent()) {
|
||||
instants = instants.filter(instant ->
|
||||
HoodieTimeline.compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, earliestActiveDatasetCommit.get()));
|
||||
Option<HoodieInstant> earliestActiveDatasetCommit = dataMetaClient.getActiveTimeline().firstInstant();
|
||||
|
||||
if (config.shouldArchiveBeyondSavepoint()) {
|
||||
// There are chances that there could be holes in the timeline due to archival and savepoint interplay.
|
||||
// So, the first non-savepoint commit in the data timeline is considered as beginning of the active timeline.
|
||||
Option<HoodieInstant> firstNonSavepointCommit = dataMetaClient.getActiveTimeline().getFirstNonSavepointCommit();
|
||||
if (firstNonSavepointCommit.isPresent()) {
|
||||
String firstNonSavepointCommitTime = firstNonSavepointCommit.get().getTimestamp();
|
||||
instants = instants.filter(instant ->
|
||||
compareTimestamps(instant.getTimestamp(), LESSER_THAN, firstNonSavepointCommitTime));
|
||||
}
|
||||
} else {
|
||||
// Do not archive the commits that live in data set active timeline.
|
||||
// This is required by metadata table, see HoodieTableMetadataUtil#processRollbackMetadata for details.
|
||||
if (earliestActiveDatasetCommit.isPresent()) {
|
||||
instants = instants.filter(instant ->
|
||||
compareTimestamps(instant.getTimestamp(), HoodieTimeline.LESSER_THAN, earliestActiveDatasetCommit.get().getTimestamp()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -589,7 +610,7 @@ public class HoodieTimelineArchiver<T extends HoodieAvroPayload, I, K, O> {
|
||||
}
|
||||
|
||||
List<HoodieInstant> instantsToBeDeleted =
|
||||
instants.stream().filter(instant1 -> HoodieTimeline.compareTimestamps(instant1.getTimestamp(),
|
||||
instants.stream().filter(instant1 -> compareTimestamps(instant1.getTimestamp(),
|
||||
LESSER_THAN_OR_EQUALS, thresholdInstant.getTimestamp())).collect(Collectors.toList());
|
||||
|
||||
for (HoodieInstant deleteInstant : instantsToBeDeleted) {
|
||||
|
||||
@@ -24,6 +24,7 @@ import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
|
||||
import javax.annotation.concurrent.Immutable;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
@@ -34,8 +35,8 @@ import java.util.Properties;
|
||||
*/
|
||||
@Immutable
|
||||
@ConfigClassProperty(name = "Archival Configs",
|
||||
groupName = ConfigGroups.Names.WRITE_CLIENT,
|
||||
description = "Configurations that control archival.")
|
||||
groupName = ConfigGroups.Names.WRITE_CLIENT,
|
||||
description = "Configurations that control archival.")
|
||||
public class HoodieArchivalConfig extends HoodieConfig {
|
||||
|
||||
public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
|
||||
@@ -92,6 +93,13 @@ public class HoodieArchivalConfig extends HoodieConfig {
|
||||
.withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
|
||||
+ " useful when storage scheme doesn't support append operation.");
|
||||
|
||||
public static final ConfigProperty<Boolean> ARCHIVE_BEYOND_SAVEPOINT = ConfigProperty
|
||||
.key("hoodie.archive.beyond.savepoint")
|
||||
.defaultValue(false)
|
||||
.sinceVersion("0.12.0")
|
||||
.withDocumentation("If enabled, archival will proceed beyond savepoint, skipping savepoint commits. "
|
||||
+ "If disabled, archival will stop at the earliest savepoint commit.");
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@@ -107,7 +115,9 @@ public class HoodieArchivalConfig extends HoodieConfig {
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
|
||||
/** @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */
|
||||
/**
|
||||
* @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
|
||||
/**
|
||||
@@ -186,6 +196,11 @@ public class HoodieArchivalConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveBeyondSavepoint(boolean archiveBeyondSavepoint) {
|
||||
archivalConfig.setValue(ARCHIVE_BEYOND_SAVEPOINT, String.valueOf(archiveBeyondSavepoint));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig build() {
|
||||
archivalConfig.setDefaults(HoodieArchivalConfig.class.getName());
|
||||
return archivalConfig;
|
||||
|
||||
@@ -1209,7 +1209,11 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public boolean getArchiveMergeEnable() {
|
||||
return getBoolean(HoodieArchivalConfig.ARCHIVE_MERGE_ENABLE);
|
||||
return getBooleanOrDefault(HoodieArchivalConfig.ARCHIVE_MERGE_ENABLE);
|
||||
}
|
||||
|
||||
public boolean shouldArchiveBeyondSavepoint() {
|
||||
return getBooleanOrDefault(HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT);
|
||||
}
|
||||
|
||||
public long getArchiveMergeSmallFileLimitBytes() {
|
||||
|
||||
@@ -368,10 +368,10 @@ public abstract class HoodieTable<T extends HoodieRecordPayload, I, K, O> implem
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the list of savepoints in this table.
|
||||
* Get the list of savepoint timestamps in this table.
|
||||
*/
|
||||
public List<String> getSavepoints() {
|
||||
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList());
|
||||
public Set<String> getSavepointTimestamps() {
|
||||
return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
public HoodieActiveTimeline getActiveTimeline() {
|
||||
|
||||
@@ -104,7 +104,7 @@ public class CleanPlanner<T extends HoodieRecordPayload, I, K, O> implements Ser
|
||||
* Get the list of data file names savepointed.
|
||||
*/
|
||||
public Stream<String> getSavepointedDataFiles(String savepointTime) {
|
||||
if (!hoodieTable.getSavepoints().contains(savepointTime)) {
|
||||
if (!hoodieTable.getSavepointTimestamps().contains(savepointTime)) {
|
||||
throw new HoodieSavepointException(
|
||||
"Could not get data files for savepoint " + savepointTime + ". No such savepoint.");
|
||||
}
|
||||
@@ -227,7 +227,7 @@ public class CleanPlanner<T extends HoodieRecordPayload, I, K, O> implements Ser
|
||||
+ " file versions. ");
|
||||
List<CleanFileInfo> deletePaths = new ArrayList<>();
|
||||
// Collect all the datafiles savepointed by all the savepoints
|
||||
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
|
||||
List<String> savepointedFiles = hoodieTable.getSavepointTimestamps().stream()
|
||||
.flatMap(this::getSavepointedDataFiles)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@@ -295,7 +295,7 @@ public class CleanPlanner<T extends HoodieRecordPayload, I, K, O> implements Ser
|
||||
List<CleanFileInfo> deletePaths = new ArrayList<>();
|
||||
|
||||
// Collect all the datafiles savepointed by all the savepoints
|
||||
List<String> savepointedFiles = hoodieTable.getSavepoints().stream()
|
||||
List<String> savepointedFiles = hoodieTable.getSavepointTimestamps().stream()
|
||||
.flatMap(this::getSavepointedDataFiles)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
package org.apache.hudi.io;
|
||||
|
||||
import org.apache.hudi.avro.model.HoodieRollbackMetadata;
|
||||
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
|
||||
import org.apache.hudi.client.HoodieTimelineArchiver;
|
||||
import org.apache.hudi.client.transaction.lock.InProcessLockProvider;
|
||||
import org.apache.hudi.client.utils.MetadataConversionUtils;
|
||||
@@ -44,9 +45,9 @@ import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.util.FileIOUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieArchivalConfig;
|
||||
import org.apache.hudi.config.HoodieCleanConfig;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieLockConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
@@ -65,7 +66,9 @@ import org.apache.log4j.Logger;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.Arguments;
|
||||
import org.junit.jupiter.params.provider.CsvSource;
|
||||
import org.junit.jupiter.params.provider.MethodSource;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -88,6 +91,7 @@ import java.util.stream.IntStream;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.common.testutils.HoodieTestUtils.createCompactionCommitInMetadataTable;
|
||||
import static org.apache.hudi.config.HoodieArchivalConfig.ARCHIVE_BEYOND_SAVEPOINT;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
@@ -180,6 +184,33 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
long size,
|
||||
HoodieFailedWritesCleaningPolicy failedWritesCleaningPolicy,
|
||||
WriteConcurrencyMode writeConcurrencyMode) throws Exception {
|
||||
return initTestTableAndGetWriteConfig(
|
||||
enableMetadata,
|
||||
minArchivalCommits,
|
||||
maxArchivalCommits,
|
||||
maxDeltaCommits,
|
||||
maxDeltaCommitsMetadataTable,
|
||||
tableType,
|
||||
enableArchiveMerge,
|
||||
archiveFilesBatch,
|
||||
size,
|
||||
failedWritesCleaningPolicy,
|
||||
writeConcurrencyMode,
|
||||
ARCHIVE_BEYOND_SAVEPOINT.defaultValue());
|
||||
}
|
||||
|
||||
private HoodieWriteConfig initTestTableAndGetWriteConfig(boolean enableMetadata,
|
||||
int minArchivalCommits,
|
||||
int maxArchivalCommits,
|
||||
int maxDeltaCommits,
|
||||
int maxDeltaCommitsMetadataTable,
|
||||
HoodieTableType tableType,
|
||||
boolean enableArchiveMerge,
|
||||
int archiveFilesBatch,
|
||||
long size,
|
||||
HoodieFailedWritesCleaningPolicy failedWritesCleaningPolicy,
|
||||
WriteConcurrencyMode writeConcurrencyMode,
|
||||
boolean archiveProceedBeyondSavepoints) throws Exception {
|
||||
init(tableType);
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
|
||||
@@ -188,7 +219,8 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
.withArchiveMergeEnable(enableArchiveMerge)
|
||||
.withArchiveMergeFilesBatchSize(archiveFilesBatch)
|
||||
.withArchiveMergeSmallFileLimit(size)
|
||||
.archiveCommitsWith(minArchivalCommits, maxArchivalCommits).build())
|
||||
.archiveCommitsWith(minArchivalCommits, maxArchivalCommits)
|
||||
.withArchiveBeyondSavepoint(archiveProceedBeyondSavepoints).build())
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withMaxNumDeltaCommitsBeforeCompaction(maxDeltaCommits).build())
|
||||
.withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
|
||||
@@ -249,6 +281,59 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(booleans = {true, false})
|
||||
public void testSavepointWithArchival(boolean archiveBeyondSavepoint) throws Exception {
|
||||
boolean enableMetadata = false;
|
||||
HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 5, 2, HoodieTableType.COPY_ON_WRITE,
|
||||
false, 10, 209715200, HoodieFailedWritesCleaningPolicy.EAGER, WriteConcurrencyMode.SINGLE_WRITER, archiveBeyondSavepoint);
|
||||
|
||||
// min archival commits is 2 and max archival commits is 4. and so, after 5th commit, 3 commits will be archived.
|
||||
for (int i = 1; i < 5; i++) {
|
||||
testTable.doWriteOperation(String.format("%08d", i), WriteOperationType.UPSERT, i == 1 ? Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
|
||||
}
|
||||
|
||||
// savepoint 3rd commit
|
||||
String commitToSavepoint = String.format("%08d", 3);
|
||||
HoodieSavepointMetadata savepointMetadata = testTable.doSavepoint(commitToSavepoint);
|
||||
testTable.addSavepoint(commitToSavepoint, savepointMetadata);
|
||||
|
||||
for (int i = 5; i < 7; i++) {
|
||||
testTable.doWriteOperation(String.format("%08d", i), WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
|
||||
}
|
||||
// trigger archival
|
||||
Pair<List<HoodieInstant>, List<HoodieInstant>> commitsList = archiveAndGetCommitsList(writeConfig);
|
||||
List<HoodieInstant> originalCommits = commitsList.getKey();
|
||||
List<HoodieInstant> commitsAfterArchival = commitsList.getValue();
|
||||
|
||||
if (archiveBeyondSavepoint) {
|
||||
// retains only 2 commits. C3 and C8. and savepointed commit for C3.
|
||||
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002", "00000004", "00000005")),
|
||||
Stream.concat(getActiveCommitInstants(Arrays.asList("00000003", "00000006")).stream(), getActiveSavepointedCommitInstants(Arrays.asList("00000003")).stream())
|
||||
.collect(Collectors.toList()), commitsAfterArchival);
|
||||
} else {
|
||||
// archives only C1 and C2. stops at first savepointed commit C3.
|
||||
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002")),
|
||||
Stream.concat(getActiveCommitInstants(Arrays.asList("00000003", "00000004", "00000005", "00000006")).stream(),
|
||||
getActiveSavepointedCommitInstants(Arrays.asList("00000003")).stream())
|
||||
.collect(Collectors.toList()), commitsAfterArchival);
|
||||
}
|
||||
|
||||
for (int i = 7; i < 10; i++) {
|
||||
testTable.doWriteOperation(String.format("%08d", i), WriteOperationType.UPSERT, Collections.emptyList(), Arrays.asList("p1", "p2"), 2);
|
||||
}
|
||||
|
||||
// once savepoint is removed. C3 will be archived.
|
||||
testTable.deleteSavepoint(commitToSavepoint);
|
||||
commitsList = archiveAndGetCommitsList(writeConfig);
|
||||
originalCommits = commitsList.getKey();
|
||||
commitsAfterArchival = commitsList.getValue();
|
||||
|
||||
metaClient.reloadActiveTimeline();
|
||||
verifyArchival(getAllArchivedCommitInstants(Arrays.asList("00000001", "00000002","00000003", "00000004", "00000005", "00000006", "00000007")),
|
||||
getActiveCommitInstants(Arrays.asList("00000008", "00000009")), commitsAfterArchival);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(booleans = {true, false})
|
||||
public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableArchiveMerge) throws Exception {
|
||||
@@ -563,13 +648,22 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
assertEquals(originalCommits, commitsAfterArchival);
|
||||
}
|
||||
|
||||
private static Stream<Arguments> archiveCommitSavepointNoHoleParams() {
|
||||
return Arrays.stream(new Boolean[][] {
|
||||
{true, true},
|
||||
{false, true},
|
||||
{true, false},
|
||||
{false, false}
|
||||
}).map(Arguments::of);
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(booleans = {true, false})
|
||||
public void testArchiveCommitSavepointNoHole(boolean enableMetadataTable) throws Exception {
|
||||
@MethodSource("archiveCommitSavepointNoHoleParams")
|
||||
public void testArchiveCommitSavepointNoHole(boolean enableMetadataTable, boolean archiveBeyondSavepoint) throws Exception {
|
||||
init();
|
||||
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
|
||||
.withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table")
|
||||
.withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 5).build())
|
||||
.withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2, 5).withArchiveBeyondSavepoint(archiveBeyondSavepoint).build())
|
||||
.withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
|
||||
.withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
|
||||
.withRemoteServerPort(timelineServicePort).build())
|
||||
@@ -596,14 +690,30 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match");
|
||||
assertTrue(archiver.archiveIfRequired(context));
|
||||
timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants();
|
||||
assertEquals(5, timeline.countInstants(),
|
||||
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
|
||||
"Archived commits should always be safe");
|
||||
if (archiveBeyondSavepoint) {
|
||||
// commits in active timeline = 101 and 105.
|
||||
assertEquals(2, timeline.countInstants(),
|
||||
"Since archiveBeyondSavepoint config is enabled, we will archive commits 102, 103 ");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
|
||||
"Savepointed commits should always be safe");
|
||||
assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
|
||||
"102 expected to be archived");
|
||||
assertFalse(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
|
||||
"103 expected to be archived");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")),
|
||||
"104 expected to be archived");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "105")),
|
||||
"105 expected to be in active timeline");
|
||||
} else {
|
||||
assertEquals(5, timeline.countInstants(),
|
||||
"Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "101")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "102")),
|
||||
"Archived commits should always be safe");
|
||||
assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "103")),
|
||||
"Archived commits should always be safe");
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@@ -934,7 +1044,7 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
HoodieInstant firstInstant = metaClient.reloadActiveTimeline().firstInstant().get();
|
||||
expectedArchivedInstants = expectedArchivedInstants.stream()
|
||||
.filter(entry -> HoodieTimeline.compareTimestamps(entry.getTimestamp(), HoodieTimeline.LESSER_THAN, firstInstant.getTimestamp()
|
||||
)).collect(Collectors.toList());
|
||||
)).collect(Collectors.toList());
|
||||
expectedArchivedInstants.forEach(entry -> assertTrue(metaClient.getArchivedTimeline().containsInstant(entry)));
|
||||
}
|
||||
|
||||
@@ -1283,7 +1393,7 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
HoodieTimeline timeline = metaClient.getActiveTimeline();
|
||||
expectedArchivedInstants.forEach(entry -> {
|
||||
// check safety
|
||||
if (entry.getAction() != HoodieTimeline.ROLLBACK_ACTION) {
|
||||
if (!entry.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
|
||||
assertTrue(timeline.containsOrBeforeTimelineStarts(entry.getTimestamp()), "Archived commits should always be safe");
|
||||
}
|
||||
}
|
||||
@@ -1315,6 +1425,10 @@ public class TestHoodieTimelineArchiver extends HoodieClientTestHarness {
|
||||
return getActiveCommitInstants(commitTimes, HoodieTimeline.COMMIT_ACTION);
|
||||
}
|
||||
|
||||
private List<HoodieInstant> getActiveSavepointedCommitInstants(List<String> commitTimes) {
|
||||
return getActiveCommitInstants(commitTimes, HoodieTimeline.SAVEPOINT_ACTION);
|
||||
}
|
||||
|
||||
private List<HoodieInstant> getActiveCommitInstants(List<String> commitTimes, String action) {
|
||||
List<HoodieInstant> allInstants = new ArrayList<>();
|
||||
commitTimes.forEach(entry -> allInstants.add(new HoodieInstant(State.COMPLETED, action, entry)));
|
||||
|
||||
Reference in New Issue
Block a user