[HUDI-1937] Rollback unfinished replace commit to allow updates (#3869)
* [HUDI-1937] Rollback unfinished replace commit to allow updates while clustering
* Revert and delete requested replacecommit too
* Rollback pending clustering instants transactionally
* No double locking and add a config to enable rollback
* Update config to be clear about rollback only on conflict
@@ -191,6 +191,15 @@ public class HoodieClusteringConfig extends HoodieConfig {
       .sinceVersion("0.10.0")
       .withDocumentation("Enable data skipping by collecting statistics once layout optimization is complete.");

+  public static final ConfigProperty<Boolean> ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT = ConfigProperty
+      .key("hoodie.clustering.rollback.pending.replacecommit.on.conflict")
+      .defaultValue(false)
+      .sinceVersion("0.10.0")
+      .withDocumentation("If updates are allowed to file groups pending clustering, then set this config to rollback failed or pending clustering instants. "
+          + "Pending clustering will be rolled back ONLY IF there is conflict between incoming upsert and filegroup to be clustered. "
+          + "Please exercise caution while setting this config, especially when clustering is done very frequently. This could lead to race condition in "
+          + "rare scenarios, for example, when the clustering completes after instants are fetched but before rollback completed.");
+
   /**
    * @deprecated Use {@link #PLAN_STRATEGY_CLASS_NAME} and its methods instead
    */
@@ -404,6 +413,11 @@ public class HoodieClusteringConfig extends HoodieConfig {
       return this;
     }

+    public Builder withRollbackPendingClustering(Boolean rollbackPendingClustering) {
+      clusteringConfig.setValue(ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT, String.valueOf(rollbackPendingClustering));
+      return this;
+    }
+
     public Builder withSpaceFillingCurveDataOptimizeEnable(Boolean enable) {
       clusteringConfig.setValue(LAYOUT_OPTIMIZE_ENABLE, String.valueOf(enable));
       return this;
@@ -1166,6 +1166,10 @@ public class HoodieWriteConfig extends HoodieConfig {
     return inlineClusteringEnabled() || isAsyncClusteringEnabled();
   }

+  public boolean isRollbackPendingClustering() {
+    return getBoolean(HoodieClusteringConfig.ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT);
+  }
+
   public int getInlineClusterMaxCommits() {
     return getInt(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS);
   }
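Taken together, the three changes above expose one switch: a new clustering config key, a builder method to set it, and a getter the write path reads. The following is a minimal sketch of wiring it up from a writer; the base path and table name are placeholders, and only withRollbackPendingClustering is new in this commit — the other builder calls mirror the ones used in the test added below.

import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.config.HoodieWriteConfig;

public class RollbackOnConflictExample {
  // Builds a write config that allows updates to file groups pending clustering and
  // rolls the pending replacecommit back only when an incoming upsert conflicts with it.
  public static HoodieWriteConfig buildWriteConfig(String basePath) {
    HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
        .withInlineClustering(true)
        .withInlineClusteringNumCommits(1)
        .withClusteringUpdatesStrategy(
            "org.apache.hudi.client.clustering.update.strategy.SparkAllowUpdateStrategy")
        .withRollbackPendingClustering(true) // hoodie.clustering.rollback.pending.replacecommit.on.conflict
        .build();
    return HoodieWriteConfig.newBuilder()
        .withPath(basePath)          // placeholder base path
        .forTable("example_table")   // placeholder table name
        .withClusteringConfig(clusteringConfig)
        .build();
  }
}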
@@ -21,6 +21,7 @@ package org.apache.hudi.table.action.cluster.strategy;
 import org.apache.hudi.common.engine.HoodieEngineContext;
 import org.apache.hudi.common.model.HoodieFileGroupId;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.util.collection.Pair;

 import java.util.Set;

@@ -41,8 +42,8 @@ public abstract class UpdateStrategy<T extends HoodieRecordPayload<T>, I> {
    * Check the update records to the file group in clustering.
    * @param taggedRecordsRDD the records to write, tagged with target file id,
    *                         future can update tagged records location to a different fileId.
-   * @return the recordsRDD strategy updated
+   * @return the recordsRDD strategy updated and a set of file groups to be updated while pending clustering.
    */
-  public abstract I handleUpdate(I taggedRecordsRDD);
+  public abstract Pair<I, Set<HoodieFileGroupId>> handleUpdate(I taggedRecordsRDD);

 }
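Because the abstract signature now returns a Pair, any custom UpdateStrategy outside this repository has to be adapted. A hypothetical pass-through strategy (not part of this commit) could look roughly like the following, assuming the same Spark types and constructor shape used by the strategies in this diff.

import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy;

import org.apache.spark.api.java.JavaRDD;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

// Hypothetical strategy: lets every update through and reports no conflicting file groups,
// so the executor never needs to roll back pending clustering.
public class PassThroughUpdateStrategy<T extends HoodieRecordPayload<T>>
    extends UpdateStrategy<T, JavaRDD<HoodieRecord<T>>> {

  public PassThroughUpdateStrategy(HoodieEngineContext engineContext,
                                   HashSet<HoodieFileGroupId> fileGroupsInPendingClustering) {
    super(engineContext, fileGroupsInPendingClustering);
  }

  @Override
  public Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> handleUpdate(JavaRDD<HoodieRecord<T>> taggedRecordsRDD) {
    // Records are passed through unchanged; an empty set means "no conflicts detected".
    return Pair.of(taggedRecordsRDD, Collections.emptySet());
  }
}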
@@ -22,10 +22,15 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext;
 import org.apache.hudi.common.model.HoodieFileGroupId;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy;

 import org.apache.spark.api.java.JavaRDD;

 import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;

 /**
  * Allow ingestion commits during clustering job.
@@ -37,8 +42,19 @@ public class SparkAllowUpdateStrategy<T extends HoodieRecordPayload<T>> extends
     super(engineContext, fileGroupsInPendingClustering);
   }

+  private List<HoodieFileGroupId> getGroupIdsWithUpdate(JavaRDD<HoodieRecord<T>> inputRecords) {
+    List<HoodieFileGroupId> fileGroupIdsWithUpdates = inputRecords
+        .filter(record -> record.getCurrentLocation() != null)
+        .map(record -> new HoodieFileGroupId(record.getPartitionPath(), record.getCurrentLocation().getFileId())).distinct().collect();
+    return fileGroupIdsWithUpdates;
+  }
+
   @Override
-  public JavaRDD<HoodieRecord<T>> handleUpdate(JavaRDD<HoodieRecord<T>> taggedRecordsRDD) {
-    return taggedRecordsRDD;
+  public Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> handleUpdate(JavaRDD<HoodieRecord<T>> taggedRecordsRDD) {
+    List<HoodieFileGroupId> fileGroupIdsWithRecordUpdate = getGroupIdsWithUpdate(taggedRecordsRDD);
+    Set<HoodieFileGroupId> fileGroupIdsWithUpdatesAndPendingClustering = fileGroupIdsWithRecordUpdate.stream()
+        .filter(f -> fileGroupsInPendingClustering.contains(f))
+        .collect(Collectors.toSet());
+    return Pair.of(taggedRecordsRDD, fileGroupIdsWithUpdatesAndPendingClustering);
   }
 }
@@ -22,14 +22,18 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext;
 import org.apache.hudi.common.model.HoodieFileGroupId;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.exception.HoodieClusteringUpdateException;
 import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy;

 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;

+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Set;

 /**
  * Update strategy based on following.
@@ -50,7 +54,7 @@ public class SparkRejectUpdateStrategy<T extends HoodieRecordPayload<T>> extends
   }

   @Override
-  public JavaRDD<HoodieRecord<T>> handleUpdate(JavaRDD<HoodieRecord<T>> taggedRecordsRDD) {
+  public Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> handleUpdate(JavaRDD<HoodieRecord<T>> taggedRecordsRDD) {
     List<HoodieFileGroupId> fileGroupIdsWithRecordUpdate = getGroupIdsWithUpdate(taggedRecordsRDD);
     fileGroupIdsWithRecordUpdate.forEach(fileGroupIdWithRecordUpdate -> {
       if (fileGroupsInPendingClustering.contains(fileGroupIdWithRecordUpdate)) {
@@ -61,7 +65,7 @@ public class SparkRejectUpdateStrategy<T extends HoodieRecordPayload<T>> extends
         throw new HoodieClusteringUpdateException(msg);
       }
     });
-    return taggedRecordsRDD;
+    return Pair.of(taggedRecordsRDD, Collections.emptySet());
   }

 }
@@ -77,6 +77,8 @@ import java.util.List;
 import java.util.Set;
 import java.util.Map;

+import static org.apache.hudi.common.util.ClusteringUtils.getAllFileGroupsInPendingClusteringPlans;
+
 public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayload> extends
     BaseCommitActionExecutor<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>, JavaRDD<WriteStatus>, HoodieWriteMetadata> {

@@ -118,7 +120,27 @@ public abstract class BaseSparkCommitActionExecutor<T extends HoodieRecordPayloa
           table.getFileSystemView().getFileGroupsInPendingClustering().map(entry -> entry.getKey()).collect(Collectors.toSet());
       UpdateStrategy updateStrategy = (UpdateStrategy)ReflectionUtils
           .loadClass(config.getClusteringUpdatesStrategyClass(), this.context, fileGroupsInPendingClustering);
-      return (JavaRDD<HoodieRecord<T>>)updateStrategy.handleUpdate(inputRecordsRDD);
+      Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>> recordsAndPendingClusteringFileGroups =
+          (Pair<JavaRDD<HoodieRecord<T>>, Set<HoodieFileGroupId>>)updateStrategy.handleUpdate(inputRecordsRDD);
+      Set<HoodieFileGroupId> fileGroupsWithUpdatesAndPendingClustering = recordsAndPendingClusteringFileGroups.getRight();
+      if (fileGroupsWithUpdatesAndPendingClustering.isEmpty()) {
+        return recordsAndPendingClusteringFileGroups.getLeft();
+      }
+      // there are filegroups pending clustering and receiving updates, so rollback the pending clustering instants
+      // there could be race condition, for example, if the clustering completes after instants are fetched but before rollback completed
+      if (config.isRollbackPendingClustering()) {
+        Set<HoodieInstant> pendingClusteringInstantsToRollback = getAllFileGroupsInPendingClusteringPlans(table.getMetaClient()).entrySet().stream()
+            .filter(e -> fileGroupsWithUpdatesAndPendingClustering.contains(e.getKey()))
+            .map(Map.Entry::getValue)
+            .collect(Collectors.toSet());
+        pendingClusteringInstantsToRollback.forEach(instant -> {
+          String commitTime = HoodieActiveTimeline.createNewInstantTime();
+          table.scheduleRollback(context, commitTime, instant, false, config.shouldRollbackUsingMarkers());
+          table.rollback(context, commitTime, instant, true, true);
+        });
+        table.getMetaClient().reloadActiveTimeline();
+      }
+      return recordsAndPendingClusteringFileGroups.getLeft();
     } else {
       return inputRecordsRDD;
     }
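For pipelines configured through Spark datasource options rather than the Java builder, the same behavior hinges on the new key. The sketch below is an illustration under assumptions: the dataframe, the base path, and every option key except hoodie.clustering.rollback.pending.replacecommit.on.conflict (notably hoodie.clustering.updates.strategy) come from general Hudi usage rather than from this commit, so verify them against the Hudi release in use; required keys such as record key, precombine field, and table name are omitted for brevity.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

public class RollbackOnConflictDatasourceExample {
  public static void upsert(Dataset<Row> df, String basePath) {
    df.write()
        .format("hudi")
        .option("hoodie.datasource.write.operation", "upsert")
        // Allow updates to reach file groups that are pending clustering ...
        .option("hoodie.clustering.updates.strategy",
            "org.apache.hudi.client.clustering.update.strategy.SparkAllowUpdateStrategy")
        // ... and roll back the conflicting pending replacecommit instead of failing the upsert.
        .option("hoodie.clustering.rollback.pending.replacecommit.on.conflict", "true")
        .mode(SaveMode.Append)
        .save(basePath);
  }
}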
@@ -129,6 +129,14 @@ import java.util.concurrent.Future;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;

+import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER;
+import static org.apache.hudi.common.table.timeline.HoodieInstant.State.COMPLETED;
+import static org.apache.hudi.common.table.timeline.HoodieInstant.State.INFLIGHT;
+import static org.apache.hudi.common.table.timeline.HoodieInstant.State.REQUESTED;
+import static org.apache.hudi.common.table.timeline.HoodieTimeline.CLEAN_ACTION;
+import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION;
+import static org.apache.hudi.common.table.timeline.HoodieTimeline.REPLACE_COMMIT_ACTION;
+import static org.apache.hudi.common.table.timeline.HoodieTimeline.ROLLBACK_ACTION;
 import static org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion.VERSION_0;
 import static org.apache.hudi.common.testutils.FileCreateUtils.getBaseFileCountsForPaths;
 import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
@@ -586,16 +594,16 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     HoodieActiveTimeline activeTimeline = new HoodieActiveTimeline(metaClient, false);
     List<HoodieInstant> instants = activeTimeline.getCommitTimeline().getInstants().collect(Collectors.toList());
     assertEquals(5, instants.size());
-    assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "001"),
+    assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "001"),
         instants.get(0));
-    assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "004"),
+    assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "004"),
         instants.get(1));
     // New Format should have all states of instants
-    assertEquals(new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "006"),
+    assertEquals(new HoodieInstant(REQUESTED, COMMIT_ACTION, "006"),
         instants.get(2));
-    assertEquals(new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "006"),
+    assertEquals(new HoodieInstant(INFLIGHT, COMMIT_ACTION, "006"),
         instants.get(3));
-    assertEquals(new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMMIT_ACTION, "006"),
+    assertEquals(new HoodieInstant(COMPLETED, COMMIT_ACTION, "006"),
         instants.get(4));

     final HoodieWriteConfig cfg = hoodieWriteConfig;
@@ -1403,7 +1411,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     HoodieInstant pendingClusteringInstant = pendingClusteringPlans.get(0).getLeft();

     // complete another commit after pending clustering
-    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER);
+    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(EAGER);
     addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields);
     HoodieWriteConfig config = cfgBuilder.build();
     SparkRDDWriteClient client = getHoodieWriteClient(config);
@@ -1419,6 +1427,41 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     assertEquals(0, ClusteringUtils.getAllPendingClusteringPlans(metaClient).count());
   }

+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  public void testInflightClusteringRollbackWhenUpdatesAllowed(boolean rollbackPendingClustering) throws Exception {
+    // setup clustering config with update strategy to allow updates during ingestion
+    HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
+        .withClusteringMaxNumGroups(10).withClusteringTargetPartitions(0)
+        .withClusteringUpdatesStrategy("org.apache.hudi.client.clustering.update.strategy.SparkAllowUpdateStrategy")
+        .withRollbackPendingClustering(rollbackPendingClustering)
+        .withInlineClustering(true).withInlineClusteringNumCommits(1).build();
+
+    // start clustering, but don't commit keep it inflight
+    List<HoodieRecord> allRecords = testInsertAndClustering(clusteringConfig, true, false);
+    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();
+    List<Pair<HoodieInstant, HoodieClusteringPlan>> pendingClusteringPlans =
+        ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
+    assertEquals(1, pendingClusteringPlans.size());
+    HoodieInstant pendingClusteringInstant = pendingClusteringPlans.get(0).getLeft();
+    assertEquals(pendingClusteringInstant.getState(), INFLIGHT);
+
+    // make an update to a filegroup within the partition that is pending clustering
+    HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(EAGER);
+    addConfigsForPopulateMetaFields(cfgBuilder, true);
+    cfgBuilder.withClusteringConfig(clusteringConfig);
+    HoodieWriteConfig config = cfgBuilder.build();
+    SparkRDDWriteClient client = getHoodieWriteClient(config);
+    String commitTime = HoodieActiveTimeline.createNewInstantTime();
+    allRecords.addAll(dataGen.generateUpdates(commitTime, 200));
+    writeAndVerifyBatch(client, allRecords, commitTime, true);
+
+    // verify inflight clustering was rolled back
+    metaClient.reloadActiveTimeline();
+    pendingClusteringPlans = ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList());
+    assertEquals(config.isRollbackPendingClustering() ? 0 : 1, pendingClusteringPlans.size());
+  }
+
   @Test
   public void testClusteringWithFailingValidator() throws Exception {
     // setup clustering config.
@@ -1622,7 +1665,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {

     // Do Insert Overwrite
     String commitTime2 = "002";
-    client.startCommitWithTime(commitTime2, HoodieTimeline.REPLACE_COMMIT_ACTION);
+    client.startCommitWithTime(commitTime2, REPLACE_COMMIT_ACTION);
     List<HoodieRecord> inserts2 = dataGen.generateInserts(commitTime2, batch2RecordsCount);
     List<HoodieRecord> insertsAndUpdates2 = new ArrayList<>();
     insertsAndUpdates2.addAll(inserts2);
@@ -1678,7 +1721,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
   }

   private Set<String> deletePartitionWithCommit(SparkRDDWriteClient client, String commitTime, List<String> deletePartitionPath) {
-    client.startCommitWithTime(commitTime, HoodieTimeline.REPLACE_COMMIT_ACTION);
+    client.startCommitWithTime(commitTime, REPLACE_COMMIT_ACTION);
     HoodieWriteResult writeResult = client.deletePartitions(deletePartitionPath, commitTime);
     Set<String> deletePartitionReplaceFileIds =
         writeResult.getPartitionToReplaceFileIds().entrySet()
@@ -2124,7 +2167,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();

     assertTrue(metaClient.getActiveTimeline().getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).countInstants() == 0);
+        CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 0);
     assertTrue(metaClient.getActiveTimeline().filterInflights().countInstants() == 2);
     assertTrue(metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 1);
     // Await till enough time passes such that the first 2 failed commits heartbeats are expired
@@ -2143,26 +2186,26 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     if (cleaningPolicy.isLazy()) {
       assertTrue(
           timeline
-              .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION))
+              .getTimelineOfActions(CollectionUtils.createSet(ROLLBACK_ACTION))
               .countInstants()
               == 2);
       // Since we write rollbacks not clean, there should be no clean action on the timeline
       assertTrue(
           timeline
-              .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.CLEAN_ACTION))
+              .getTimelineOfActions(CollectionUtils.createSet(CLEAN_ACTION))
               .countInstants()
               == 0);
       assertTrue(timeline.getCommitsTimeline().filterCompletedInstants().countInstants() == 2);
     } else if (cleaningPolicy.isNever()) {
       assertTrue(
           timeline
-              .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION))
+              .getTimelineOfActions(CollectionUtils.createSet(ROLLBACK_ACTION))
               .countInstants()
               == 0);
       // There should be no clean or rollback action on the timeline
       assertTrue(
           timeline
-              .getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.CLEAN_ACTION))
+              .getTimelineOfActions(CollectionUtils.createSet(CLEAN_ACTION))
               .countInstants()
               == 0);
       assertTrue(timeline.getCommitsTimeline().filterCompletedInstants().countInstants() == 2);
@@ -2173,7 +2216,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
   @MethodSource("populateMetaFieldsParams")
   public void testRollbackFailedCommitsToggleCleaningPolicy(boolean populateMetaFields) throws Exception {
     HoodieTestUtils.init(hadoopConf, basePath);
-    HoodieFailedWritesCleaningPolicy cleaningPolicy = HoodieFailedWritesCleaningPolicy.EAGER;
+    HoodieFailedWritesCleaningPolicy cleaningPolicy = EAGER;
     SparkRDDWriteClient client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, populateMetaFields));
     // Perform 1 failed writes to table
     writeBatch(client, "100", "100", Option.of(Arrays.asList("100")), "100",
@@ -2202,7 +2245,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     client.clean();
     HoodieActiveTimeline timeline = metaClient.getActiveTimeline().reload();
     assertTrue(timeline.getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).countInstants() == 3);
+        CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 3);
     // Perform 2 failed commits
     client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, populateMetaFields));
     writeBatch(client, "400", "300", Option.of(Arrays.asList("400")), "400",
@@ -2215,12 +2258,12 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
         0, false);
     client.close();
     // Toggle cleaning policy to EAGER
-    cleaningPolicy = HoodieFailedWritesCleaningPolicy.EAGER;
+    cleaningPolicy = EAGER;
     client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, populateMetaFields));
     client.startCommit();
     timeline = metaClient.getActiveTimeline().reload();
     assertTrue(timeline.getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).countInstants() == 5);
+        CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 5);
     assertTrue(timeline.getCommitsTimeline().filterCompletedInstants().countInstants() == 0);
   }

@@ -2250,7 +2293,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build();

     assertTrue(metaClient.getActiveTimeline().getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).countInstants() == 0);
+        CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 0);
     assertTrue(metaClient.getActiveTimeline().filterInflights().countInstants() == 2);
     assertTrue(metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 1);
     client = new SparkRDDWriteClient(context, getParallelWritingWriteConfig(cleaningPolicy, true));
@@ -2268,10 +2311,10 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     clean1.get();
     HoodieActiveTimeline timeline = metaClient.getActiveTimeline().reload();
     assertTrue(timeline.getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.ROLLBACK_ACTION)).countInstants() == 2);
+        CollectionUtils.createSet(ROLLBACK_ACTION)).countInstants() == 2);
     // Since we write rollbacks not clean, there should be no clean action on the timeline
     assertTrue(timeline.getTimelineOfActions(
-        CollectionUtils.createSet(HoodieTimeline.CLEAN_ACTION)).countInstants() == 0);
+        CollectionUtils.createSet(CLEAN_ACTION)).countInstants() == 0);
     assertTrue(timeline.getCommitsTimeline().filterCompletedInstants().countInstants() == 2);
   }

@@ -2432,7 +2475,7 @@ public class TestHoodieClientOnCopyOnWriteStorage extends HoodieClientTestBase {
     HoodieClusteringPlan clusteringPlan =
         ClusteringUtils.createClusteringPlan(EXECUTION_STRATEGY_CLASS_NAME.defaultValue(), STRATEGY_PARAMS, fileSlices, Collections.emptyMap());

-    HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusterTime);
+    HoodieInstant clusteringInstant = new HoodieInstant(REQUESTED, REPLACE_COMMIT_ACTION, clusterTime);
     HoodieRequestedReplaceMetadata requestedReplaceMetadata = HoodieRequestedReplaceMetadata.newBuilder()
         .setClusteringPlan(clusteringPlan).setOperationType(WriteOperationType.CLUSTER.name()).build();
     metaClient.getActiveTimeline().saveToPendingReplaceCommit(clusteringInstant, TimelineMetadataUtils.serializeRequestedReplaceMetadata(requestedReplaceMetadata));