[HUDI-1937] Rollback unfinished replace commit to allow updates (#3869)
* [HUDI-1937] Rollback unfinished replace commit to allow updates while clustering
* Revert and delete the requested replacecommit too
* Roll back pending clustering instants transactionally
* Avoid double locking and add a config to enable rollback
* Update the config to make clear that rollback happens only on conflict
This commit is contained in:
@@ -191,6 +191,15 @@ public class HoodieClusteringConfig extends HoodieConfig {
|
||||
.sinceVersion("0.10.0")
|
||||
.withDocumentation("Enable data skipping by collecting statistics once layout optimization is complete.");
|
||||
|
||||
/**
 * Boolean config property stored under the key
 * {@code hoodie.clustering.rollback.pending.replacecommit.on.conflict}.
 * Defaults to {@code false} and is marked as available since version 0.10.0.
 * The user-facing description, including the race-condition caveat, is the
 * text passed to {@code withDocumentation} below.
 */
public static final ConfigProperty<Boolean> ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT = ConfigProperty
|
||||
.key("hoodie.clustering.rollback.pending.replacecommit.on.conflict")
|
||||
.defaultValue(false)
|
||||
.sinceVersion("0.10.0")
|
||||
.withDocumentation("If updates are allowed to file groups pending clustering, then set this config to rollback failed or pending clustering instants. "
|
||||
+ "Pending clustering will be rolled back ONLY IF there is conflict between incoming upsert and filegroup to be clustered. "
|
||||
+ "Please exercise caution while setting this config, especially when clustering is done very frequently. This could lead to race condition in "
|
||||
+ "rare scenarios, for example, when the clustering completes after instants are fetched but before rollback completed.");
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #PLAN_STRATEGY_CLASS_NAME} and its methods instead
|
||||
*/
|
||||
@@ -404,6 +413,11 @@ public class HoodieClusteringConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets {@code ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT} on the clustering config being built.
 *
 * @param rollbackPendingClustering flag to store; it is written as its {@code String.valueOf} form
 * @return this builder, so calls can be chained
 */
public Builder withRollbackPendingClustering(Boolean rollbackPendingClustering) {
|
||||
clusteringConfig.setValue(ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT, String.valueOf(rollbackPendingClustering));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withSpaceFillingCurveDataOptimizeEnable(Boolean enable) {
|
||||
clusteringConfig.setValue(LAYOUT_OPTIMIZE_ENABLE, String.valueOf(enable));
|
||||
return this;
|
||||
|
||||
@@ -1166,6 +1166,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return inlineClusteringEnabled() || isAsyncClusteringEnabled();
|
||||
}
|
||||
|
||||
/**
 * Reads {@code HoodieClusteringConfig.ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT} as a boolean.
 *
 * @return the value {@code getBoolean} yields for that property
 */
public boolean isRollbackPendingClustering() {
|
||||
return getBoolean(HoodieClusteringConfig.ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT);
|
||||
}
|
||||
|
||||
/**
 * Reads {@code HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS} as an int.
 *
 * @return the value {@code getInt} yields for that property
 */
public int getInlineClusterMaxCommits() {
|
||||
return getInt(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS);
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.table.action.cluster.strategy;
|
||||
import org.apache.hudi.common.engine.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@@ -41,8 +42,8 @@ public abstract class UpdateStrategy<T extends HoodieRecordPayload<T>, I> {
|
||||
* Check the update records to the file group in clustering.
|
||||
* @param taggedRecordsRDD the records to write, tagged with target file id,
|
||||
* future can update tagged records location to a different fileId.
|
||||
* @return the recordsRDD strategy updated
|
||||
* @return a pair of the records as updated by the strategy and the set of file groups that receive updates while pending clustering.
|
||||
*/
|
||||
public abstract I handleUpdate(I taggedRecordsRDD);
|
||||
public abstract Pair<I, Set<HoodieFileGroupId>> handleUpdate(I taggedRecordsRDD);
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user