1
0

[HUDI-1937] Rollback unfinished replace commit to allow updates (#3869)

* [HUDI-1937] Rollback unfinished replace commit to allow updates while clustering

* Revert and delete requested replacecommit too

* Rollback pending clustering instants transactionally

* No double locking and add a config to enable rollback

* Update config to be clear about rollback only on conflict
This commit is contained in:
Sagar Sumit
2021-11-23 07:29:03 +05:30
committed by GitHub
parent 0d1e7ecdab
commit e22150fe15
7 changed files with 132 additions and 28 deletions

View File

@@ -191,6 +191,15 @@ public class HoodieClusteringConfig extends HoodieConfig {
.sinceVersion("0.10.0")
.withDocumentation("Enable data skipping by collecting statistics once layout optimization is complete.");
/**
 * Config property keyed by {@code hoodie.clustering.rollback.pending.replacecommit.on.conflict}.
 * Defaults to {@code false} and is declared as available since version 0.10.0.
 * The user-facing description (including the race-condition caveat) is the
 * documentation string attached below.
 */
public static final ConfigProperty<Boolean> ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT = ConfigProperty
    .key("hoodie.clustering.rollback.pending.replacecommit.on.conflict")
    .defaultValue(false)
    .sinceVersion("0.10.0")
    // The literal is split at clause boundaries; the concatenated text is unchanged.
    .withDocumentation(
        "If updates are allowed to file groups pending clustering, "
            + "then set this config to rollback failed or pending clustering instants. "
            + "Pending clustering will be rolled back ONLY IF there is conflict "
            + "between incoming upsert and filegroup to be clustered. "
            + "Please exercise caution while setting this config, "
            + "especially when clustering is done very frequently. "
            + "This could lead to race condition in rare scenarios, for example, "
            + "when the clustering completes after instants are fetched but before rollback completed.");
/**
* @deprecated Use {@link #PLAN_STRATEGY_CLASS_NAME} and its methods instead
*/
@@ -404,6 +413,11 @@ public class HoodieClusteringConfig extends HoodieConfig {
return this;
}
/**
 * Records the given flag, converted to its string form, under
 * {@code ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT} in the clustering config being built.
 *
 * @param rollbackPendingClustering value to store for the rollback-on-conflict config
 * @return this builder, so calls can be chained
 */
public Builder withRollbackPendingClustering(Boolean rollbackPendingClustering) {
  final String rollbackFlag = String.valueOf(rollbackPendingClustering);
  clusteringConfig.setValue(ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT, rollbackFlag);
  return this;
}
public Builder withSpaceFillingCurveDataOptimizeEnable(Boolean enable) {
clusteringConfig.setValue(LAYOUT_OPTIMIZE_ENABLE, String.valueOf(enable));
return this;

View File

@@ -1166,6 +1166,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return inlineClusteringEnabled() || isAsyncClusteringEnabled();
}
/**
 * Reads {@code HoodieClusteringConfig.ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT} as a boolean.
 *
 * @return the configured value of the rollback-pending-clustering-on-conflict flag
 */
public boolean isRollbackPendingClustering() {
  final boolean rollbackOnConflict =
      getBoolean(HoodieClusteringConfig.ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT);
  return rollbackOnConflict;
}
/**
 * Reads {@code HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS} as an int.
 *
 * @return the configured value; presumably a commit-count threshold for inline
 *         clustering, going by the config name (confirm against its declaration)
 */
public int getInlineClusterMaxCommits() {
  final int maxCommits = getInt(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMITS);
  return maxCommits;
}

View File

@@ -21,6 +21,7 @@ package org.apache.hudi.table.action.cluster.strategy;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.collection.Pair;
import java.util.Set;
@@ -41,8 +42,8 @@ public abstract class UpdateStrategy<T extends HoodieRecordPayload<T>, I> {
* Check the update records to the file group in clustering.
* @param taggedRecordsRDD the records to write, tagged with target file id,
* future can update tagged records location to a different fileId.
* @return the recordsRDD strategy updated
* @return the recordsRDD strategy updated and a set of file groups to be updated while pending clustering.
*/
public abstract I handleUpdate(I taggedRecordsRDD);
public abstract Pair<I, Set<HoodieFileGroupId>> handleUpdate(I taggedRecordsRDD);
}