[HUDI-1436]: Provide an option to trigger clean every nth commit (#4385)
- Provides an option to trigger cleaning every nth commit. The default is 1 commit, so existing users are unaffected. Co-authored-by: sivabalan <n.siva.b@gmail.com>
This commit is contained in:
@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy;
|
||||
@@ -129,6 +130,17 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
.withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
|
||||
+ "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
|
||||
.key("hoodie.clean.trigger.strategy")
|
||||
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
|
||||
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
|
||||
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
|
||||
.key("hoodie.clean.max.commits")
|
||||
.defaultValue("1")
|
||||
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
|
||||
.key("hoodie.cleaner.fileversions.retained")
|
||||
.defaultValue("3")
|
||||
@@ -583,6 +595,16 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
|
||||
compactionConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
|
||||
compactionConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
|
||||
compactionConfig.setValue(CLEANER_POLICY, policy.name());
|
||||
return this;
|
||||
|
||||
@@ -61,6 +61,7 @@ import org.apache.hudi.keygen.constant.KeyGeneratorType;
|
||||
import org.apache.hudi.metrics.MetricsReporterType;
|
||||
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
|
||||
import org.apache.hudi.table.RandomFileIdPrefixProvider;
|
||||
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
|
||||
import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode;
|
||||
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
|
||||
@@ -1153,6 +1154,18 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
|
||||
}
|
||||
|
||||
public int getCleaningMaxCommits() {
|
||||
return getInt(HoodieCompactionConfig.CLEAN_MAX_COMMITS);
|
||||
}
|
||||
|
||||
public CleaningTriggerStrategy getCleaningTriggerStrategy() {
|
||||
return CleaningTriggerStrategy.valueOf(getString(HoodieCompactionConfig.CLEAN_TRIGGER_STRATEGY));
|
||||
}
|
||||
|
||||
public boolean isAutoClean() {
|
||||
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
|
||||
}
|
||||
|
||||
public boolean getArchiveMergeEnable() {
|
||||
return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE);
|
||||
}
|
||||
@@ -1169,10 +1182,6 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE);
|
||||
}
|
||||
|
||||
public boolean isAutoClean() {
|
||||
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
|
||||
}
|
||||
|
||||
public boolean isAsyncClean() {
|
||||
return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN);
|
||||
}
|
||||
|
||||
@@ -32,6 +32,7 @@ import org.apache.hudi.common.util.CollectionUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
import org.apache.hudi.table.action.BaseActionExecutor;
|
||||
@@ -58,8 +59,30 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
|
||||
this.extraMetadata = extraMetadata;
|
||||
}
|
||||
|
||||
protected Option<HoodieCleanerPlan> createCleanerPlan() {
|
||||
return execute();
|
||||
private int getCommitsSinceLastCleaning() {
|
||||
Option<HoodieInstant> lastCleanInstant = table.getActiveTimeline().getCleanerTimeline().filterCompletedInstants().lastInstant();
|
||||
HoodieTimeline commitTimeline = table.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
|
||||
|
||||
String latestCleanTs;
|
||||
int numCommits = 0;
|
||||
if (lastCleanInstant.isPresent()) {
|
||||
latestCleanTs = lastCleanInstant.get().getTimestamp();
|
||||
numCommits = commitTimeline.findInstantsAfter(latestCleanTs).countInstants();
|
||||
} else {
|
||||
numCommits = commitTimeline.countInstants();
|
||||
}
|
||||
|
||||
return numCommits;
|
||||
}
|
||||
|
||||
private boolean needsCleaning(CleaningTriggerStrategy strategy) {
|
||||
if (strategy == CleaningTriggerStrategy.NUM_COMMITS) {
|
||||
int numberOfCommits = getCommitsSinceLastCleaning();
|
||||
int maxInlineCommitsForNextClean = config.getCleaningMaxCommits();
|
||||
return numberOfCommits >= maxInlineCommitsForNextClean;
|
||||
} else {
|
||||
throw new HoodieException("Unsupported cleaning trigger strategy: " + config.getCleaningTriggerStrategy());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -128,6 +151,9 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
|
||||
|
||||
@Override
|
||||
public Option<HoodieCleanerPlan> execute() {
|
||||
if (!needsCleaning(config.getCleaningTriggerStrategy())) {
|
||||
return Option.empty();
|
||||
}
|
||||
// Plan a new clean action
|
||||
return requestClean(instantTime);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.clean;
|
||||
|
||||
/**
 * Strategies that decide when a clean operation should be triggered.
 */
public enum CleaningTriggerStrategy {
  /** Trigger cleaning once a configured number of commits have completed since the last clean. */
  NUM_COMMITS
}
|
||||
Reference in New Issue
Block a user