1
0

[HUDI-1436]: Provide an option to trigger clean every nth commit (#4385)

- Provides an option to trigger cleaning every nth commit, with the default number of commits set to 1 so that existing users are not affected.
Co-authored-by: sivabalan <n.siva.b@gmail.com>
This commit is contained in:
Pratyaksh Sharma
2022-03-22 05:36:30 +05:30
committed by GitHub
parent 26e5d2e6fc
commit ca0931d332
9 changed files with 521 additions and 384 deletions

View File

@@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy;
@@ -129,6 +130,17 @@ public class HoodieCompactionConfig extends HoodieConfig {
.withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
+ "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
// Strategy that decides *when* a clean is scheduled; defaults to counting
// commits since the last completed clean (see CleaningTriggerStrategy).
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
.key("hoodie.clean.trigger.strategy")
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
// Threshold for the NUM_COMMITS strategy. Default "1" preserves the previous
// behavior of attempting a clean after every commit.
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
.key("hoodie.clean.max.commits")
.defaultValue("1")
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
.key("hoodie.cleaner.fileversions.retained")
.defaultValue("3")
@@ -583,6 +595,16 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
/**
 * Sets how cleaning is triggered; must be a {@code CleaningTriggerStrategy} enum name.
 */
public Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
compactionConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
return this;
}
/**
 * Sets the number of commits that must elapse after the last clean before a
 * new clean is scheduled (used by the NUM_COMMITS trigger strategy).
 */
public Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
compactionConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
return this;
}
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
compactionConfig.setValue(CLEANER_POLICY, policy.name());
return this;

View File

@@ -61,6 +61,7 @@ import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.apache.hudi.metrics.MetricsReporterType;
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
import org.apache.hudi.table.RandomFileIdPrefixProvider;
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode;
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
@@ -1153,6 +1154,18 @@ public class HoodieWriteConfig extends HoodieConfig {
return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
}
// Commit-count threshold for the NUM_COMMITS cleaning trigger.
public int getCleaningMaxCommits() {
return getInt(HoodieCompactionConfig.CLEAN_MAX_COMMITS);
}
// Parses the configured strategy name into the enum; throws
// IllegalArgumentException if the config holds an unknown value.
public CleaningTriggerStrategy getCleaningTriggerStrategy() {
return CleaningTriggerStrategy.valueOf(getString(HoodieCompactionConfig.CLEAN_TRIGGER_STRATEGY));
}
// Whether cleaning runs automatically after writes.
public boolean isAutoClean() {
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
}
public boolean getArchiveMergeEnable() {
return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE);
}
@@ -1169,10 +1182,6 @@ public class HoodieWriteConfig extends HoodieConfig {
return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE);
}
// NOTE(review): this hunk appears to remove isAutoClean from its old position
// (it now lives next to the other cleaning getters) — confirm against the diff markers.
public boolean isAutoClean() {
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
}
// Whether cleaning is executed asynchronously rather than inline with the write.
public boolean isAsyncClean() {
return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN);
}

View File

@@ -32,6 +32,7 @@ import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.BaseActionExecutor;
@@ -58,8 +59,30 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
this.extraMetadata = extraMetadata;
}
protected Option<HoodieCleanerPlan> createCleanerPlan() {
return execute();
/**
 * Counts completed commits since the last completed clean.
 *
 * @return the number of completed commit instants after the most recent
 *         completed clean, or the total number of completed commits if no
 *         clean has ever run.
 */
private int getCommitsSinceLastCleaning() {
  Option<HoodieInstant> lastCleanInstant = table.getActiveTimeline().getCleanerTimeline().filterCompletedInstants().lastInstant();
  HoodieTimeline commitTimeline = table.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
  if (lastCleanInstant.isPresent()) {
    // Only the commits that completed strictly after the last clean count.
    return commitTimeline.findInstantsAfter(lastCleanInstant.get().getTimestamp()).countInstants();
  }
  // Never cleaned: every completed commit is "since last cleaning".
  return commitTimeline.countInstants();
}
/**
 * Decides whether a clean should be scheduled under the given trigger strategy.
 *
 * @param strategy the configured cleaning trigger strategy
 * @return true when enough commits have accumulated since the last clean
 * @throws HoodieException if the strategy is not supported
 */
private boolean needsCleaning(CleaningTriggerStrategy strategy) {
  if (strategy == CleaningTriggerStrategy.NUM_COMMITS) {
    int numberOfCommits = getCommitsSinceLastCleaning();
    int maxInlineCommitsForNextClean = config.getCleaningMaxCommits();
    return numberOfCommits >= maxInlineCommitsForNextClean;
  }
  // Report the strategy actually passed in, rather than re-reading it from
  // config, so the message cannot diverge from the value we dispatched on.
  throw new HoodieException("Unsupported cleaning trigger strategy: " + strategy);
}
/**
@@ -128,6 +151,9 @@ public class CleanPlanActionExecutor<T extends HoodieRecordPayload, I, K, O> ext
@Override
public Option<HoodieCleanerPlan> execute() {
// Short-circuit when the configured trigger (e.g. every N commits) says a
// clean is not yet due; an empty Option means "no clean plan generated".
if (!needsCleaning(config.getCleaningTriggerStrategy())) {
return Option.empty();
}
// Plan a new clean action
return requestClean(instantTime);
}

View File

@@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.action.clean;
/**
 * Strategies that decide when a table clean is scheduled.
 */
public enum CleaningTriggerStrategy {
  /** Schedule a clean once the configured number of commits has accumulated. */
  NUM_COMMITS
}