[HUDI-2194] Skip the latest N partitions when choosing partitions to create ClusteringPlan (#3300)
* skip the latest partitions based on hoodie.clustering.plan.strategy.daybased.skipfromlatest.partitions (default is 0, which means skip nothing) * change config version * add UT Co-authored-by: yuezhang <yuezhang@freewheel.tv>
This commit is contained in:
@@ -51,8 +51,10 @@ public class SparkRecentDaysClusteringPlanStrategy<T extends HoodieRecordPayload
|
||||
|
||||
protected List<String> filterPartitionPaths(List<String> partitionPaths) {
|
||||
int targetPartitionsForClustering = getWriteConfig().getTargetPartitionsForClustering();
|
||||
int skipPartitionsFromLatestForClustering = getWriteConfig().getSkipPartitionsFromLatestForClustering();
|
||||
return partitionPaths.stream()
|
||||
.sorted(Comparator.reverseOrder())
|
||||
.skip(Math.max(skipPartitionsFromLatestForClustering, 0))
|
||||
.limit(targetPartitionsForClustering > 0 ? targetPartitionsForClustering : partitionPaths.size())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user