[HUDI-3445] Support Clustering Command Based on Call Procedure Command for Spark SQL (#4901)
* [HUDI-3445] Clustering Command Based on Call Procedure Command for Spark SQL
* [HUDI-3445] Clustering Command Based on Call Procedure Command for Spark SQL
* [HUDI-3445] Clustering Command Based on Call Procedure Command for Spark SQL

Co-authored-by: shibei <huberylee.li@alibaba-inc.com>
This commit is contained in:
@@ -31,6 +31,7 @@ import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
@@ -94,6 +95,12 @@ public class HoodieClusteringConfig extends HoodieConfig {
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Filter clustering partitions that matched regex pattern");
|
||||
|
||||
public static final ConfigProperty<String> PARTITION_SELECTED = ConfigProperty
|
||||
.key(CLUSTERING_STRATEGY_PARAM_PREFIX + "partition.selected")
|
||||
.noDefaultValue()
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Partitions to run clustering");
|
||||
|
||||
public static final ConfigProperty<String> PLAN_STRATEGY_CLASS_NAME = ConfigProperty
|
||||
.key("hoodie.clustering.plan.strategy.class")
|
||||
.defaultValue(SPARK_SIZED_BASED_CLUSTERING_PLAN_STRATEGY)
|
||||
@@ -473,6 +480,11 @@ public class HoodieClusteringConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withClusteringPartitionSelected(String partitionSelected) {
|
||||
clusteringConfig.setValue(PARTITION_SELECTED, partitionSelected);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withClusteringSkipPartitionsFromLatest(int clusteringSkipPartitionsFromLatest) {
|
||||
clusteringConfig.setValue(PLAN_STRATEGY_SKIP_PARTITIONS_FROM_LATEST, String.valueOf(clusteringSkipPartitionsFromLatest));
|
||||
return this;
|
||||
|
||||
@@ -1301,6 +1301,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getLong(HoodieClusteringConfig.PLAN_STRATEGY_SMALL_FILE_LIMIT);
|
||||
}
|
||||
|
||||
public String getClusteringPartitionSelected() {
|
||||
return getString(HoodieClusteringConfig.PARTITION_SELECTED);
|
||||
}
|
||||
|
||||
public String getClusteringPartitionFilterRegexPattern() {
|
||||
return getString(HoodieClusteringConfig.PARTITION_REGEX_PATTERN);
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ import org.apache.hudi.exception.HoodieClusteringException;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Partition filter utilities. Currently, we support three modes:
|
||||
@@ -58,11 +59,18 @@ public class ClusteringPlanPartitionFilter {
|
||||
}
|
||||
|
||||
private static List<String> selectedPartitionsFilter(List<String> partitions, HoodieWriteConfig config) {
|
||||
Stream<String> filteredPartitions = partitions.stream();
|
||||
|
||||
String beginPartition = config.getBeginPartitionForClustering();
|
||||
if (beginPartition != null) {
|
||||
filteredPartitions = filteredPartitions.filter(path -> path.compareTo(beginPartition) >= 0);
|
||||
}
|
||||
|
||||
String endPartition = config.getEndPartitionForClustering();
|
||||
List<String> filteredPartitions = partitions.stream()
|
||||
.filter(path -> path.compareTo(beginPartition) >= 0 && path.compareTo(endPartition) <= 0)
|
||||
.collect(Collectors.toList());
|
||||
return filteredPartitions;
|
||||
if (endPartition != null) {
|
||||
filteredPartitions = filteredPartitions.filter(path -> path.compareTo(endPartition) <= 0);
|
||||
}
|
||||
|
||||
return filteredPartitions.collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,7 @@ import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilter;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
@@ -72,8 +73,8 @@ public abstract class PartitionAwareClusteringPlanStrategy<T extends HoodieRecor
|
||||
HoodieWriteConfig config = getWriteConfig();
|
||||
List<String> partitionPaths = FSUtils.getAllPartitionPaths(getEngineContext(), config.getMetadataConfig(), metaClient.getBasePath());
|
||||
|
||||
// get regex matched partitions if set
|
||||
partitionPaths = getRegexPatternMatchedPartitions(config, partitionPaths);
|
||||
// get matched partitions if set
|
||||
partitionPaths = getMatchedPartitions(config, partitionPaths);
|
||||
// filter the partition paths if needed to reduce list status
|
||||
partitionPaths = filterPartitionPaths(partitionPaths);
|
||||
|
||||
@@ -113,6 +114,15 @@ public abstract class PartitionAwareClusteringPlanStrategy<T extends HoodieRecor
|
||||
.build());
|
||||
}
|
||||
|
||||
public List<String> getMatchedPartitions(HoodieWriteConfig config, List<String> partitionPaths) {
|
||||
String partitionSelected = config.getClusteringPartitionSelected();
|
||||
if (!StringUtils.isNullOrEmpty(partitionSelected)) {
|
||||
return Arrays.asList(partitionSelected.split(","));
|
||||
} else {
|
||||
return getRegexPatternMatchedPartitions(config, partitionPaths);
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> getRegexPatternMatchedPartitions(HoodieWriteConfig config, List<String> partitionPaths) {
|
||||
String pattern = config.getClusteringPartitionFilterRegexPattern();
|
||||
if (!StringUtils.isNullOrEmpty(pattern)) {
|
||||
|
||||
Reference in New Issue
Block a user