[HUDI-1354] Block updates and replace on file groups in clustering (#2275)
* [HUDI-1354] Block updates and replace on file groups in clustering * [HUDI-1354] Block updates and replace on file groups in clustering
This commit is contained in:
@@ -73,9 +73,17 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
|
||||
public static final String CLUSTERING_TARGET_FILE_MAX_BYTES = CLUSTERING_STRATEGY_PARAM_PREFIX + "target.file.max.bytes";
|
||||
public static final String DEFAULT_CLUSTERING_TARGET_FILE_MAX_BYTES = String.valueOf(1 * 1024 * 1024 * 1024L); // 1GB
|
||||
|
||||
// constants related to clustering that may be used by more than 1 strategy.
|
||||
// Constants related to clustering that may be used by more than 1 strategy.
|
||||
public static final String CLUSTERING_SORT_COLUMNS_PROPERTY = HoodieClusteringConfig.CLUSTERING_STRATEGY_PARAM_PREFIX + "sort.columns";
|
||||
|
||||
// When file groups are in clustering, updates to those file groups need to be handled. The default strategy simply rejects the update.
|
||||
public static final String CLUSTERING_UPDATES_STRATEGY_PROP = "hoodie.clustering.updates.strategy";
|
||||
public static final String DEFAULT_CLUSTERING_UPDATES_STRATEGY = "org.apache.hudi.client.clustering.update.strategy.SparkRejectUpdateStrategy";
|
||||
|
||||
// Async clustering
|
||||
public static final String ASYNC_CLUSTERING_ENABLE_OPT_KEY = "hoodie.clustering.async.enabled";
|
||||
public static final String DEFAULT_ASYNC_CLUSTERING_ENABLE_OPT_VAL = "false";
|
||||
|
||||
/**
 * Creates a clustering config on top of the supplied properties.
 *
 * @param props properties forwarded as-is to the parent config constructor
 */
public HoodieClusteringConfig(Properties props) {
  super(props);
}
|
||||
@@ -135,8 +143,8 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withInlineClustering(Boolean inlineCompaction) {
|
||||
props.setProperty(INLINE_CLUSTERING_PROP, String.valueOf(inlineCompaction));
|
||||
public Builder withInlineClustering(Boolean inlineClustering) {
|
||||
props.setProperty(INLINE_CLUSTERING_PROP, String.valueOf(inlineClustering));
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -150,8 +158,19 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withClusteringUpdatesStrategy(String updatesStrategyClass) {
|
||||
props.setProperty(CLUSTERING_UPDATES_STRATEGY_PROP, updatesStrategyClass);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAsyncClustering(Boolean asyncClustering) {
|
||||
props.setProperty(ASYNC_CLUSTERING_ENABLE_OPT_KEY, String.valueOf(asyncClustering));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieClusteringConfig build() {
|
||||
HoodieClusteringConfig config = new HoodieClusteringConfig(props);
|
||||
|
||||
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_PLAN_STRATEGY_CLASS),
|
||||
CLUSTERING_PLAN_STRATEGY_CLASS, DEFAULT_CLUSTERING_PLAN_STRATEGY_CLASS);
|
||||
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_EXECUTION_STRATEGY_CLASS),
|
||||
@@ -170,6 +189,10 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
|
||||
DEFAULT_CLUSTERING_TARGET_PARTITIONS);
|
||||
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_PLAN_SMALL_FILE_LIMIT), CLUSTERING_PLAN_SMALL_FILE_LIMIT,
|
||||
DEFAULT_CLUSTERING_PLAN_SMALL_FILE_LIMIT);
|
||||
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_UPDATES_STRATEGY_PROP), CLUSTERING_UPDATES_STRATEGY_PROP,
|
||||
DEFAULT_CLUSTERING_UPDATES_STRATEGY);
|
||||
setDefaultOnCondition(props, !props.containsKey(ASYNC_CLUSTERING_ENABLE_OPT_KEY), ASYNC_CLUSTERING_ENABLE_OPT_KEY,
|
||||
DEFAULT_ASYNC_CLUSTERING_ENABLE_OPT_VAL);
|
||||
return config;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,6 +50,7 @@ import java.util.Properties;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
* Class storing configs for the HoodieWriteClient.
|
||||
*/
|
||||
@@ -395,6 +396,15 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
return Boolean.parseBoolean(props.getProperty(HoodieClusteringConfig.INLINE_CLUSTERING_PROP));
|
||||
}
|
||||
|
||||
public boolean isAsyncClusteringEnabled() {
|
||||
return Boolean.parseBoolean(props.getProperty(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY));
|
||||
}
|
||||
|
||||
public boolean isClusteringEnabled() {
|
||||
// TODO: future support async clustering
|
||||
return isInlineClustering() || isAsyncClusteringEnabled();
|
||||
}
|
||||
|
||||
public int getInlineClusterMaxCommits() {
|
||||
return Integer.parseInt(props.getProperty(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP));
|
||||
}
|
||||
@@ -415,6 +425,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
return Boolean.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLED));
|
||||
}
|
||||
|
||||
public String getClusteringUpdatesStrategyClass() {
|
||||
return props.getProperty(HoodieClusteringConfig.CLUSTERING_UPDATES_STRATEGY_PROP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clustering properties.
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.exception;
|
||||
|
||||
public class HoodieClusteringUpdateException extends HoodieException {
|
||||
public HoodieClusteringUpdateException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public HoodieClusteringUpdateException(String msg, Throwable e) {
|
||||
super(msg, e);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.table.action.cluster.strategy;
|
||||
|
||||
import org.apache.hudi.client.common.HoodieEngineContext;
|
||||
import org.apache.hudi.common.model.HoodieFileGroupId;
|
||||
import org.apache.hudi.common.model.HoodieRecordPayload;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* When file groups are in clustering, records written to those file groups need to be checked.
|
||||
*/
|
||||
public abstract class UpdateStrategy<T extends HoodieRecordPayload<T>, I> {
|
||||
|
||||
protected final HoodieEngineContext engineContext;
|
||||
protected Set<HoodieFileGroupId> fileGroupsInPendingClustering;
|
||||
|
||||
protected UpdateStrategy(HoodieEngineContext engineContext, Set<HoodieFileGroupId> fileGroupsInPendingClustering) {
|
||||
this.engineContext = engineContext;
|
||||
this.fileGroupsInPendingClustering = fileGroupsInPendingClustering;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the update records to the file group in clustering.
|
||||
* @param taggedRecordsRDD the records to write, tagged with target file id,
|
||||
* future can update tagged records location to a different fileId.
|
||||
* @return the recordsRDD strategy updated
|
||||
*/
|
||||
public abstract I handleUpdate(I taggedRecordsRDD);
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user