1
0

[HUDI-1354] Block updates and replace on file groups in clustering (#2275)

* [HUDI-1354] Block updates and replace on file groups in clustering

* [HUDI-1354]  Block updates and replace on file groups in clustering
This commit is contained in:
lw0090
2020-12-28 12:30:29 +08:00
committed by GitHub
parent 9e6889a8ce
commit 6cdf59d92b
8 changed files with 339 additions and 6 deletions

View File

@@ -73,9 +73,17 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
public static final String CLUSTERING_TARGET_FILE_MAX_BYTES = CLUSTERING_STRATEGY_PARAM_PREFIX + "target.file.max.bytes";
public static final String DEFAULT_CLUSTERING_TARGET_FILE_MAX_BYTES = String.valueOf(1 * 1024 * 1024 * 1024L); // 1GB
// constants related to clustering that may be used by more than 1 strategy.
// Constants related to clustering that may be used by more than 1 strategy.
public static final String CLUSTERING_SORT_COLUMNS_PROPERTY = HoodieClusteringConfig.CLUSTERING_STRATEGY_PARAM_PREFIX + "sort.columns";
// While file groups are being clustered, updates that target them need to be handled.
// This key names the strategy class to use; the default strategy simply rejects such updates.
public static final String CLUSTERING_UPDATES_STRATEGY_PROP = "hoodie.clustering.updates.strategy";
public static final String DEFAULT_CLUSTERING_UPDATES_STRATEGY = "org.apache.hudi.client.clustering.update.strategy.SparkRejectUpdateStrategy";
// Async clustering: key of the enable flag and its default value ("false").
public static final String ASYNC_CLUSTERING_ENABLE_OPT_KEY = "hoodie.clustering.async.enabled";
public static final String DEFAULT_ASYNC_CLUSTERING_ENABLE_OPT_VAL = "false";
/**
* Creates a clustering config from the given properties, which are passed
* straight to the superclass constructor.
*
* @param props the properties handed to the parent config
*/
public HoodieClusteringConfig(Properties props) {
super(props);
}
@@ -135,8 +143,8 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
return this;
}
public Builder withInlineClustering(Boolean inlineCompaction) {
props.setProperty(INLINE_CLUSTERING_PROP, String.valueOf(inlineCompaction));
/**
 * Stores the inline-clustering flag under {@code INLINE_CLUSTERING_PROP}.
 *
 * @param inlineClustering the flag value; it is stored in its string form
 * @return this builder, for chaining
 */
public Builder withInlineClustering(Boolean inlineClustering) {
  final String flagValue = String.valueOf(inlineClustering);
  props.setProperty(INLINE_CLUSTERING_PROP, flagValue);
  return this;
}
@@ -150,8 +158,19 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
return this;
}
/**
* Stores the given value under {@code CLUSTERING_UPDATES_STRATEGY_PROP}.
*
* @param updatesStrategyClass the value to store; the default for this key is a
* fully qualified class name, so presumably one is expected here as well
* @return this builder, for chaining
*/
public Builder withClusteringUpdatesStrategy(String updatesStrategyClass) {
props.setProperty(CLUSTERING_UPDATES_STRATEGY_PROP, updatesStrategyClass);
return this;
}
/**
 * Stores the async-clustering flag under {@code ASYNC_CLUSTERING_ENABLE_OPT_KEY}.
 *
 * @param asyncClustering the flag value; it is stored in its string form
 * @return this builder, for chaining
 */
public Builder withAsyncClustering(Boolean asyncClustering) {
  final String flagValue = String.valueOf(asyncClustering);
  props.setProperty(ASYNC_CLUSTERING_ENABLE_OPT_KEY, flagValue);
  return this;
}
public HoodieClusteringConfig build() {
HoodieClusteringConfig config = new HoodieClusteringConfig(props);
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_PLAN_STRATEGY_CLASS),
CLUSTERING_PLAN_STRATEGY_CLASS, DEFAULT_CLUSTERING_PLAN_STRATEGY_CLASS);
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_EXECUTION_STRATEGY_CLASS),
@@ -170,6 +189,10 @@ public class HoodieClusteringConfig extends DefaultHoodieConfig {
DEFAULT_CLUSTERING_TARGET_PARTITIONS);
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_PLAN_SMALL_FILE_LIMIT), CLUSTERING_PLAN_SMALL_FILE_LIMIT,
DEFAULT_CLUSTERING_PLAN_SMALL_FILE_LIMIT);
setDefaultOnCondition(props, !props.containsKey(CLUSTERING_UPDATES_STRATEGY_PROP), CLUSTERING_UPDATES_STRATEGY_PROP,
DEFAULT_CLUSTERING_UPDATES_STRATEGY);
setDefaultOnCondition(props, !props.containsKey(ASYNC_CLUSTERING_ENABLE_OPT_KEY), ASYNC_CLUSTERING_ENABLE_OPT_KEY,
DEFAULT_ASYNC_CLUSTERING_ENABLE_OPT_VAL);
return config;
}
}

View File

@@ -50,6 +50,7 @@ import java.util.Properties;
import java.util.function.Supplier;
import java.util.stream.Collectors;
/**
* Class storing configs for the HoodieWriteClient.
*/
@@ -395,6 +396,15 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return Boolean.parseBoolean(props.getProperty(HoodieClusteringConfig.INLINE_CLUSTERING_PROP));
}
/**
 * Reads the async-clustering flag from the write config properties.
 *
 * @return the property value parsed with {@link Boolean#parseBoolean(String)}, so anything
 *         other than "true" (ignoring case) yields {@code false}
 */
public boolean isAsyncClusteringEnabled() {
  final String asyncFlag = props.getProperty(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE_OPT_KEY);
  return Boolean.parseBoolean(asyncFlag);
}
/**
 * Tells whether clustering is turned on in either mode.
 *
 * @return {@code true} if inline clustering or async clustering is enabled
 */
public boolean isClusteringEnabled() {
  // TODO: future support async clustering
  if (isInlineClustering()) {
    return true;
  }
  return isAsyncClusteringEnabled();
}
/**
 * Reads {@code INLINE_CLUSTERING_MAX_COMMIT_PROP} from the write config properties.
 *
 * @return the property value parsed as an {@code int}
 */
public int getInlineClusterMaxCommits() {
  final String maxCommits = props.getProperty(HoodieClusteringConfig.INLINE_CLUSTERING_MAX_COMMIT_PROP);
  return Integer.parseInt(maxCommits);
}
@@ -415,6 +425,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return Boolean.valueOf(props.getProperty(HoodieCompactionConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLED));
}
/**
* Returns the value stored under
* {@code HoodieClusteringConfig.CLUSTERING_UPDATES_STRATEGY_PROP}.
*
* @return the raw property value, as read from the write config properties
*/
public String getClusteringUpdatesStrategyClass() {
return props.getProperty(HoodieClusteringConfig.CLUSTERING_UPDATES_STRATEGY_PROP);
}
/**
* Clustering properties.
*/

View File

@@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.exception;
/**
 * Exception type for failures related to updates on file groups in clustering.
 */
public class HoodieClusteringUpdateException extends HoodieException {

  /**
   * Creates the exception with a message only.
   *
   * @param message description of the failure
   */
  public HoodieClusteringUpdateException(String message) {
    super(message);
  }

  /**
   * Creates the exception with a message and the underlying cause.
   *
   * @param message description of the failure
   * @param cause   the throwable that triggered this exception
   */
  public HoodieClusteringUpdateException(String message, Throwable cause) {
    super(message, cause);
  }
}

View File

@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.table.action.cluster.strategy;
import org.apache.hudi.client.common.HoodieEngineContext;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieRecordPayload;
import java.util.Set;
/**
* Strategy for handling records that are written to file groups which are in clustering.
*
* @param <T> the record payload type
* @param <I> the type of the tagged-record collection taken and returned by {@link #handleUpdate}
*/
public abstract class UpdateStrategy<T extends HoodieRecordPayload<T>, I> {
// Engine context supplied at construction time.
protected final HoodieEngineContext engineContext;
// File groups in pending clustering, as supplied at construction time.
// NOTE(review): unlike engineContext this field is not final - confirm whether subclasses are meant to reassign it.
protected Set<HoodieFileGroupId> fileGroupsInPendingClustering;
/**
* Stores the engine context and the set of file groups in pending clustering.
*
* @param engineContext the engine context to keep
* @param fileGroupsInPendingClustering the file groups in pending clustering; the reference is kept as-is (no copy)
*/
protected UpdateStrategy(HoodieEngineContext engineContext, Set<HoodieFileGroupId> fileGroupsInPendingClustering) {
this.engineContext = engineContext;
this.fileGroupsInPendingClustering = fileGroupsInPendingClustering;
}
/**
* Checks the records that update file groups in clustering.
*
* @param taggedRecordsRDD the records to write, tagged with their target file id;
* in the future an implementation may change a tagged record's location to a different file id.
* @return the records after the strategy has been applied
*/
public abstract I handleUpdate(I taggedRecordsRDD);
}