[HUDI-2150] Rename/Restructure configs for better modularity (#6061)
- Move clean-related configuration to HoodieCleanConfig. - Move archival-related configuration to HoodieArchivalConfig. - Move hoodie.compaction.payload.class to HoodiePayloadConfig.
This commit is contained in:
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.config;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigClassProperty;
|
||||
import org.apache.hudi.common.config.ConfigGroups;
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
|
||||
import javax.annotation.concurrent.Immutable;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Archival related config.
|
||||
*/
|
||||
@Immutable
|
||||
@ConfigClassProperty(name = "Archival Configs",
|
||||
groupName = ConfigGroups.Names.WRITE_CLIENT,
|
||||
description = "Configurations that control archival.")
|
||||
public class HoodieArchivalConfig extends HoodieConfig {
|
||||
|
||||
public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
|
||||
.key("hoodie.archive.automatic")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
|
||||
+ " to archive commits if we cross a maximum value of commits."
|
||||
+ " It's recommended to enable this, to ensure number of active commits is bounded.");
|
||||
|
||||
public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
|
||||
.key("hoodie.archive.async")
|
||||
.defaultValue("false")
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
|
||||
+ "When turned on runs archiver async with writing, which can speed up overall write performance.");
|
||||
|
||||
public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
|
||||
.key("hoodie.keep.max.commits")
|
||||
.defaultValue("30")
|
||||
.withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to "
|
||||
+ " keep the metadata overhead constant, even as the table size grows."
|
||||
+ "This config controls the maximum number of instants to retain in the active timeline. ");
|
||||
|
||||
public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.archive.delete.parallelism")
|
||||
.defaultValue(100)
|
||||
.withDocumentation("Parallelism for deleting archived hoodie commits.");
|
||||
|
||||
public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
|
||||
.key("hoodie.keep.min.commits")
|
||||
.defaultValue("20")
|
||||
.withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of"
|
||||
+ "instants to retain in the active timeline.");
|
||||
|
||||
public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
|
||||
.key("hoodie.commits.archival.batch")
|
||||
.defaultValue(String.valueOf(10))
|
||||
.withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
|
||||
+ " archive log. This config controls such archival batch size.");
|
||||
|
||||
public static final ConfigProperty<Integer> ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty
|
||||
.key("hoodie.archive.merge.files.batch.size")
|
||||
.defaultValue(10)
|
||||
.withDocumentation("The number of small archive files to be merged at once.");
|
||||
|
||||
public static final ConfigProperty<Long> ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES = ConfigProperty
|
||||
.key("hoodie.archive.merge.small.file.limit.bytes")
|
||||
.defaultValue(20L * 1024 * 1024)
|
||||
.withDocumentation("This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.");
|
||||
|
||||
public static final ConfigProperty<Boolean> ARCHIVE_MERGE_ENABLE = ConfigProperty
|
||||
.key("hoodie.archive.merge.enable")
|
||||
.defaultValue(false)
|
||||
.withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
|
||||
+ " useful when storage scheme doesn't support append operation.");
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
|
||||
/**
|
||||
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
|
||||
/**
|
||||
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
|
||||
/** @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();
|
||||
|
||||
private HoodieArchivalConfig() {
|
||||
super();
|
||||
}
|
||||
|
||||
public static HoodieArchivalConfig.Builder newBuilder() {
|
||||
return new HoodieArchivalConfig.Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private final HoodieArchivalConfig archivalConfig = new HoodieArchivalConfig();
|
||||
|
||||
public HoodieArchivalConfig.Builder fromFile(File propertiesFile) throws IOException {
|
||||
try (FileReader reader = new FileReader(propertiesFile)) {
|
||||
this.archivalConfig.getProps().load(reader);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder fromProperties(Properties props) {
|
||||
this.archivalConfig.getProps().putAll(props);
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withAutoArchive(Boolean autoArchive) {
|
||||
archivalConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withAsyncArchive(Boolean asyncArchive) {
|
||||
archivalConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
|
||||
archivalConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
|
||||
archivalConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withArchiveMergeFilesBatchSize(int number) {
|
||||
archivalConfig.setValue(ARCHIVE_MERGE_FILES_BATCH_SIZE, String.valueOf(number));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withArchiveMergeSmallFileLimit(long size) {
|
||||
archivalConfig.setValue(ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES, String.valueOf(size));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withArchiveMergeEnable(boolean enable) {
|
||||
archivalConfig.setValue(ARCHIVE_MERGE_ENABLE, String.valueOf(enable));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
|
||||
archivalConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig.Builder withCommitsArchivalBatchSize(int batchSize) {
|
||||
archivalConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieArchivalConfig build() {
|
||||
archivalConfig.setDefaults(HoodieArchivalConfig.class.getName());
|
||||
return archivalConfig;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,297 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.config;
|
||||
|
||||
import org.apache.hudi.common.config.ConfigClassProperty;
|
||||
import org.apache.hudi.common.config.ConfigGroups;
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
|
||||
|
||||
import javax.annotation.concurrent.Immutable;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Properties;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Clean related config.
|
||||
*/
|
||||
@Immutable
|
||||
@ConfigClassProperty(name = "Clean Configs",
|
||||
groupName = ConfigGroups.Names.WRITE_CLIENT,
|
||||
description = "Cleaning (reclamation of older/unused file groups/slices).")
|
||||
public class HoodieCleanConfig extends HoodieConfig {
|
||||
|
||||
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
|
||||
.key("hoodie.clean.automatic")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
|
||||
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
|
||||
+ " growth is bounded.");
|
||||
|
||||
public static final ConfigProperty<String> ASYNC_CLEAN = ConfigProperty
|
||||
.key("hoodie.clean.async")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("Only applies when " + AUTO_CLEAN.key() + " is turned on. "
|
||||
+ "When turned on runs cleaner async with writing, which can speed up overall write performance.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_COMMITS_RETAINED = ConfigProperty
|
||||
.key("hoodie.cleaner.commits.retained")
|
||||
.defaultValue("10")
|
||||
.withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits "
|
||||
+ "(scheduled). This also directly translates into how much data retention the table supports for incremental queries.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained")
|
||||
.defaultValue("24")
|
||||
.withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as"
|
||||
+ "compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group,"
|
||||
+ " corresponding to commits with commit times older than the configured number of hours to be retained are cleaned.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_POLICY = ConfigProperty
|
||||
.key("hoodie.cleaner.policy")
|
||||
.defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
|
||||
.withDocumentation("Cleaning policy to be used. The cleaner service deletes older file slices files to re-claim space."
|
||||
+ " By default, cleaner spares the file slices written by the last N commits, determined by " + CLEANER_COMMITS_RETAINED.key()
|
||||
+ " Long running query plans may often refer to older file slices and will break if those are cleaned, before the query has had"
|
||||
+ " a chance to run. So, it is good to make sure that the data is retained for more than the maximum query execution time");
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
|
||||
.key("hoodie.clean.trigger.strategy")
|
||||
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
|
||||
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
|
||||
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
|
||||
.key("hoodie.clean.max.commits")
|
||||
.defaultValue("1")
|
||||
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
|
||||
.key("hoodie.cleaner.fileversions.retained")
|
||||
.defaultValue("3")
|
||||
.withDocumentation("When " + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, "
|
||||
+ " the minimum number of file slices to retain in each file group, during cleaning.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_INCREMENTAL_MODE_ENABLE = ConfigProperty
|
||||
.key("hoodie.cleaner.incremental.mode")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the plans for each cleaner service run is computed incrementally off the events "
|
||||
+ " in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full"
|
||||
+ " table for each planning (even with a metadata table).");
|
||||
|
||||
public static final ConfigProperty<String> FAILED_WRITES_CLEANER_POLICY = ConfigProperty
|
||||
.key("hoodie.cleaner.policy.failed.writes")
|
||||
.defaultValue(HoodieFailedWritesCleaningPolicy.EAGER.name())
|
||||
.withDocumentation("Cleaning policy for failed writes to be used. Hudi will delete any files written by "
|
||||
+ "failed writes to re-claim space. Choose to perform this rollback of failed writes eagerly before "
|
||||
+ "every writer starts (only supported for single writer) or lazily by the cleaner (required for multi-writers)");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.cleaner.parallelism")
|
||||
.defaultValue("200")
|
||||
.withDocumentation("Parallelism for the cleaning operation. Increase this if cleaning becomes slow.");
|
||||
|
||||
public static final ConfigProperty<Boolean> ALLOW_MULTIPLE_CLEANS = ConfigProperty
|
||||
.key("hoodie.clean.allow.multiple")
|
||||
.defaultValue(true)
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests should be mutually exclusive, "
|
||||
+ ".i.e. a 2nd clean will not be scheduled if another clean is not yet completed to avoid repeat cleaning of same files, they might want to disable this config.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_BOOTSTRAP_BASE_FILE_ENABLE = ConfigProperty
|
||||
.key("hoodie.cleaner.delete.bootstrap.base.file")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("When set to true, cleaner also deletes the bootstrap base file when it's skeleton base file is "
|
||||
+ " cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the"
|
||||
+ " table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap "
|
||||
+ " base files are also physically deleted, to comply with data privacy enforcement processes.");
|
||||
|
||||
|
||||
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
|
||||
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String AUTO_CLEAN_PROP = AUTO_CLEAN.key();
|
||||
/** @deprecated Use {@link #ASYNC_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String ASYNC_CLEAN_PROP = ASYNC_CLEAN.key();
|
||||
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = CLEANER_FILE_VERSIONS_RETAINED.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_COMMITS_RETAINED_PROP = CLEANER_COMMITS_RETAINED.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_INCREMENTAL_MODE = CLEANER_INCREMENTAL_MODE_ENABLE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String DEFAULT_CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_POLICY = CLEANER_POLICY.defaultValue();
|
||||
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String FAILED_WRITES_CLEANER_POLICY_PROP = FAILED_WRITES_CLEANER_POLICY.key();
|
||||
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_FAILED_WRITES_CLEANER_POLICY = FAILED_WRITES_CLEANER_POLICY.defaultValue();
|
||||
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_AUTO_CLEAN = AUTO_CLEAN.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #ASYNC_CLEAN} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_ASYNC_CLEAN = ASYNC_CLEAN.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_INCREMENTAL_CLEANER = CLEANER_INCREMENTAL_MODE_ENABLE.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = CLEANER_FILE_VERSIONS_RETAINED.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = CLEANER_COMMITS_RETAINED.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.defaultValue();
|
||||
|
||||
private HoodieCleanConfig() {
|
||||
super();
|
||||
}
|
||||
|
||||
public static HoodieCleanConfig.Builder newBuilder() {
|
||||
return new HoodieCleanConfig.Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private final HoodieCleanConfig cleanConfig = new HoodieCleanConfig();
|
||||
|
||||
public HoodieCleanConfig.Builder fromFile(File propertiesFile) throws IOException {
|
||||
try (FileReader reader = new FileReader(propertiesFile)) {
|
||||
this.cleanConfig.getProps().load(reader);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder fromProperties(Properties props) {
|
||||
this.cleanConfig.getProps().putAll(props);
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withAutoClean(Boolean autoClean) {
|
||||
cleanConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withAsyncClean(Boolean asyncClean) {
|
||||
cleanConfig.setValue(ASYNC_CLEAN, String.valueOf(asyncClean));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) {
|
||||
cleanConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
|
||||
cleanConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
|
||||
cleanConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
|
||||
cleanConfig.setValue(CLEANER_POLICY, policy.name());
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder retainFileVersions(int fileVersionsRetained) {
|
||||
cleanConfig.setValue(CLEANER_FILE_VERSIONS_RETAINED, String.valueOf(fileVersionsRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder retainCommits(int commitsRetained) {
|
||||
cleanConfig.setValue(CLEANER_COMMITS_RETAINED, String.valueOf(commitsRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder cleanerNumHoursRetained(int cleanerHoursRetained) {
|
||||
cleanConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder allowMultipleCleans(boolean allowMultipleCleanSchedules) {
|
||||
cleanConfig.setValue(ALLOW_MULTIPLE_CLEANS, String.valueOf(allowMultipleCleanSchedules));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withCleanerParallelism(int cleanerParallelism) {
|
||||
cleanConfig.setValue(CLEANER_PARALLELISM_VALUE, String.valueOf(cleanerParallelism));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withCleanBootstrapBaseFileEnabled(Boolean cleanBootstrapSourceFileEnabled) {
|
||||
cleanConfig.setValue(CLEANER_BOOTSTRAP_BASE_FILE_ENABLE, String.valueOf(cleanBootstrapSourceFileEnabled));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig.Builder withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy failedWritesPolicy) {
|
||||
cleanConfig.setValue(FAILED_WRITES_CLEANER_POLICY, failedWritesPolicy.name());
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanConfig build() {
|
||||
cleanConfig.setDefaults(HoodieCleanConfig.class.getName());
|
||||
HoodieCleaningPolicy.valueOf(cleanConfig.getString(CLEANER_POLICY));
|
||||
return cleanConfig;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -22,11 +22,6 @@ import org.apache.hudi.common.config.ConfigClassProperty;
|
||||
import org.apache.hudi.common.config.ConfigGroups;
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
import org.apache.hudi.common.model.HoodieCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
|
||||
import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy;
|
||||
@@ -47,57 +42,9 @@ import java.util.stream.Collectors;
|
||||
@ConfigClassProperty(name = "Compaction Configs",
|
||||
groupName = ConfigGroups.Names.WRITE_CLIENT,
|
||||
description = "Configurations that control compaction "
|
||||
+ "(merging of log files onto a new base files) as well as "
|
||||
+ "cleaning (reclamation of older/unused file groups/slices).")
|
||||
+ "(merging of log files onto a new base files).")
|
||||
public class HoodieCompactionConfig extends HoodieConfig {
|
||||
|
||||
public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
|
||||
.key("hoodie.archive.automatic")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
|
||||
+ " to archive commits if we cross a maximum value of commits."
|
||||
+ " It's recommended to enable this, to ensure number of active commits is bounded.");
|
||||
|
||||
public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
|
||||
.key("hoodie.archive.async")
|
||||
.defaultValue("false")
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
|
||||
+ "When turned on runs archiver async with writing, which can speed up overall write performance.");
|
||||
|
||||
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
|
||||
.key("hoodie.clean.automatic")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
|
||||
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
|
||||
+ " growth is bounded.");
|
||||
|
||||
public static final ConfigProperty<String> ASYNC_CLEAN = ConfigProperty
|
||||
.key("hoodie.clean.async")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("Only applies when " + AUTO_CLEAN.key() + " is turned on. "
|
||||
+ "When turned on runs cleaner async with writing, which can speed up overall write performance.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_COMMITS_RETAINED = ConfigProperty
|
||||
.key("hoodie.cleaner.commits.retained")
|
||||
.defaultValue("10")
|
||||
.withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits "
|
||||
+ "(scheduled). This also directly translates into how much data retention the table supports for incremental queries.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained")
|
||||
.defaultValue("24")
|
||||
.withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as"
|
||||
+ "compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group,"
|
||||
+ " corresponding to commits with commit times older than the configured number of hours to be retained are cleaned.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_POLICY = ConfigProperty
|
||||
.key("hoodie.cleaner.policy")
|
||||
.defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
|
||||
.withDocumentation("Cleaning policy to be used. The cleaner service deletes older file slices files to re-claim space."
|
||||
+ " By default, cleaner spares the file slices written by the last N commits, determined by " + CLEANER_COMMITS_RETAINED.key()
|
||||
+ " Long running query plans may often refer to older file slices and will break if those are cleaned, before the query has had"
|
||||
+ " a chance to run. So, it is good to make sure that the data is retained for more than the maximum query execution time");
|
||||
|
||||
public static final ConfigProperty<String> INLINE_COMPACT = ConfigProperty
|
||||
.key("hoodie.compact.inline")
|
||||
.defaultValue("false")
|
||||
@@ -130,62 +77,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
.withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
|
||||
+ "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
|
||||
.key("hoodie.clean.trigger.strategy")
|
||||
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
|
||||
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
|
||||
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
|
||||
|
||||
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
|
||||
.key("hoodie.clean.max.commits")
|
||||
.defaultValue("1")
|
||||
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
|
||||
.key("hoodie.cleaner.fileversions.retained")
|
||||
.defaultValue("3")
|
||||
.withDocumentation("When " + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, "
|
||||
+ " the minimum number of file slices to retain in each file group, during cleaning.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_INCREMENTAL_MODE_ENABLE = ConfigProperty
|
||||
.key("hoodie.cleaner.incremental.mode")
|
||||
.defaultValue("true")
|
||||
.withDocumentation("When enabled, the plans for each cleaner service run is computed incrementally off the events "
|
||||
+ " in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full"
|
||||
+ " table for each planning (even with a metadata table).");
|
||||
|
||||
public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
|
||||
.key("hoodie.keep.max.commits")
|
||||
.defaultValue("30")
|
||||
.withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to "
|
||||
+ " keep the metadata overhead constant, even as the table size grows."
|
||||
+ "This config controls the maximum number of instants to retain in the active timeline. ");
|
||||
|
||||
public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.archive.delete.parallelism")
|
||||
.defaultValue(100)
|
||||
.withDocumentation("Parallelism for deleting archived hoodie commits.");
|
||||
|
||||
public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
|
||||
.key("hoodie.keep.min.commits")
|
||||
.defaultValue("20")
|
||||
.withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of"
|
||||
+ "instants to retain in the active timeline.");
|
||||
|
||||
public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
|
||||
.key("hoodie.commits.archival.batch")
|
||||
.defaultValue(String.valueOf(10))
|
||||
.withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
|
||||
+ " archive log. This config controls such archival batch size.");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_BOOTSTRAP_BASE_FILE_ENABLE = ConfigProperty
|
||||
.key("hoodie.cleaner.delete.bootstrap.base.file")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("When set to true, cleaner also deletes the bootstrap base file when it's skeleton base file is "
|
||||
+ " cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the"
|
||||
+ " table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap "
|
||||
+ " base files are also physically deleted, to comply with data privacy enforcement processes.");
|
||||
|
||||
public static final ConfigProperty<String> PARQUET_SMALL_FILE_LIMIT = ConfigProperty
|
||||
.key("hoodie.parquet.small.file.limit")
|
||||
.defaultValue(String.valueOf(104857600))
|
||||
@@ -202,11 +93,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
+ " Hudi will search commits in the reverse order, until we find a commit that has totalBytesWritten "
|
||||
+ " larger than (PARQUET_SMALL_FILE_LIMIT_BYTES * this_threshold)");
|
||||
|
||||
public static final ConfigProperty<String> CLEANER_PARALLELISM_VALUE = ConfigProperty
|
||||
.key("hoodie.cleaner.parallelism")
|
||||
.defaultValue("200")
|
||||
.withDocumentation("Parallelism for the cleaning operation. Increase this if cleaning becomes slow.");
|
||||
|
||||
// 500GB of target IO per compaction (both read and write
|
||||
public static final ConfigProperty<String> TARGET_IO_PER_COMPACTION_IN_MB = ConfigProperty
|
||||
.key("hoodie.compaction.target.io")
|
||||
@@ -227,13 +113,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
+ "compaction during each compaction run. By default. Hudi picks the log file "
|
||||
+ "with most accumulated unmerged data");
|
||||
|
||||
public static final ConfigProperty<String> PAYLOAD_CLASS_NAME = ConfigProperty
|
||||
.key("hoodie.compaction.payload.class")
|
||||
.defaultValue(OverwriteWithLatestAvroPayload.class.getName())
|
||||
.withDocumentation("This needs to be same as class used during insert/upserts. Just like writing, compaction also uses "
|
||||
+ "the record payload class to merge records in the log against each other, merge again with the base file and "
|
||||
+ "produce the final record to be written after compaction.");
|
||||
|
||||
public static final ConfigProperty<String> COMPACTION_LAZY_BLOCK_READ_ENABLE = ConfigProperty
|
||||
.key("hoodie.compaction.lazy.block.read")
|
||||
.defaultValue("true")
|
||||
@@ -247,13 +126,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
.withDocumentation("HoodieLogFormatReader reads a logfile in the forward direction starting from pos=0 to pos=file_length. "
|
||||
+ "If this config is set to true, the reader reads the logfile in reverse direction, from pos=file_length to pos=0");
|
||||
|
||||
public static final ConfigProperty<String> FAILED_WRITES_CLEANER_POLICY = ConfigProperty
|
||||
.key("hoodie.cleaner.policy.failed.writes")
|
||||
.defaultValue(HoodieFailedWritesCleaningPolicy.EAGER.name())
|
||||
.withDocumentation("Cleaning policy for failed writes to be used. Hudi will delete any files written by "
|
||||
+ "failed writes to re-claim space. Choose to perform this rollback of failed writes eagerly before "
|
||||
+ "every writer starts (only supported for single writer) or lazily by the cleaner (required for multi-writers)");
|
||||
|
||||
public static final ConfigProperty<String> TARGET_PARTITIONS_PER_DAYBASED_COMPACTION = ConfigProperty
|
||||
.key("hoodie.compaction.daybased.target.partitions")
|
||||
.defaultValue("10")
|
||||
@@ -290,39 +162,8 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
.withDocumentation("The average record size. If not explicitly specified, hudi will compute the "
|
||||
+ "record size estimate compute dynamically based on commit metadata. "
|
||||
+ " This is critical in computing the insert parallelism and bin-packing inserts into small files.");
|
||||
|
||||
public static final ConfigProperty<Boolean> ALLOW_MULTIPLE_CLEANS = ConfigProperty
|
||||
.key("hoodie.clean.allow.multiple")
|
||||
.defaultValue(true)
|
||||
.sinceVersion("0.11.0")
|
||||
.withDocumentation("Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests should be mutually exclusive, "
|
||||
+ ".i.e. a 2nd clean will not be scheduled if another clean is not yet completed to avoid repeat cleaning of same files, they might want to disable this config.");
|
||||
|
||||
public static final ConfigProperty<Integer> ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty
|
||||
.key("hoodie.archive.merge.files.batch.size")
|
||||
.defaultValue(10)
|
||||
.withDocumentation("The number of small archive files to be merged at once.");
|
||||
|
||||
public static final ConfigProperty<Long> ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES = ConfigProperty
|
||||
.key("hoodie.archive.merge.small.file.limit.bytes")
|
||||
.defaultValue(20L * 1024 * 1024)
|
||||
.withDocumentation("This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.");
|
||||
|
||||
public static final ConfigProperty<Boolean> ARCHIVE_MERGE_ENABLE = ConfigProperty
|
||||
.key("hoodie.archive.merge.enable")
|
||||
.defaultValue(false)
|
||||
.withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
|
||||
+ " useful when storage scheme doesn't support append operation.");
|
||||
|
||||
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
|
||||
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String AUTO_CLEAN_PROP = AUTO_CLEAN.key();
|
||||
/** @deprecated Use {@link #ASYNC_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String ASYNC_CLEAN_PROP = ASYNC_CLEAN.key();
|
||||
/** @deprecated Use {@link #INLINE_COMPACT} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String INLINE_COMPACT_PROP = INLINE_COMPACT.key();
|
||||
@@ -335,39 +176,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
/** @deprecated Use {@link #INLINE_COMPACT_TRIGGER_STRATEGY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String INLINE_COMPACT_TRIGGER_STRATEGY_PROP = INLINE_COMPACT_TRIGGER_STRATEGY.key();
|
||||
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = CLEANER_FILE_VERSIONS_RETAINED.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_COMMITS_RETAINED_PROP = CLEANER_COMMITS_RETAINED.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_INCREMENTAL_MODE = CLEANER_INCREMENTAL_MODE_ENABLE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
|
||||
/**
|
||||
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
|
||||
/**
|
||||
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #PARQUET_SMALL_FILE_LIMIT} and its methods instead
|
||||
*/
|
||||
@@ -418,16 +226,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = COPY_ON_WRITE_RECORD_SIZE_ESTIMATE.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.key();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static final String DEFAULT_CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #TARGET_IO_PER_COMPACTION_IN_MB} and its methods instead
|
||||
*/
|
||||
@@ -446,12 +244,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
/** @deprecated Use {@link #COMPACTION_STRATEGY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String DEFAULT_COMPACTION_STRATEGY = COMPACTION_STRATEGY.defaultValue();
|
||||
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String DEFAULT_PAYLOAD_CLASS = PAYLOAD_CLASS_NAME.defaultValue();
|
||||
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String PAYLOAD_CLASS_PROP = PAYLOAD_CLASS_NAME.key();
|
||||
/** @deprecated Use {@link #COMPACTION_LAZY_BLOCK_READ_ENABLE} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = COMPACTION_LAZY_BLOCK_READ_ENABLE.key();
|
||||
@@ -464,33 +256,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
/** @deprecated Use {@link #COMPACTION_REVERSE_LOG_READ_ENABLE} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED = COMPACTION_REVERSE_LOG_READ_ENABLE.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_POLICY = CLEANER_POLICY.defaultValue();
|
||||
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String FAILED_WRITES_CLEANER_POLICY_PROP = FAILED_WRITES_CLEANER_POLICY.key();
|
||||
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_FAILED_WRITES_CLEANER_POLICY = FAILED_WRITES_CLEANER_POLICY.defaultValue();
|
||||
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_AUTO_CLEAN = AUTO_CLEAN.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #ASYNC_CLEAN} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_ASYNC_CLEAN = ASYNC_CLEAN.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #INLINE_COMPACT} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_INLINE_COMPACT = INLINE_COMPACT.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_INCREMENTAL_CLEANER = CLEANER_INCREMENTAL_MODE_ENABLE.defaultValue();
|
||||
/** @deprecated Use {@link #INLINE_COMPACT_NUM_DELTA_COMMITS} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = INLINE_COMPACT_NUM_DELTA_COMMITS.defaultValue();
|
||||
@@ -500,30 +270,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
/** @deprecated Use {@link #INLINE_COMPACT_TRIGGER_STRATEGY} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_INLINE_COMPACT_TRIGGER_STRATEGY = INLINE_COMPACT_TRIGGER_STRATEGY.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = CLEANER_FILE_VERSIONS_RETAINED.defaultValue();
|
||||
/** @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = CLEANER_COMMITS_RETAINED.defaultValue();
|
||||
/** @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */
|
||||
@Deprecated
|
||||
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();
|
||||
/**
|
||||
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
|
||||
*/
|
||||
@Deprecated
|
||||
private static final String DEFAULT_CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.defaultValue();
|
||||
/** @deprecated Use {@link #TARGET_PARTITIONS_PER_DAYBASED_COMPACTION} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP = TARGET_PARTITIONS_PER_DAYBASED_COMPACTION.key();
|
||||
@@ -555,31 +301,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAutoArchive(Boolean autoArchive) {
|
||||
compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAsyncArchive(Boolean asyncArchive) {
|
||||
compactionConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAutoClean(Boolean autoClean) {
|
||||
compactionConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withAsyncClean(Boolean asyncClean) {
|
||||
compactionConfig.setValue(ASYNC_CLEAN, String.valueOf(asyncClean));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) {
|
||||
compactionConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withInlineCompaction(Boolean inlineCompaction) {
|
||||
compactionConfig.setValue(INLINE_COMPACT, String.valueOf(inlineCompaction));
|
||||
return this;
|
||||
@@ -595,57 +316,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
|
||||
compactionConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
|
||||
compactionConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
|
||||
compactionConfig.setValue(CLEANER_POLICY, policy.name());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder retainFileVersions(int fileVersionsRetained) {
|
||||
compactionConfig.setValue(CLEANER_FILE_VERSIONS_RETAINED, String.valueOf(fileVersionsRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder retainCommits(int commitsRetained) {
|
||||
compactionConfig.setValue(CLEANER_COMMITS_RETAINED, String.valueOf(commitsRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder cleanerNumHoursRetained(int cleanerHoursRetained) {
|
||||
compactionConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
|
||||
compactionConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
|
||||
compactionConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveMergeFilesBatchSize(int number) {
|
||||
compactionConfig.setValue(ARCHIVE_MERGE_FILES_BATCH_SIZE, String.valueOf(number));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveMergeSmallFileLimit(long size) {
|
||||
compactionConfig.setValue(ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES, String.valueOf(size));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveMergeEnable(boolean enable) {
|
||||
compactionConfig.setValue(ARCHIVE_MERGE_ENABLE, String.valueOf(enable));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder compactionSmallFileSize(long smallFileLimitBytes) {
|
||||
compactionConfig.setValue(PARQUET_SMALL_FILE_LIMIT, String.valueOf(smallFileLimitBytes));
|
||||
return this;
|
||||
@@ -671,26 +341,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder allowMultipleCleans(boolean allowMultipleCleanSchedules) {
|
||||
compactionConfig.setValue(ALLOW_MULTIPLE_CLEANS, String.valueOf(allowMultipleCleanSchedules));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleanerParallelism(int cleanerParallelism) {
|
||||
compactionConfig.setValue(CLEANER_PARALLELISM_VALUE, String.valueOf(cleanerParallelism));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) {
|
||||
compactionConfig.setValue(COMPACTION_STRATEGY, compactionStrategy.getClass().getName());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withPayloadClass(String payloadClassName) {
|
||||
compactionConfig.setValue(PAYLOAD_CLASS_NAME, payloadClassName);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) {
|
||||
compactionConfig.setValue(TARGET_IO_PER_COMPACTION_IN_MB, String.valueOf(targetIOPerCompactionInMB));
|
||||
return this;
|
||||
@@ -701,11 +356,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
|
||||
compactionConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMaxDeltaSecondsBeforeCompaction(int maxDeltaSecondsBeforeCompaction) {
|
||||
compactionConfig.setValue(INLINE_COMPACT_TIME_DELTA_SECONDS, String.valueOf(maxDeltaSecondsBeforeCompaction));
|
||||
return this;
|
||||
@@ -736,49 +386,8 @@ public class HoodieCompactionConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCommitsArchivalBatchSize(int batchSize) {
|
||||
compactionConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleanBootstrapBaseFileEnabled(Boolean cleanBootstrapSourceFileEnabled) {
|
||||
compactionConfig.setValue(CLEANER_BOOTSTRAP_BASE_FILE_ENABLE, String.valueOf(cleanBootstrapSourceFileEnabled));
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy failedWritesPolicy) {
|
||||
compactionConfig.setValue(FAILED_WRITES_CLEANER_POLICY, failedWritesPolicy.name());
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCompactionConfig build() {
|
||||
compactionConfig.setDefaults(HoodieCompactionConfig.class.getName());
|
||||
// validation
|
||||
HoodieCleaningPolicy.valueOf(compactionConfig.getString(CLEANER_POLICY));
|
||||
|
||||
// Ensure minInstantsToKeep > cleanerCommitsRetained, otherwise we will archive some
|
||||
// commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull
|
||||
int minInstantsToKeep = Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP));
|
||||
int maxInstantsToKeep = Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP));
|
||||
int cleanerCommitsRetained =
|
||||
Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED));
|
||||
ValidationUtils.checkArgument(maxInstantsToKeep > minInstantsToKeep,
|
||||
String.format(
|
||||
"Increase %s=%d to be greater than %s=%d.",
|
||||
HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), maxInstantsToKeep,
|
||||
HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep));
|
||||
ValidationUtils.checkArgument(minInstantsToKeep > cleanerCommitsRetained,
|
||||
String.format(
|
||||
"Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull "
|
||||
+ "missing data from few instants.",
|
||||
HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep,
|
||||
HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), cleanerCommitsRetained));
|
||||
|
||||
boolean inlineCompact = compactionConfig.getBoolean(HoodieCompactionConfig.INLINE_COMPACT);
|
||||
boolean inlineCompactSchedule = compactionConfig.getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT);
|
||||
ValidationUtils.checkArgument(!(inlineCompact && inlineCompactSchedule), String.format("Either of inline compaction (%s) or "
|
||||
+ "schedule inline compaction (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieCompactionConfig.INLINE_COMPACT.key(),
|
||||
HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), inlineCompact, inlineCompactSchedule));
|
||||
return compactionConfig;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.hudi.common.config.ConfigClassProperty;
|
||||
import org.apache.hudi.common.config.ConfigGroups;
|
||||
import org.apache.hudi.common.config.ConfigProperty;
|
||||
import org.apache.hudi.common.config.HoodieConfig;
|
||||
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
@@ -52,6 +53,20 @@ public class HoodiePayloadConfig extends HoodieConfig {
|
||||
.withDocumentation("Table column/field name to derive timestamp associated with the records. This can"
|
||||
+ "be useful for e.g, determining the freshness of the table.");
|
||||
|
||||
public static final ConfigProperty<String> PAYLOAD_CLASS_NAME = ConfigProperty
|
||||
.key("hoodie.compaction.payload.class")
|
||||
.defaultValue(OverwriteWithLatestAvroPayload.class.getName())
|
||||
.withDocumentation("This needs to be same as class used during insert/upserts. Just like writing, compaction also uses "
|
||||
+ "the record payload class to merge records in the log against each other, merge again with the base file and "
|
||||
+ "produce the final record to be written after compaction.");
|
||||
|
||||
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String DEFAULT_PAYLOAD_CLASS = PAYLOAD_CLASS_NAME.defaultValue();
|
||||
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
|
||||
@Deprecated
|
||||
public static final String PAYLOAD_CLASS_PROP = PAYLOAD_CLASS_NAME.key();
|
||||
|
||||
private HoodiePayloadConfig() {
|
||||
super();
|
||||
}
|
||||
@@ -86,6 +101,11 @@ public class HoodiePayloadConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodiePayloadConfig.Builder withPayloadClass(String payloadClassName) {
|
||||
payloadConfig.setValue(PAYLOAD_CLASS_NAME, payloadClassName);
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodiePayloadConfig build() {
|
||||
payloadConfig.setDefaults(HoodiePayloadConfig.class.getName());
|
||||
return payloadConfig;
|
||||
|
||||
@@ -119,16 +119,16 @@ public class HoodieStorageConfig extends HoodieConfig {
|
||||
.withDocumentation("Whether to use dictionary encoding");
|
||||
|
||||
public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
|
||||
.key("hoodie.parquet.writelegacyformat.enabled")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
|
||||
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
|
||||
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
|
||||
.key("hoodie.parquet.writelegacyformat.enabled")
|
||||
.defaultValue("false")
|
||||
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
|
||||
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
|
||||
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
|
||||
|
||||
public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
|
||||
.key("hoodie.parquet.outputtimestamptype")
|
||||
.defaultValue("TIMESTAMP_MICROS")
|
||||
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
|
||||
.key("hoodie.parquet.outputtimestamptype")
|
||||
.defaultValue("TIMESTAMP_MICROS")
|
||||
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
|
||||
|
||||
public static final ConfigProperty<String> HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty
|
||||
.key("hoodie.hfile.compression.algorithm")
|
||||
|
||||
@@ -89,6 +89,8 @@ import java.util.Properties;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.apache.hudi.config.HoodieCleanConfig.CLEANER_POLICY;
|
||||
|
||||
/**
|
||||
* Class storing configs for the HoodieWriteClient.
|
||||
*/
|
||||
@@ -1148,31 +1150,31 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
* compaction properties.
|
||||
*/
|
||||
public HoodieCleaningPolicy getCleanerPolicy() {
|
||||
return HoodieCleaningPolicy.valueOf(getString(HoodieCompactionConfig.CLEANER_POLICY));
|
||||
return HoodieCleaningPolicy.valueOf(getString(CLEANER_POLICY));
|
||||
}
|
||||
|
||||
public int getCleanerFileVersionsRetained() {
|
||||
return getInt(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED);
|
||||
return getInt(HoodieCleanConfig.CLEANER_FILE_VERSIONS_RETAINED);
|
||||
}
|
||||
|
||||
public int getCleanerCommitsRetained() {
|
||||
return getInt(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED);
|
||||
return getInt(HoodieCleanConfig.CLEANER_COMMITS_RETAINED);
|
||||
}
|
||||
|
||||
public int getCleanerHoursRetained() {
|
||||
return getInt(HoodieCompactionConfig.CLEANER_HOURS_RETAINED);
|
||||
return getInt(HoodieCleanConfig.CLEANER_HOURS_RETAINED);
|
||||
}
|
||||
|
||||
public int getMaxCommitsToKeep() {
|
||||
return getInt(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP);
|
||||
return getInt(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP);
|
||||
}
|
||||
|
||||
public int getMinCommitsToKeep() {
|
||||
return getInt(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP);
|
||||
return getInt(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP);
|
||||
}
|
||||
|
||||
public int getArchiveMergeFilesBatchSize() {
|
||||
return getInt(HoodieCompactionConfig.ARCHIVE_MERGE_FILES_BATCH_SIZE);
|
||||
return getInt(HoodieArchivalConfig.ARCHIVE_MERGE_FILES_BATCH_SIZE);
|
||||
}
|
||||
|
||||
public int getParquetSmallFileLimit() {
|
||||
@@ -1192,7 +1194,7 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public boolean allowMultipleCleans() {
|
||||
return getBoolean(HoodieCompactionConfig.ALLOW_MULTIPLE_CLEANS);
|
||||
return getBoolean(HoodieCleanConfig.ALLOW_MULTIPLE_CLEANS);
|
||||
}
|
||||
|
||||
public boolean shouldAutoTuneInsertSplits() {
|
||||
@@ -1200,43 +1202,43 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public int getCleanerParallelism() {
|
||||
return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
|
||||
return getInt(HoodieCleanConfig.CLEANER_PARALLELISM_VALUE);
|
||||
}
|
||||
|
||||
public int getCleaningMaxCommits() {
|
||||
return getInt(HoodieCompactionConfig.CLEAN_MAX_COMMITS);
|
||||
return getInt(HoodieCleanConfig.CLEAN_MAX_COMMITS);
|
||||
}
|
||||
|
||||
public CleaningTriggerStrategy getCleaningTriggerStrategy() {
|
||||
return CleaningTriggerStrategy.valueOf(getString(HoodieCompactionConfig.CLEAN_TRIGGER_STRATEGY));
|
||||
return CleaningTriggerStrategy.valueOf(getString(HoodieCleanConfig.CLEAN_TRIGGER_STRATEGY));
|
||||
}
|
||||
|
||||
public boolean isAutoClean() {
|
||||
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
|
||||
return getBoolean(HoodieCleanConfig.AUTO_CLEAN);
|
||||
}
|
||||
|
||||
public boolean getArchiveMergeEnable() {
|
||||
return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE);
|
||||
return getBoolean(HoodieArchivalConfig.ARCHIVE_MERGE_ENABLE);
|
||||
}
|
||||
|
||||
public long getArchiveMergeSmallFileLimitBytes() {
|
||||
return getLong(HoodieCompactionConfig.ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES);
|
||||
return getLong(HoodieArchivalConfig.ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES);
|
||||
}
|
||||
|
||||
public boolean isAutoArchive() {
|
||||
return getBoolean(HoodieCompactionConfig.AUTO_ARCHIVE);
|
||||
return getBoolean(HoodieArchivalConfig.AUTO_ARCHIVE);
|
||||
}
|
||||
|
||||
public boolean isAsyncArchive() {
|
||||
return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE);
|
||||
return getBoolean(HoodieArchivalConfig.ASYNC_ARCHIVE);
|
||||
}
|
||||
|
||||
public boolean isAsyncClean() {
|
||||
return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN);
|
||||
return getBoolean(HoodieCleanConfig.ASYNC_CLEAN);
|
||||
}
|
||||
|
||||
public boolean incrementalCleanerModeEnabled() {
|
||||
return getBoolean(HoodieCompactionConfig.CLEANER_INCREMENTAL_MODE_ENABLE);
|
||||
return getBoolean(HoodieCleanConfig.CLEANER_INCREMENTAL_MODE_ENABLE);
|
||||
}
|
||||
|
||||
public boolean inlineCompactionEnabled() {
|
||||
@@ -1280,7 +1282,7 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public int getArchiveDeleteParallelism() {
|
||||
return getInt(HoodieCompactionConfig.DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE);
|
||||
return getInt(HoodieArchivalConfig.DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE);
|
||||
}
|
||||
|
||||
public boolean inlineClusteringEnabled() {
|
||||
@@ -1321,7 +1323,7 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public String getPayloadClass() {
|
||||
return getString(HoodieCompactionConfig.PAYLOAD_CLASS_NAME);
|
||||
return getString(HoodiePayloadConfig.PAYLOAD_CLASS_NAME);
|
||||
}
|
||||
|
||||
public int getTargetPartitionsPerDayBasedCompaction() {
|
||||
@@ -1329,11 +1331,11 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
}
|
||||
|
||||
public int getCommitArchivalBatchSize() {
|
||||
return getInt(HoodieCompactionConfig.COMMITS_ARCHIVAL_BATCH_SIZE);
|
||||
return getInt(HoodieArchivalConfig.COMMITS_ARCHIVAL_BATCH_SIZE);
|
||||
}
|
||||
|
||||
public Boolean shouldCleanBootstrapBaseFile() {
|
||||
return getBoolean(HoodieCompactionConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLE);
|
||||
return getBoolean(HoodieCleanConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLE);
|
||||
}
|
||||
|
||||
public String getClusteringUpdatesStrategyClass() {
|
||||
@@ -1342,7 +1344,7 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
|
||||
public HoodieFailedWritesCleaningPolicy getFailedWritesCleanPolicy() {
|
||||
return HoodieFailedWritesCleaningPolicy
|
||||
.valueOf(getString(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY));
|
||||
.valueOf(getString(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2117,6 +2119,8 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
private boolean isIndexConfigSet = false;
|
||||
private boolean isStorageConfigSet = false;
|
||||
private boolean isCompactionConfigSet = false;
|
||||
private boolean isCleanConfigSet = false;
|
||||
private boolean isArchivalConfigSet = false;
|
||||
private boolean isClusteringConfigSet = false;
|
||||
private boolean isOptimizeConfigSet = false;
|
||||
private boolean isMetricsConfigSet = false;
|
||||
@@ -2284,6 +2288,18 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCleanConfig(HoodieCleanConfig cleanConfig) {
|
||||
writeConfig.getProps().putAll(cleanConfig.getProps());
|
||||
isCleanConfigSet = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withArchivalConfig(HoodieArchivalConfig cleanConfig) {
|
||||
writeConfig.getProps().putAll(cleanConfig.getProps());
|
||||
isArchivalConfigSet = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withClusteringConfig(HoodieClusteringConfig clusteringConfig) {
|
||||
writeConfig.getProps().putAll(clusteringConfig.getProps());
|
||||
isClusteringConfigSet = true;
|
||||
@@ -2517,6 +2533,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
writeConfig.getProps()).build());
|
||||
writeConfig.setDefaultOnCondition(!isCompactionConfigSet,
|
||||
HoodieCompactionConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
|
||||
writeConfig.setDefaultOnCondition(!isCleanConfigSet,
|
||||
HoodieCleanConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
|
||||
writeConfig.setDefaultOnCondition(!isArchivalConfigSet,
|
||||
HoodieArchivalConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
|
||||
writeConfig.setDefaultOnCondition(!isClusteringConfigSet,
|
||||
HoodieClusteringConfig.newBuilder().withEngineType(engineType)
|
||||
.fromProperties(writeConfig.getProps()).build());
|
||||
@@ -2587,10 +2607,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
if (WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()
|
||||
.equalsIgnoreCase(writeConcurrencyMode)) {
|
||||
// In this case, we assume that the user takes care of setting the lock provider used
|
||||
writeConfig.setValue(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(),
|
||||
writeConfig.setValue(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(),
|
||||
HoodieFailedWritesCleaningPolicy.LAZY.name());
|
||||
LOG.info(String.format("Automatically set %s=%s since optimistic concurrency control is used",
|
||||
HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(),
|
||||
HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(),
|
||||
HoodieFailedWritesCleaningPolicy.LAZY.name()));
|
||||
}
|
||||
}
|
||||
@@ -2602,9 +2622,34 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
Objects.requireNonNull(writeConfig.getString(BASE_PATH));
|
||||
if (writeConfig.getString(WRITE_CONCURRENCY_MODE)
|
||||
.equalsIgnoreCase(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value())) {
|
||||
ValidationUtils.checkArgument(!writeConfig.getString(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY)
|
||||
ValidationUtils.checkArgument(!writeConfig.getString(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY)
|
||||
.equals(HoodieFailedWritesCleaningPolicy.EAGER.name()), "To enable optimistic concurrency control, set hoodie.cleaner.policy.failed.writes=LAZY");
|
||||
}
|
||||
|
||||
HoodieCleaningPolicy.valueOf(writeConfig.getString(CLEANER_POLICY));
|
||||
// Ensure minInstantsToKeep > cleanerCommitsRetained, otherwise we will archive some
|
||||
// commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull
|
||||
int minInstantsToKeep = Integer.parseInt(writeConfig.getStringOrDefault(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP));
|
||||
int maxInstantsToKeep = Integer.parseInt(writeConfig.getStringOrDefault(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP));
|
||||
int cleanerCommitsRetained =
|
||||
Integer.parseInt(writeConfig.getStringOrDefault(HoodieCleanConfig.CLEANER_COMMITS_RETAINED));
|
||||
ValidationUtils.checkArgument(maxInstantsToKeep > minInstantsToKeep,
|
||||
String.format(
|
||||
"Increase %s=%d to be greater than %s=%d.",
|
||||
HoodieArchivalConfig.MAX_COMMITS_TO_KEEP.key(), maxInstantsToKeep,
|
||||
HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep));
|
||||
ValidationUtils.checkArgument(minInstantsToKeep > cleanerCommitsRetained,
|
||||
String.format(
|
||||
"Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull "
|
||||
+ "missing data from few instants.",
|
||||
HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep,
|
||||
HoodieCleanConfig.CLEANER_COMMITS_RETAINED.key(), cleanerCommitsRetained));
|
||||
|
||||
boolean inlineCompact = writeConfig.getBoolean(HoodieCompactionConfig.INLINE_COMPACT);
|
||||
boolean inlineCompactSchedule = writeConfig.getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT);
|
||||
ValidationUtils.checkArgument(!(inlineCompact && inlineCompactSchedule), String.format("Either of inline compaction (%s) or "
|
||||
+ "schedule inline compaction (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieCompactionConfig.INLINE_COMPACT.key(),
|
||||
HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), inlineCompact, inlineCompactSchedule));
|
||||
}
|
||||
|
||||
public HoodieWriteConfig build() {
|
||||
|
||||
@@ -58,6 +58,8 @@ import org.apache.hudi.common.util.HoodieTimer;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.config.HoodieArchivalConfig;
|
||||
import org.apache.hudi.config.HoodieCleanConfig;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
|
||||
@@ -255,20 +257,24 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
|
||||
.withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath()))
|
||||
.withSchema(HoodieMetadataRecord.getClassSchema().toString())
|
||||
.forTable(tableName)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
// we will trigger cleaning manually, to control the instant times
|
||||
.withCleanConfig(HoodieCleanConfig.newBuilder()
|
||||
.withAsyncClean(writeConfig.isMetadataAsyncClean())
|
||||
// we will trigger cleaning manually, to control the instant times
|
||||
.withAutoClean(false)
|
||||
.withCleanerParallelism(parallelism)
|
||||
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
|
||||
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
|
||||
.retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
|
||||
.build())
|
||||
// we will trigger archive manually, to ensure only regular writer invokes it
|
||||
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
|
||||
.archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
|
||||
// we will trigger compaction manually, to control the instant times
|
||||
.withAutoArchive(false)
|
||||
.build())
|
||||
// we will trigger compaction manually, to control the instant times
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withInlineCompaction(false)
|
||||
.withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax())
|
||||
// we will trigger archive manually, to ensure only regular writer invokes it
|
||||
.withAutoArchive(false)
|
||||
// by default, the HFile does not keep the metadata fields, set up as false
|
||||
// to always use the metadata of the new record.
|
||||
.withPreserveCommitMetadata(false)
|
||||
|
||||
@@ -26,7 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieCleanConfig;
|
||||
import org.apache.hudi.config.HoodieLockConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieLockException;
|
||||
@@ -57,9 +57,9 @@ public class TestTransactionManager extends HoodieCommonTestHarness {
|
||||
private HoodieWriteConfig getWriteConfig() {
|
||||
return HoodieWriteConfig.newBuilder()
|
||||
.withPath(basePath)
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
|
||||
.build())
|
||||
.withCleanConfig(HoodieCleanConfig.newBuilder()
|
||||
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
|
||||
.build())
|
||||
.withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL)
|
||||
.withLockConfig(HoodieLockConfig.newBuilder()
|
||||
.withLockProvider(InProcessLockProvider.class)
|
||||
|
||||
@@ -47,10 +47,10 @@ import java.util.Properties;
|
||||
import java.util.function.Function;
|
||||
|
||||
import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.ASYNC_ARCHIVE;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.ASYNC_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.AUTO_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY;
|
||||
import static org.apache.hudi.config.HoodieArchivalConfig.ASYNC_ARCHIVE;
|
||||
import static org.apache.hudi.config.HoodieCleanConfig.ASYNC_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCleanConfig.AUTO_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.INLINE_COMPACT;
|
||||
import static org.apache.hudi.config.HoodieWriteConfig.TABLE_SERVICES_ENABLED;
|
||||
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
|
||||
@@ -64,9 +64,9 @@ public class TestHoodieWriteConfig {
|
||||
public void testPropertyLoading(boolean withAlternative) throws IOException {
|
||||
Builder builder = HoodieWriteConfig.newBuilder().withPath("/tmp");
|
||||
Map<String, String> params = new HashMap<>(3);
|
||||
params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1");
|
||||
params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "5");
|
||||
params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2");
|
||||
params.put(HoodieCleanConfig.CLEANER_COMMITS_RETAINED.key(), "1");
|
||||
params.put(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP.key(), "5");
|
||||
params.put(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), "2");
|
||||
if (withAlternative) {
|
||||
params.put("hoodie.avro.schema.externalTransformation", "true");
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user