1
0

[HUDI-2150] Rename/Restructure configs for better modularity (#6061)

- Move clean related configuration to HoodieCleanConfig
- Move Archival related configuration to HoodieArchivalConfig
- Move hoodie.compaction.payload.class to HoodiePayloadConfig
This commit is contained in:
liujinhui
2022-07-09 22:30:48 +08:00
committed by GitHub
parent 6566fc6625
commit 126b88b48d
38 changed files with 920 additions and 672 deletions

View File

@@ -0,0 +1,194 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Properties;
/**
 * Archival related config.
 *
 * <p>Controls when instants are moved from the active timeline into the archived
 * timeline (bounded by min/max commits to keep), and how small archive files are
 * merged. Built via {@link #newBuilder()}.
 */
@Immutable
@ConfigClassProperty(name = "Archival Configs",
    groupName = ConfigGroups.Names.WRITE_CLIENT,
    description = "Configurations that control archival.")
public class HoodieArchivalConfig extends HoodieConfig {

  public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
      .key("hoodie.archive.automatic")
      .defaultValue("true")
      .withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
          + " to archive commits if we cross a maximum value of commits."
          + " It's recommended to enable this, to ensure number of active commits is bounded.");

  public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
      .key("hoodie.archive.async")
      .defaultValue("false")
      .sinceVersion("0.11.0")
      .withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
          + "When turned on runs archiver async with writing, which can speed up overall write performance.");

  public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
      .key("hoodie.keep.max.commits")
      .defaultValue("30")
      // Fixed missing space between sentences ("grows.This" -> "grows. This").
      .withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to "
          + " keep the metadata overhead constant, even as the table size grows. "
          + "This config controls the maximum number of instants to retain in the active timeline. ");

  public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
      .key("hoodie.archive.delete.parallelism")
      .defaultValue(100)
      .withDocumentation("Parallelism for deleting archived hoodie commits.");

  public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
      .key("hoodie.keep.min.commits")
      .defaultValue("20")
      // Fixed missing space between concatenated fragments ("ofinstants" -> "of instants").
      .withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of "
          + "instants to retain in the active timeline.");

  public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
      .key("hoodie.commits.archival.batch")
      .defaultValue(String.valueOf(10))
      .withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
          + " archive log. This config controls such archival batch size.");

  public static final ConfigProperty<Integer> ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty
      .key("hoodie.archive.merge.files.batch.size")
      .defaultValue(10)
      .withDocumentation("The number of small archive files to be merged at once.");

  public static final ConfigProperty<Long> ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES = ConfigProperty
      .key("hoodie.archive.merge.small.file.limit.bytes")
      .defaultValue(20L * 1024 * 1024)
      .withDocumentation("This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.");

  public static final ConfigProperty<Boolean> ARCHIVE_MERGE_ENABLE = ConfigProperty
      .key("hoodie.archive.merge.enable")
      .defaultValue(false)
      // Fixed grammar in user-facing documentation ("When enable" -> "When enabled").
      .withDocumentation("When enabled, hoodie will auto merge several small archive files into larger one. It's"
          + " useful when storage scheme doesn't support append operation.");

  /**
   * @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
  /**
   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
  /**
   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
  /** @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */
  @Deprecated
  private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
  /**
   * @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
   */
  @Deprecated
  private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
  /**
   * @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
   */
  @Deprecated
  private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();

  /** Instances are created through {@link Builder} only. */
  private HoodieArchivalConfig() {
    super();
  }

  public static HoodieArchivalConfig.Builder newBuilder() {
    return new HoodieArchivalConfig.Builder();
  }

  /**
   * Fluent builder for {@link HoodieArchivalConfig}. Values not explicitly set
   * fall back to the defaults registered on the {@code ConfigProperty} fields
   * when {@link #build()} runs.
   */
  public static class Builder {

    private final HoodieArchivalConfig archivalConfig = new HoodieArchivalConfig();

    /**
     * Loads properties from the given file.
     *
     * <p>NOTE(review): {@code FileReader} uses the platform default charset here;
     * non-ASCII property values may be mis-read on some platforms — confirm
     * whether UTF-8 should be forced.
     */
    public HoodieArchivalConfig.Builder fromFile(File propertiesFile) throws IOException {
      try (FileReader reader = new FileReader(propertiesFile)) {
        this.archivalConfig.getProps().load(reader);
        return this;
      }
    }

    public HoodieArchivalConfig.Builder fromProperties(Properties props) {
      this.archivalConfig.getProps().putAll(props);
      return this;
    }

    public HoodieArchivalConfig.Builder withAutoArchive(Boolean autoArchive) {
      archivalConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
      return this;
    }

    public HoodieArchivalConfig.Builder withAsyncArchive(Boolean asyncArchive) {
      archivalConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
      return this;
    }

    /** Sets both bounds of the active-timeline retention window at once. */
    public HoodieArchivalConfig.Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
      archivalConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
      archivalConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
      return this;
    }

    public HoodieArchivalConfig.Builder withArchiveMergeFilesBatchSize(int number) {
      archivalConfig.setValue(ARCHIVE_MERGE_FILES_BATCH_SIZE, String.valueOf(number));
      return this;
    }

    public HoodieArchivalConfig.Builder withArchiveMergeSmallFileLimit(long size) {
      archivalConfig.setValue(ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES, String.valueOf(size));
      return this;
    }

    public HoodieArchivalConfig.Builder withArchiveMergeEnable(boolean enable) {
      archivalConfig.setValue(ARCHIVE_MERGE_ENABLE, String.valueOf(enable));
      return this;
    }

    public HoodieArchivalConfig.Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
      archivalConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
      return this;
    }

    public HoodieArchivalConfig.Builder withCommitsArchivalBatchSize(int batchSize) {
      archivalConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
      return this;
    }

    /** Fills in defaults for any unset keys and returns the finished config. */
    public HoodieArchivalConfig build() {
      archivalConfig.setDefaults(HoodieArchivalConfig.class.getName());
      return archivalConfig;
    }
  }
}

View File

@@ -0,0 +1,297 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.config;
import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
import javax.annotation.concurrent.Immutable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Properties;
import java.util.stream.Collectors;
/**
* Clean related config.
*/
@Immutable
@ConfigClassProperty(name = "Clean Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Cleaning (reclamation of older/unused file groups/slices).")
public class HoodieCleanConfig extends HoodieConfig {
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
.key("hoodie.clean.automatic")
.defaultValue("true")
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
+ " growth is bounded.");
public static final ConfigProperty<String> ASYNC_CLEAN = ConfigProperty
.key("hoodie.clean.async")
.defaultValue("false")
.withDocumentation("Only applies when " + AUTO_CLEAN.key() + " is turned on. "
+ "When turned on runs cleaner async with writing, which can speed up overall write performance.");
public static final ConfigProperty<String> CLEANER_COMMITS_RETAINED = ConfigProperty
.key("hoodie.cleaner.commits.retained")
.defaultValue("10")
.withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits "
+ "(scheduled). This also directly translates into how much data retention the table supports for incremental queries.");
public static final ConfigProperty<String> CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained")
.defaultValue("24")
.withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as"
+ "compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group,"
+ " corresponding to commits with commit times older than the configured number of hours to be retained are cleaned.");
public static final ConfigProperty<String> CLEANER_POLICY = ConfigProperty
.key("hoodie.cleaner.policy")
.defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
.withDocumentation("Cleaning policy to be used. The cleaner service deletes older file slices files to re-claim space."
+ " By default, cleaner spares the file slices written by the last N commits, determined by " + CLEANER_COMMITS_RETAINED.key()
+ " Long running query plans may often refer to older file slices and will break if those are cleaned, before the query has had"
+ " a chance to run. So, it is good to make sure that the data is retained for more than the maximum query execution time");
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
.key("hoodie.clean.trigger.strategy")
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
.key("hoodie.clean.max.commits")
.defaultValue("1")
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
.key("hoodie.cleaner.fileversions.retained")
.defaultValue("3")
.withDocumentation("When " + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, "
+ " the minimum number of file slices to retain in each file group, during cleaning.");
public static final ConfigProperty<String> CLEANER_INCREMENTAL_MODE_ENABLE = ConfigProperty
.key("hoodie.cleaner.incremental.mode")
.defaultValue("true")
.withDocumentation("When enabled, the plans for each cleaner service run is computed incrementally off the events "
+ " in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full"
+ " table for each planning (even with a metadata table).");
public static final ConfigProperty<String> FAILED_WRITES_CLEANER_POLICY = ConfigProperty
.key("hoodie.cleaner.policy.failed.writes")
.defaultValue(HoodieFailedWritesCleaningPolicy.EAGER.name())
.withDocumentation("Cleaning policy for failed writes to be used. Hudi will delete any files written by "
+ "failed writes to re-claim space. Choose to perform this rollback of failed writes eagerly before "
+ "every writer starts (only supported for single writer) or lazily by the cleaner (required for multi-writers)");
public static final ConfigProperty<String> CLEANER_PARALLELISM_VALUE = ConfigProperty
.key("hoodie.cleaner.parallelism")
.defaultValue("200")
.withDocumentation("Parallelism for the cleaning operation. Increase this if cleaning becomes slow.");
public static final ConfigProperty<Boolean> ALLOW_MULTIPLE_CLEANS = ConfigProperty
.key("hoodie.clean.allow.multiple")
.defaultValue(true)
.sinceVersion("0.11.0")
.withDocumentation("Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests should be mutually exclusive, "
+ ".i.e. a 2nd clean will not be scheduled if another clean is not yet completed to avoid repeat cleaning of same files, they might want to disable this config.");
public static final ConfigProperty<String> CLEANER_BOOTSTRAP_BASE_FILE_ENABLE = ConfigProperty
.key("hoodie.cleaner.delete.bootstrap.base.file")
.defaultValue("false")
.withDocumentation("When set to true, cleaner also deletes the bootstrap base file when it's skeleton base file is "
+ " cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the"
+ " table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap "
+ " base files are also physically deleted, to comply with data privacy enforcement processes.");
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
@Deprecated
public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
@Deprecated
public static final String AUTO_CLEAN_PROP = AUTO_CLEAN.key();
/** @deprecated Use {@link #ASYNC_CLEAN} and its methods instead */
@Deprecated
public static final String ASYNC_CLEAN_PROP = ASYNC_CLEAN.key();
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
@Deprecated
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = CLEANER_FILE_VERSIONS_RETAINED.key();
/**
* @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead
*/
@Deprecated
public static final String CLEANER_COMMITS_RETAINED_PROP = CLEANER_COMMITS_RETAINED.key();
/**
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
*/
@Deprecated
public static final String CLEANER_INCREMENTAL_MODE = CLEANER_INCREMENTAL_MODE_ENABLE.key();
/**
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
*/
@Deprecated
public static final String CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.key();
/**
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
*/
@Deprecated
public static final String CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.key();
/**
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
*/
@Deprecated
public static final String DEFAULT_CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.defaultValue();
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_POLICY = CLEANER_POLICY.defaultValue();
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
@Deprecated
public static final String FAILED_WRITES_CLEANER_POLICY_PROP = FAILED_WRITES_CLEANER_POLICY.key();
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
@Deprecated
private static final String DEFAULT_FAILED_WRITES_CLEANER_POLICY = FAILED_WRITES_CLEANER_POLICY.defaultValue();
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
@Deprecated
private static final String DEFAULT_AUTO_CLEAN = AUTO_CLEAN.defaultValue();
/**
* @deprecated Use {@link #ASYNC_CLEAN} and its methods instead
*/
@Deprecated
private static final String DEFAULT_ASYNC_CLEAN = ASYNC_CLEAN.defaultValue();
/**
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
*/
@Deprecated
private static final String DEFAULT_INCREMENTAL_CLEANER = CLEANER_INCREMENTAL_MODE_ENABLE.defaultValue();
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = CLEANER_FILE_VERSIONS_RETAINED.defaultValue();
/** @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = CLEANER_COMMITS_RETAINED.defaultValue();
/**
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
*/
@Deprecated
private static final String DEFAULT_CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.defaultValue();
private HoodieCleanConfig() {
super();
}
public static HoodieCleanConfig.Builder newBuilder() {
return new HoodieCleanConfig.Builder();
}
public static class Builder {
private final HoodieCleanConfig cleanConfig = new HoodieCleanConfig();
public HoodieCleanConfig.Builder fromFile(File propertiesFile) throws IOException {
try (FileReader reader = new FileReader(propertiesFile)) {
this.cleanConfig.getProps().load(reader);
return this;
}
}
public HoodieCleanConfig.Builder fromProperties(Properties props) {
this.cleanConfig.getProps().putAll(props);
return this;
}
public HoodieCleanConfig.Builder withAutoClean(Boolean autoClean) {
cleanConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean));
return this;
}
public HoodieCleanConfig.Builder withAsyncClean(Boolean asyncClean) {
cleanConfig.setValue(ASYNC_CLEAN, String.valueOf(asyncClean));
return this;
}
public HoodieCleanConfig.Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) {
cleanConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode));
return this;
}
public HoodieCleanConfig.Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
cleanConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
return this;
}
public HoodieCleanConfig.Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
cleanConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
return this;
}
public HoodieCleanConfig.Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
cleanConfig.setValue(CLEANER_POLICY, policy.name());
return this;
}
public HoodieCleanConfig.Builder retainFileVersions(int fileVersionsRetained) {
cleanConfig.setValue(CLEANER_FILE_VERSIONS_RETAINED, String.valueOf(fileVersionsRetained));
return this;
}
public HoodieCleanConfig.Builder retainCommits(int commitsRetained) {
cleanConfig.setValue(CLEANER_COMMITS_RETAINED, String.valueOf(commitsRetained));
return this;
}
public HoodieCleanConfig.Builder cleanerNumHoursRetained(int cleanerHoursRetained) {
cleanConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained));
return this;
}
public HoodieCleanConfig.Builder allowMultipleCleans(boolean allowMultipleCleanSchedules) {
cleanConfig.setValue(ALLOW_MULTIPLE_CLEANS, String.valueOf(allowMultipleCleanSchedules));
return this;
}
public HoodieCleanConfig.Builder withCleanerParallelism(int cleanerParallelism) {
cleanConfig.setValue(CLEANER_PARALLELISM_VALUE, String.valueOf(cleanerParallelism));
return this;
}
public HoodieCleanConfig.Builder withCleanBootstrapBaseFileEnabled(Boolean cleanBootstrapSourceFileEnabled) {
cleanConfig.setValue(CLEANER_BOOTSTRAP_BASE_FILE_ENABLE, String.valueOf(cleanBootstrapSourceFileEnabled));
return this;
}
public HoodieCleanConfig.Builder withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy failedWritesPolicy) {
cleanConfig.setValue(FAILED_WRITES_CLEANER_POLICY, failedWritesPolicy.name());
return this;
}
public HoodieCleanConfig build() {
cleanConfig.setDefaults(HoodieCleanConfig.class.getName());
HoodieCleaningPolicy.valueOf(cleanConfig.getString(CLEANER_POLICY));
return cleanConfig;
}
}
}

View File

@@ -22,11 +22,6 @@ import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.table.action.clean.CleaningTriggerStrategy;
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
import org.apache.hudi.table.action.compact.strategy.CompactionStrategy;
import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy;
@@ -47,57 +42,9 @@ import java.util.stream.Collectors;
@ConfigClassProperty(name = "Compaction Configs",
groupName = ConfigGroups.Names.WRITE_CLIENT,
description = "Configurations that control compaction "
+ "(merging of log files onto a new base files) as well as "
+ "cleaning (reclamation of older/unused file groups/slices).")
+ "(merging of log files onto a new base files).")
public class HoodieCompactionConfig extends HoodieConfig {
public static final ConfigProperty<String> AUTO_ARCHIVE = ConfigProperty
.key("hoodie.archive.automatic")
.defaultValue("true")
.withDocumentation("When enabled, the archival table service is invoked immediately after each commit,"
+ " to archive commits if we cross a maximum value of commits."
+ " It's recommended to enable this, to ensure number of active commits is bounded.");
public static final ConfigProperty<String> ASYNC_ARCHIVE = ConfigProperty
.key("hoodie.archive.async")
.defaultValue("false")
.sinceVersion("0.11.0")
.withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. "
+ "When turned on runs archiver async with writing, which can speed up overall write performance.");
public static final ConfigProperty<String> AUTO_CLEAN = ConfigProperty
.key("hoodie.clean.automatic")
.defaultValue("true")
.withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit,"
+ " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage"
+ " growth is bounded.");
public static final ConfigProperty<String> ASYNC_CLEAN = ConfigProperty
.key("hoodie.clean.async")
.defaultValue("false")
.withDocumentation("Only applies when " + AUTO_CLEAN.key() + " is turned on. "
+ "When turned on runs cleaner async with writing, which can speed up overall write performance.");
public static final ConfigProperty<String> CLEANER_COMMITS_RETAINED = ConfigProperty
.key("hoodie.cleaner.commits.retained")
.defaultValue("10")
.withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits "
+ "(scheduled). This also directly translates into how much data retention the table supports for incremental queries.");
public static final ConfigProperty<String> CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained")
.defaultValue("24")
.withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as"
+ "compared to number of commits retained for cleaning service. Setting this property ensures all the files, but the latest in a file group,"
+ " corresponding to commits with commit times older than the configured number of hours to be retained are cleaned.");
public static final ConfigProperty<String> CLEANER_POLICY = ConfigProperty
.key("hoodie.cleaner.policy")
.defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name())
.withDocumentation("Cleaning policy to be used. The cleaner service deletes older file slices files to re-claim space."
+ " By default, cleaner spares the file slices written by the last N commits, determined by " + CLEANER_COMMITS_RETAINED.key()
+ " Long running query plans may often refer to older file slices and will break if those are cleaned, before the query has had"
+ " a chance to run. So, it is good to make sure that the data is retained for more than the maximum query execution time");
public static final ConfigProperty<String> INLINE_COMPACT = ConfigProperty
.key("hoodie.compact.inline")
.defaultValue("false")
@@ -130,62 +77,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
.withDocumentation("Controls how compaction scheduling is triggered, by time or num delta commits or combination of both. "
+ "Valid options: " + Arrays.stream(CompactionTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
public static final ConfigProperty<String> CLEAN_TRIGGER_STRATEGY = ConfigProperty
.key("hoodie.clean.trigger.strategy")
.defaultValue(CleaningTriggerStrategy.NUM_COMMITS.name())
.withDocumentation("Controls how cleaning is scheduled. Valid options: "
+ Arrays.stream(CleaningTriggerStrategy.values()).map(Enum::name).collect(Collectors.joining(",")));
public static final ConfigProperty<String> CLEAN_MAX_COMMITS = ConfigProperty
.key("hoodie.clean.max.commits")
.defaultValue("1")
.withDocumentation("Number of commits after the last clean operation, before scheduling of a new clean is attempted.");
public static final ConfigProperty<String> CLEANER_FILE_VERSIONS_RETAINED = ConfigProperty
.key("hoodie.cleaner.fileversions.retained")
.defaultValue("3")
.withDocumentation("When " + HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS.name() + " cleaning policy is used, "
+ " the minimum number of file slices to retain in each file group, during cleaning.");
public static final ConfigProperty<String> CLEANER_INCREMENTAL_MODE_ENABLE = ConfigProperty
.key("hoodie.cleaner.incremental.mode")
.defaultValue("true")
.withDocumentation("When enabled, the plans for each cleaner service run is computed incrementally off the events "
+ " in the timeline, since the last cleaner run. This is much more efficient than obtaining listings for the full"
+ " table for each planning (even with a metadata table).");
public static final ConfigProperty<String> MAX_COMMITS_TO_KEEP = ConfigProperty
.key("hoodie.keep.max.commits")
.defaultValue("30")
.withDocumentation("Archiving service moves older entries from timeline into an archived log after each write, to "
+ " keep the metadata overhead constant, even as the table size grows."
+ "This config controls the maximum number of instants to retain in the active timeline. ");
public static final ConfigProperty<Integer> DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE = ConfigProperty
.key("hoodie.archive.delete.parallelism")
.defaultValue(100)
.withDocumentation("Parallelism for deleting archived hoodie commits.");
public static final ConfigProperty<String> MIN_COMMITS_TO_KEEP = ConfigProperty
.key("hoodie.keep.min.commits")
.defaultValue("20")
.withDocumentation("Similar to " + MAX_COMMITS_TO_KEEP.key() + ", but controls the minimum number of"
+ "instants to retain in the active timeline.");
public static final ConfigProperty<String> COMMITS_ARCHIVAL_BATCH_SIZE = ConfigProperty
.key("hoodie.commits.archival.batch")
.defaultValue(String.valueOf(10))
.withDocumentation("Archiving of instants is batched in best-effort manner, to pack more instants into a single"
+ " archive log. This config controls such archival batch size.");
public static final ConfigProperty<String> CLEANER_BOOTSTRAP_BASE_FILE_ENABLE = ConfigProperty
.key("hoodie.cleaner.delete.bootstrap.base.file")
.defaultValue("false")
.withDocumentation("When set to true, cleaner also deletes the bootstrap base file when it's skeleton base file is "
+ " cleaned. Turn this to true, if you want to ensure the bootstrap dataset storage is reclaimed over time, as the"
+ " table receives updates/deletes. Another reason to turn this on, would be to ensure data residing in bootstrap "
+ " base files are also physically deleted, to comply with data privacy enforcement processes.");
public static final ConfigProperty<String> PARQUET_SMALL_FILE_LIMIT = ConfigProperty
.key("hoodie.parquet.small.file.limit")
.defaultValue(String.valueOf(104857600))
@@ -202,11 +93,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
+ " Hudi will search commits in the reverse order, until we find a commit that has totalBytesWritten "
+ " larger than (PARQUET_SMALL_FILE_LIMIT_BYTES * this_threshold)");
public static final ConfigProperty<String> CLEANER_PARALLELISM_VALUE = ConfigProperty
.key("hoodie.cleaner.parallelism")
.defaultValue("200")
.withDocumentation("Parallelism for the cleaning operation. Increase this if cleaning becomes slow.");
// 500GB of target IO per compaction (both read and write
public static final ConfigProperty<String> TARGET_IO_PER_COMPACTION_IN_MB = ConfigProperty
.key("hoodie.compaction.target.io")
@@ -227,13 +113,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
+ "compaction during each compaction run. By default. Hudi picks the log file "
+ "with most accumulated unmerged data");
public static final ConfigProperty<String> PAYLOAD_CLASS_NAME = ConfigProperty
.key("hoodie.compaction.payload.class")
.defaultValue(OverwriteWithLatestAvroPayload.class.getName())
.withDocumentation("This needs to be same as class used during insert/upserts. Just like writing, compaction also uses "
+ "the record payload class to merge records in the log against each other, merge again with the base file and "
+ "produce the final record to be written after compaction.");
public static final ConfigProperty<String> COMPACTION_LAZY_BLOCK_READ_ENABLE = ConfigProperty
.key("hoodie.compaction.lazy.block.read")
.defaultValue("true")
@@ -247,13 +126,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
.withDocumentation("HoodieLogFormatReader reads a logfile in the forward direction starting from pos=0 to pos=file_length. "
+ "If this config is set to true, the reader reads the logfile in reverse direction, from pos=file_length to pos=0");
public static final ConfigProperty<String> FAILED_WRITES_CLEANER_POLICY = ConfigProperty
.key("hoodie.cleaner.policy.failed.writes")
.defaultValue(HoodieFailedWritesCleaningPolicy.EAGER.name())
.withDocumentation("Cleaning policy for failed writes to be used. Hudi will delete any files written by "
+ "failed writes to re-claim space. Choose to perform this rollback of failed writes eagerly before "
+ "every writer starts (only supported for single writer) or lazily by the cleaner (required for multi-writers)");
public static final ConfigProperty<String> TARGET_PARTITIONS_PER_DAYBASED_COMPACTION = ConfigProperty
.key("hoodie.compaction.daybased.target.partitions")
.defaultValue("10")
@@ -290,39 +162,8 @@ public class HoodieCompactionConfig extends HoodieConfig {
.withDocumentation("The average record size. If not explicitly specified, hudi will compute the "
+ "record size estimate compute dynamically based on commit metadata. "
+ " This is critical in computing the insert parallelism and bin-packing inserts into small files.");
public static final ConfigProperty<Boolean> ALLOW_MULTIPLE_CLEANS = ConfigProperty
.key("hoodie.clean.allow.multiple")
.defaultValue(true)
.sinceVersion("0.11.0")
.withDocumentation("Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests should be mutually exclusive, "
+ ".i.e. a 2nd clean will not be scheduled if another clean is not yet completed to avoid repeat cleaning of same files, they might want to disable this config.");
public static final ConfigProperty<Integer> ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty
.key("hoodie.archive.merge.files.batch.size")
.defaultValue(10)
.withDocumentation("The number of small archive files to be merged at once.");
public static final ConfigProperty<Long> ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES = ConfigProperty
.key("hoodie.archive.merge.small.file.limit.bytes")
.defaultValue(20L * 1024 * 1024)
.withDocumentation("This config sets the archive file size limit below which an archive file becomes a candidate to be selected as such a small file.");
public static final ConfigProperty<Boolean> ARCHIVE_MERGE_ENABLE = ConfigProperty
.key("hoodie.archive.merge.enable")
.defaultValue(false)
.withDocumentation("When enable, hoodie will auto merge several small archive files into larger one. It's"
+ " useful when storage scheme doesn't support append operation.");
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
@Deprecated
public static final String CLEANER_POLICY_PROP = CLEANER_POLICY.key();
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
@Deprecated
public static final String AUTO_CLEAN_PROP = AUTO_CLEAN.key();
/** @deprecated Use {@link #ASYNC_CLEAN} and its methods instead */
@Deprecated
public static final String ASYNC_CLEAN_PROP = ASYNC_CLEAN.key();
/** @deprecated Use {@link #INLINE_COMPACT} and its methods instead */
@Deprecated
public static final String INLINE_COMPACT_PROP = INLINE_COMPACT.key();
@@ -335,39 +176,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
/** @deprecated Use {@link #INLINE_COMPACT_TRIGGER_STRATEGY} and its methods instead */
@Deprecated
public static final String INLINE_COMPACT_TRIGGER_STRATEGY_PROP = INLINE_COMPACT_TRIGGER_STRATEGY.key();
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
@Deprecated
public static final String CLEANER_FILE_VERSIONS_RETAINED_PROP = CLEANER_FILE_VERSIONS_RETAINED.key();
/**
* @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead
*/
@Deprecated
public static final String CLEANER_COMMITS_RETAINED_PROP = CLEANER_COMMITS_RETAINED.key();
/**
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
*/
@Deprecated
public static final String CLEANER_INCREMENTAL_MODE = CLEANER_INCREMENTAL_MODE_ENABLE.key();
/**
* @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead
*/
@Deprecated
public static final String MAX_COMMITS_TO_KEEP_PROP = MAX_COMMITS_TO_KEEP.key();
/**
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
*/
@Deprecated
public static final String MIN_COMMITS_TO_KEEP_PROP = MIN_COMMITS_TO_KEEP.key();
/**
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
*/
@Deprecated
public static final String COMMITS_ARCHIVAL_BATCH_SIZE_PROP = COMMITS_ARCHIVAL_BATCH_SIZE.key();
/**
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
*/
@Deprecated
public static final String CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.key();
/**
* @deprecated Use {@link #PARQUET_SMALL_FILE_LIMIT} and its methods instead
*/
@@ -418,16 +226,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
*/
@Deprecated
public static final String DEFAULT_COPY_ON_WRITE_TABLE_RECORD_SIZE_ESTIMATE = COPY_ON_WRITE_RECORD_SIZE_ESTIMATE.defaultValue();
/**
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
*/
@Deprecated
public static final String CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.key();
/**
* @deprecated Use {@link #CLEANER_PARALLELISM_VALUE} and its methods instead
*/
@Deprecated
public static final String DEFAULT_CLEANER_PARALLELISM = CLEANER_PARALLELISM_VALUE.defaultValue();
/**
* @deprecated Use {@link #TARGET_IO_PER_COMPACTION_IN_MB} and its methods instead
*/
@@ -446,12 +244,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
/** @deprecated Use {@link #COMPACTION_STRATEGY} and its methods instead */
@Deprecated
public static final String DEFAULT_COMPACTION_STRATEGY = COMPACTION_STRATEGY.defaultValue();
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
@Deprecated
public static final String DEFAULT_PAYLOAD_CLASS = PAYLOAD_CLASS_NAME.defaultValue();
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
@Deprecated
public static final String PAYLOAD_CLASS_PROP = PAYLOAD_CLASS_NAME.key();
/** @deprecated Use {@link #COMPACTION_LAZY_BLOCK_READ_ENABLE} and its methods instead */
@Deprecated
public static final String COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP = COMPACTION_LAZY_BLOCK_READ_ENABLE.key();
@@ -464,33 +256,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
/** @deprecated Use {@link #COMPACTION_REVERSE_LOG_READ_ENABLE} and its methods instead */
@Deprecated
public static final String DEFAULT_COMPACTION_REVERSE_LOG_READ_ENABLED = COMPACTION_REVERSE_LOG_READ_ENABLE.defaultValue();
/** @deprecated Use {@link #CLEANER_POLICY} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_POLICY = CLEANER_POLICY.defaultValue();
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
@Deprecated
public static final String FAILED_WRITES_CLEANER_POLICY_PROP = FAILED_WRITES_CLEANER_POLICY.key();
/** @deprecated Use {@link #FAILED_WRITES_CLEANER_POLICY} and its methods instead */
@Deprecated
private static final String DEFAULT_FAILED_WRITES_CLEANER_POLICY = FAILED_WRITES_CLEANER_POLICY.defaultValue();
/** @deprecated Use {@link #AUTO_CLEAN} and its methods instead */
@Deprecated
private static final String DEFAULT_AUTO_CLEAN = AUTO_CLEAN.defaultValue();
/**
* @deprecated Use {@link #ASYNC_CLEAN} and its methods instead
*/
@Deprecated
private static final String DEFAULT_ASYNC_CLEAN = ASYNC_CLEAN.defaultValue();
/**
* @deprecated Use {@link #INLINE_COMPACT} and its methods instead
*/
@Deprecated
private static final String DEFAULT_INLINE_COMPACT = INLINE_COMPACT.defaultValue();
/**
* @deprecated Use {@link #CLEANER_INCREMENTAL_MODE_ENABLE} and its methods instead
*/
@Deprecated
private static final String DEFAULT_INCREMENTAL_CLEANER = CLEANER_INCREMENTAL_MODE_ENABLE.defaultValue();
/** @deprecated Use {@link #INLINE_COMPACT_NUM_DELTA_COMMITS} and its methods instead */
@Deprecated
private static final String DEFAULT_INLINE_COMPACT_NUM_DELTA_COMMITS = INLINE_COMPACT_NUM_DELTA_COMMITS.defaultValue();
@@ -500,30 +270,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
/** @deprecated Use {@link #INLINE_COMPACT_TRIGGER_STRATEGY} and its methods instead */
@Deprecated
private static final String DEFAULT_INLINE_COMPACT_TRIGGER_STRATEGY = INLINE_COMPACT_TRIGGER_STRATEGY.defaultValue();
/** @deprecated Use {@link #CLEANER_FILE_VERSIONS_RETAINED} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_FILE_VERSIONS_RETAINED = CLEANER_FILE_VERSIONS_RETAINED.defaultValue();
/** @deprecated Use {@link #CLEANER_COMMITS_RETAINED} and its methods instead */
@Deprecated
private static final String DEFAULT_CLEANER_COMMITS_RETAINED = CLEANER_COMMITS_RETAINED.defaultValue();
/** @deprecated Use {@link #MAX_COMMITS_TO_KEEP} and its methods instead */
@Deprecated
private static final String DEFAULT_MAX_COMMITS_TO_KEEP = MAX_COMMITS_TO_KEEP.defaultValue();
/**
* @deprecated Use {@link #MIN_COMMITS_TO_KEEP} and its methods instead
*/
@Deprecated
private static final String DEFAULT_MIN_COMMITS_TO_KEEP = MIN_COMMITS_TO_KEEP.defaultValue();
/**
* @deprecated Use {@link #COMMITS_ARCHIVAL_BATCH_SIZE} and its methods instead
*/
@Deprecated
private static final String DEFAULT_COMMITS_ARCHIVAL_BATCH_SIZE = COMMITS_ARCHIVAL_BATCH_SIZE.defaultValue();
/**
* @deprecated Use {@link #CLEANER_BOOTSTRAP_BASE_FILE_ENABLE} and its methods instead
*/
@Deprecated
private static final String DEFAULT_CLEANER_BOOTSTRAP_BASE_FILE_ENABLED = CLEANER_BOOTSTRAP_BASE_FILE_ENABLE.defaultValue();
/** @deprecated Use {@link #TARGET_PARTITIONS_PER_DAYBASED_COMPACTION} and its methods instead */
@Deprecated
public static final String TARGET_PARTITIONS_PER_DAYBASED_COMPACTION_PROP = TARGET_PARTITIONS_PER_DAYBASED_COMPACTION.key();
@@ -555,31 +301,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withAutoArchive(Boolean autoArchive) {
compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive));
return this;
}
public Builder withAsyncArchive(Boolean asyncArchive) {
compactionConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive));
return this;
}
public Builder withAutoClean(Boolean autoClean) {
compactionConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean));
return this;
}
public Builder withAsyncClean(Boolean asyncClean) {
compactionConfig.setValue(ASYNC_CLEAN, String.valueOf(asyncClean));
return this;
}
public Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) {
compactionConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode));
return this;
}
public Builder withInlineCompaction(Boolean inlineCompaction) {
compactionConfig.setValue(INLINE_COMPACT, String.valueOf(inlineCompaction));
return this;
@@ -595,57 +316,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withCleaningTriggerStrategy(String cleaningTriggerStrategy) {
compactionConfig.setValue(CLEAN_TRIGGER_STRATEGY, cleaningTriggerStrategy);
return this;
}
public Builder withMaxCommitsBeforeCleaning(int maxCommitsBeforeCleaning) {
compactionConfig.setValue(CLEAN_MAX_COMMITS, String.valueOf(maxCommitsBeforeCleaning));
return this;
}
public Builder withCleanerPolicy(HoodieCleaningPolicy policy) {
compactionConfig.setValue(CLEANER_POLICY, policy.name());
return this;
}
public Builder retainFileVersions(int fileVersionsRetained) {
compactionConfig.setValue(CLEANER_FILE_VERSIONS_RETAINED, String.valueOf(fileVersionsRetained));
return this;
}
public Builder retainCommits(int commitsRetained) {
compactionConfig.setValue(CLEANER_COMMITS_RETAINED, String.valueOf(commitsRetained));
return this;
}
public Builder cleanerNumHoursRetained(int cleanerHoursRetained) {
compactionConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained));
return this;
}
public Builder archiveCommitsWith(int minToKeep, int maxToKeep) {
compactionConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep));
compactionConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep));
return this;
}
public Builder withArchiveMergeFilesBatchSize(int number) {
compactionConfig.setValue(ARCHIVE_MERGE_FILES_BATCH_SIZE, String.valueOf(number));
return this;
}
public Builder withArchiveMergeSmallFileLimit(long size) {
compactionConfig.setValue(ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES, String.valueOf(size));
return this;
}
public Builder withArchiveMergeEnable(boolean enable) {
compactionConfig.setValue(ARCHIVE_MERGE_ENABLE, String.valueOf(enable));
return this;
}
public Builder compactionSmallFileSize(long smallFileLimitBytes) {
compactionConfig.setValue(PARQUET_SMALL_FILE_LIMIT, String.valueOf(smallFileLimitBytes));
return this;
@@ -671,26 +341,11 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder allowMultipleCleans(boolean allowMultipleCleanSchedules) {
compactionConfig.setValue(ALLOW_MULTIPLE_CLEANS, String.valueOf(allowMultipleCleanSchedules));
return this;
}
public Builder withCleanerParallelism(int cleanerParallelism) {
compactionConfig.setValue(CLEANER_PARALLELISM_VALUE, String.valueOf(cleanerParallelism));
return this;
}
public Builder withCompactionStrategy(CompactionStrategy compactionStrategy) {
compactionConfig.setValue(COMPACTION_STRATEGY, compactionStrategy.getClass().getName());
return this;
}
public Builder withPayloadClass(String payloadClassName) {
compactionConfig.setValue(PAYLOAD_CLASS_NAME, payloadClassName);
return this;
}
public Builder withTargetIOPerCompactionInMB(long targetIOPerCompactionInMB) {
compactionConfig.setValue(TARGET_IO_PER_COMPACTION_IN_MB, String.valueOf(targetIOPerCompactionInMB));
return this;
@@ -701,11 +356,6 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withArchiveDeleteParallelism(int archiveDeleteParallelism) {
compactionConfig.setValue(DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE, String.valueOf(archiveDeleteParallelism));
return this;
}
public Builder withMaxDeltaSecondsBeforeCompaction(int maxDeltaSecondsBeforeCompaction) {
compactionConfig.setValue(INLINE_COMPACT_TIME_DELTA_SECONDS, String.valueOf(maxDeltaSecondsBeforeCompaction));
return this;
@@ -736,49 +386,8 @@ public class HoodieCompactionConfig extends HoodieConfig {
return this;
}
public Builder withCommitsArchivalBatchSize(int batchSize) {
compactionConfig.setValue(COMMITS_ARCHIVAL_BATCH_SIZE, String.valueOf(batchSize));
return this;
}
public Builder withCleanBootstrapBaseFileEnabled(Boolean cleanBootstrapSourceFileEnabled) {
compactionConfig.setValue(CLEANER_BOOTSTRAP_BASE_FILE_ENABLE, String.valueOf(cleanBootstrapSourceFileEnabled));
return this;
}
public Builder withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy failedWritesPolicy) {
compactionConfig.setValue(FAILED_WRITES_CLEANER_POLICY, failedWritesPolicy.name());
return this;
}
public HoodieCompactionConfig build() {
compactionConfig.setDefaults(HoodieCompactionConfig.class.getName());
// validation
HoodieCleaningPolicy.valueOf(compactionConfig.getString(CLEANER_POLICY));
// Ensure minInstantsToKeep > cleanerCommitsRetained, otherwise we will archive some
// commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull
int minInstantsToKeep = Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP));
int maxInstantsToKeep = Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP));
int cleanerCommitsRetained =
Integer.parseInt(compactionConfig.getStringOrDefault(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED));
ValidationUtils.checkArgument(maxInstantsToKeep > minInstantsToKeep,
String.format(
"Increase %s=%d to be greater than %s=%d.",
HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), maxInstantsToKeep,
HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep));
ValidationUtils.checkArgument(minInstantsToKeep > cleanerCommitsRetained,
String.format(
"Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull "
+ "missing data from few instants.",
HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep,
HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), cleanerCommitsRetained));
boolean inlineCompact = compactionConfig.getBoolean(HoodieCompactionConfig.INLINE_COMPACT);
boolean inlineCompactSchedule = compactionConfig.getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT);
ValidationUtils.checkArgument(!(inlineCompact && inlineCompactSchedule), String.format("Either of inline compaction (%s) or "
+ "schedule inline compaction (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieCompactionConfig.INLINE_COMPACT.key(),
HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), inlineCompact, inlineCompactSchedule));
return compactionConfig;
}
}

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.common.config.ConfigClassProperty;
import org.apache.hudi.common.config.ConfigGroups;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload;
import java.io.File;
import java.io.FileReader;
@@ -52,6 +53,20 @@ public class HoodiePayloadConfig extends HoodieConfig {
.withDocumentation("Table column/field name to derive timestamp associated with the records. This can"
+ "be useful for e.g, determining the freshness of the table.");
public static final ConfigProperty<String> PAYLOAD_CLASS_NAME = ConfigProperty
.key("hoodie.compaction.payload.class")
.defaultValue(OverwriteWithLatestAvroPayload.class.getName())
.withDocumentation("This needs to be same as class used during insert/upserts. Just like writing, compaction also uses "
+ "the record payload class to merge records in the log against each other, merge again with the base file and "
+ "produce the final record to be written after compaction.");
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
@Deprecated
public static final String DEFAULT_PAYLOAD_CLASS = PAYLOAD_CLASS_NAME.defaultValue();
/** @deprecated Use {@link #PAYLOAD_CLASS_NAME} and its methods instead */
@Deprecated
public static final String PAYLOAD_CLASS_PROP = PAYLOAD_CLASS_NAME.key();
private HoodiePayloadConfig() {
super();
}
@@ -86,6 +101,11 @@ public class HoodiePayloadConfig extends HoodieConfig {
return this;
}
public HoodiePayloadConfig.Builder withPayloadClass(String payloadClassName) {
payloadConfig.setValue(PAYLOAD_CLASS_NAME, payloadClassName);
return this;
}
public HoodiePayloadConfig build() {
payloadConfig.setDefaults(HoodiePayloadConfig.class.getName());
return payloadConfig;

View File

@@ -119,16 +119,16 @@ public class HoodieStorageConfig extends HoodieConfig {
.withDocumentation("Whether to use dictionary encoding");
public static final ConfigProperty<String> PARQUET_WRITE_LEGACY_FORMAT_ENABLED = ConfigProperty
.key("hoodie.parquet.writelegacyformat.enabled")
.defaultValue("false")
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
.key("hoodie.parquet.writelegacyformat.enabled")
.defaultValue("false")
.withDocumentation("Sets spark.sql.parquet.writeLegacyFormat. If true, data will be written in a way of Spark 1.4 and earlier. "
+ "For example, decimal values will be written in Parquet's fixed-length byte array format which other systems such as Apache Hive and Apache Impala use. "
+ "If false, the newer format in Parquet will be used. For example, decimals will be written in int-based format.");
public static final ConfigProperty<String> PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty
.key("hoodie.parquet.outputtimestamptype")
.defaultValue("TIMESTAMP_MICROS")
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
.key("hoodie.parquet.outputtimestamptype")
.defaultValue("TIMESTAMP_MICROS")
.withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files.");
public static final ConfigProperty<String> HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty
.key("hoodie.hfile.compression.algorithm")

View File

@@ -89,6 +89,8 @@ import java.util.Properties;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import static org.apache.hudi.config.HoodieCleanConfig.CLEANER_POLICY;
/**
* Class storing configs for the HoodieWriteClient.
*/
@@ -1148,31 +1150,31 @@ public class HoodieWriteConfig extends HoodieConfig {
* compaction properties.
*/
public HoodieCleaningPolicy getCleanerPolicy() {
return HoodieCleaningPolicy.valueOf(getString(HoodieCompactionConfig.CLEANER_POLICY));
return HoodieCleaningPolicy.valueOf(getString(CLEANER_POLICY));
}
public int getCleanerFileVersionsRetained() {
return getInt(HoodieCompactionConfig.CLEANER_FILE_VERSIONS_RETAINED);
return getInt(HoodieCleanConfig.CLEANER_FILE_VERSIONS_RETAINED);
}
public int getCleanerCommitsRetained() {
return getInt(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED);
return getInt(HoodieCleanConfig.CLEANER_COMMITS_RETAINED);
}
public int getCleanerHoursRetained() {
return getInt(HoodieCompactionConfig.CLEANER_HOURS_RETAINED);
return getInt(HoodieCleanConfig.CLEANER_HOURS_RETAINED);
}
public int getMaxCommitsToKeep() {
return getInt(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP);
return getInt(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP);
}
public int getMinCommitsToKeep() {
return getInt(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP);
return getInt(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP);
}
public int getArchiveMergeFilesBatchSize() {
return getInt(HoodieCompactionConfig.ARCHIVE_MERGE_FILES_BATCH_SIZE);
return getInt(HoodieArchivalConfig.ARCHIVE_MERGE_FILES_BATCH_SIZE);
}
public int getParquetSmallFileLimit() {
@@ -1192,7 +1194,7 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public boolean allowMultipleCleans() {
return getBoolean(HoodieCompactionConfig.ALLOW_MULTIPLE_CLEANS);
return getBoolean(HoodieCleanConfig.ALLOW_MULTIPLE_CLEANS);
}
public boolean shouldAutoTuneInsertSplits() {
@@ -1200,43 +1202,43 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public int getCleanerParallelism() {
return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE);
return getInt(HoodieCleanConfig.CLEANER_PARALLELISM_VALUE);
}
public int getCleaningMaxCommits() {
return getInt(HoodieCompactionConfig.CLEAN_MAX_COMMITS);
return getInt(HoodieCleanConfig.CLEAN_MAX_COMMITS);
}
public CleaningTriggerStrategy getCleaningTriggerStrategy() {
return CleaningTriggerStrategy.valueOf(getString(HoodieCompactionConfig.CLEAN_TRIGGER_STRATEGY));
return CleaningTriggerStrategy.valueOf(getString(HoodieCleanConfig.CLEAN_TRIGGER_STRATEGY));
}
public boolean isAutoClean() {
return getBoolean(HoodieCompactionConfig.AUTO_CLEAN);
return getBoolean(HoodieCleanConfig.AUTO_CLEAN);
}
public boolean getArchiveMergeEnable() {
return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE);
return getBoolean(HoodieArchivalConfig.ARCHIVE_MERGE_ENABLE);
}
public long getArchiveMergeSmallFileLimitBytes() {
return getLong(HoodieCompactionConfig.ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES);
return getLong(HoodieArchivalConfig.ARCHIVE_MERGE_SMALL_FILE_LIMIT_BYTES);
}
public boolean isAutoArchive() {
return getBoolean(HoodieCompactionConfig.AUTO_ARCHIVE);
return getBoolean(HoodieArchivalConfig.AUTO_ARCHIVE);
}
public boolean isAsyncArchive() {
return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE);
return getBoolean(HoodieArchivalConfig.ASYNC_ARCHIVE);
}
public boolean isAsyncClean() {
return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN);
return getBoolean(HoodieCleanConfig.ASYNC_CLEAN);
}
public boolean incrementalCleanerModeEnabled() {
return getBoolean(HoodieCompactionConfig.CLEANER_INCREMENTAL_MODE_ENABLE);
return getBoolean(HoodieCleanConfig.CLEANER_INCREMENTAL_MODE_ENABLE);
}
public boolean inlineCompactionEnabled() {
@@ -1280,7 +1282,7 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public int getArchiveDeleteParallelism() {
return getInt(HoodieCompactionConfig.DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE);
return getInt(HoodieArchivalConfig.DELETE_ARCHIVED_INSTANT_PARALLELISM_VALUE);
}
public boolean inlineClusteringEnabled() {
@@ -1321,7 +1323,7 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public String getPayloadClass() {
return getString(HoodieCompactionConfig.PAYLOAD_CLASS_NAME);
return getString(HoodiePayloadConfig.PAYLOAD_CLASS_NAME);
}
public int getTargetPartitionsPerDayBasedCompaction() {
@@ -1329,11 +1331,11 @@ public class HoodieWriteConfig extends HoodieConfig {
}
public int getCommitArchivalBatchSize() {
return getInt(HoodieCompactionConfig.COMMITS_ARCHIVAL_BATCH_SIZE);
return getInt(HoodieArchivalConfig.COMMITS_ARCHIVAL_BATCH_SIZE);
}
public Boolean shouldCleanBootstrapBaseFile() {
return getBoolean(HoodieCompactionConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLE);
return getBoolean(HoodieCleanConfig.CLEANER_BOOTSTRAP_BASE_FILE_ENABLE);
}
public String getClusteringUpdatesStrategyClass() {
@@ -1342,7 +1344,7 @@ public class HoodieWriteConfig extends HoodieConfig {
public HoodieFailedWritesCleaningPolicy getFailedWritesCleanPolicy() {
return HoodieFailedWritesCleaningPolicy
.valueOf(getString(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY));
.valueOf(getString(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY));
}
/**
@@ -2117,6 +2119,8 @@ public class HoodieWriteConfig extends HoodieConfig {
private boolean isIndexConfigSet = false;
private boolean isStorageConfigSet = false;
private boolean isCompactionConfigSet = false;
private boolean isCleanConfigSet = false;
private boolean isArchivalConfigSet = false;
private boolean isClusteringConfigSet = false;
private boolean isOptimizeConfigSet = false;
private boolean isMetricsConfigSet = false;
@@ -2284,6 +2288,18 @@ public class HoodieWriteConfig extends HoodieConfig {
return this;
}
public Builder withCleanConfig(HoodieCleanConfig cleanConfig) {
writeConfig.getProps().putAll(cleanConfig.getProps());
isCleanConfigSet = true;
return this;
}
public Builder withArchivalConfig(HoodieArchivalConfig cleanConfig) {
writeConfig.getProps().putAll(cleanConfig.getProps());
isArchivalConfigSet = true;
return this;
}
public Builder withClusteringConfig(HoodieClusteringConfig clusteringConfig) {
writeConfig.getProps().putAll(clusteringConfig.getProps());
isClusteringConfigSet = true;
@@ -2517,6 +2533,10 @@ public class HoodieWriteConfig extends HoodieConfig {
writeConfig.getProps()).build());
writeConfig.setDefaultOnCondition(!isCompactionConfigSet,
HoodieCompactionConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
writeConfig.setDefaultOnCondition(!isCleanConfigSet,
HoodieCleanConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
writeConfig.setDefaultOnCondition(!isArchivalConfigSet,
HoodieArchivalConfig.newBuilder().fromProperties(writeConfig.getProps()).build());
writeConfig.setDefaultOnCondition(!isClusteringConfigSet,
HoodieClusteringConfig.newBuilder().withEngineType(engineType)
.fromProperties(writeConfig.getProps()).build());
@@ -2587,10 +2607,10 @@ public class HoodieWriteConfig extends HoodieConfig {
if (WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value()
.equalsIgnoreCase(writeConcurrencyMode)) {
// In this case, we assume that the user takes care of setting the lock provider used
writeConfig.setValue(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(),
writeConfig.setValue(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(),
HoodieFailedWritesCleaningPolicy.LAZY.name());
LOG.info(String.format("Automatically set %s=%s since optimistic concurrency control is used",
HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(),
HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY.key(),
HoodieFailedWritesCleaningPolicy.LAZY.name()));
}
}
@@ -2602,9 +2622,34 @@ public class HoodieWriteConfig extends HoodieConfig {
Objects.requireNonNull(writeConfig.getString(BASE_PATH));
if (writeConfig.getString(WRITE_CONCURRENCY_MODE)
.equalsIgnoreCase(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value())) {
ValidationUtils.checkArgument(!writeConfig.getString(HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY)
ValidationUtils.checkArgument(!writeConfig.getString(HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY)
.equals(HoodieFailedWritesCleaningPolicy.EAGER.name()), "To enable optimistic concurrency control, set hoodie.cleaner.policy.failed.writes=LAZY");
}
HoodieCleaningPolicy.valueOf(writeConfig.getString(CLEANER_POLICY));
// Ensure minInstantsToKeep > cleanerCommitsRetained, otherwise we will archive some
// commit instant on timeline, that still has not been cleaned. Could miss some data via incr pull
int minInstantsToKeep = Integer.parseInt(writeConfig.getStringOrDefault(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP));
int maxInstantsToKeep = Integer.parseInt(writeConfig.getStringOrDefault(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP));
int cleanerCommitsRetained =
Integer.parseInt(writeConfig.getStringOrDefault(HoodieCleanConfig.CLEANER_COMMITS_RETAINED));
ValidationUtils.checkArgument(maxInstantsToKeep > minInstantsToKeep,
String.format(
"Increase %s=%d to be greater than %s=%d.",
HoodieArchivalConfig.MAX_COMMITS_TO_KEEP.key(), maxInstantsToKeep,
HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep));
ValidationUtils.checkArgument(minInstantsToKeep > cleanerCommitsRetained,
String.format(
"Increase %s=%d to be greater than %s=%d. Otherwise, there is risk of incremental pull "
+ "missing data from few instants.",
HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep,
HoodieCleanConfig.CLEANER_COMMITS_RETAINED.key(), cleanerCommitsRetained));
boolean inlineCompact = writeConfig.getBoolean(HoodieCompactionConfig.INLINE_COMPACT);
boolean inlineCompactSchedule = writeConfig.getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT);
ValidationUtils.checkArgument(!(inlineCompact && inlineCompactSchedule), String.format("Either of inline compaction (%s) or "
+ "schedule inline compaction (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieCompactionConfig.INLINE_COMPACT.key(),
HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), inlineCompact, inlineCompactSchedule));
}
public HoodieWriteConfig build() {

View File

@@ -58,6 +58,8 @@ import org.apache.hudi.common.util.HoodieTimer;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieArchivalConfig;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.config.metrics.HoodieMetricsConfig;
@@ -255,20 +257,24 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta
.withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath()))
.withSchema(HoodieMetadataRecord.getClassSchema().toString())
.forTable(tableName)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
// we will trigger cleaning manually, to control the instant times
.withCleanConfig(HoodieCleanConfig.newBuilder()
.withAsyncClean(writeConfig.isMetadataAsyncClean())
// we will trigger cleaning manually, to control the instant times
.withAutoClean(false)
.withCleanerParallelism(parallelism)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
.retainCommits(writeConfig.getMetadataCleanerCommitsRetained())
.build())
// we will trigger archive manually, to ensure only regular writer invokes it
.withArchivalConfig(HoodieArchivalConfig.newBuilder()
.archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep)
// we will trigger compaction manually, to control the instant times
.withAutoArchive(false)
.build())
// we will trigger compaction manually, to control the instant times
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withInlineCompaction(false)
.withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax())
// we will trigger archive manually, to ensure only regular writer invokes it
.withAutoArchive(false)
// by default, the HFile does not keep the metadata fields, set up as false
// to always use the metadata of the new record.
.withPreserveCommitMetadata(false)

View File

@@ -26,7 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieLockConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieLockException;
@@ -57,9 +57,9 @@ public class TestTransactionManager extends HoodieCommonTestHarness {
private HoodieWriteConfig getWriteConfig() {
return HoodieWriteConfig.newBuilder()
.withPath(basePath)
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
.build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY)
.build())
.withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL)
.withLockConfig(HoodieLockConfig.newBuilder()
.withLockProvider(InProcessLockProvider.class)

View File

@@ -47,10 +47,10 @@ import java.util.Properties;
import java.util.function.Function;
import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE;
import static org.apache.hudi.config.HoodieCompactionConfig.ASYNC_ARCHIVE;
import static org.apache.hudi.config.HoodieCompactionConfig.ASYNC_CLEAN;
import static org.apache.hudi.config.HoodieCompactionConfig.AUTO_CLEAN;
import static org.apache.hudi.config.HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY;
import static org.apache.hudi.config.HoodieArchivalConfig.ASYNC_ARCHIVE;
import static org.apache.hudi.config.HoodieCleanConfig.ASYNC_CLEAN;
import static org.apache.hudi.config.HoodieCleanConfig.AUTO_CLEAN;
import static org.apache.hudi.config.HoodieCleanConfig.FAILED_WRITES_CLEANER_POLICY;
import static org.apache.hudi.config.HoodieCompactionConfig.INLINE_COMPACT;
import static org.apache.hudi.config.HoodieWriteConfig.TABLE_SERVICES_ENABLED;
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
@@ -64,9 +64,9 @@ public class TestHoodieWriteConfig {
public void testPropertyLoading(boolean withAlternative) throws IOException {
Builder builder = HoodieWriteConfig.newBuilder().withPath("/tmp");
Map<String, String> params = new HashMap<>(3);
params.put(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1");
params.put(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "5");
params.put(HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2");
params.put(HoodieCleanConfig.CLEANER_COMMITS_RETAINED.key(), "1");
params.put(HoodieArchivalConfig.MAX_COMMITS_TO_KEEP.key(), "5");
params.put(HoodieArchivalConfig.MIN_COMMITS_TO_KEEP.key(), "2");
if (withAlternative) {
params.put("hoodie.avro.schema.externalTransformation", "true");
} else {