[HUDI-3404] Automatically adjust write configs based on metadata table and write concurrency mode (#4975)
This commit is contained in:
@@ -21,20 +21,26 @@ package org.apache.hudi.config;
|
||||
import org.apache.hudi.client.transaction.FileSystemBasedLockProviderTestClass;
|
||||
import org.apache.hudi.client.transaction.lock.InProcessLockProvider;
|
||||
import org.apache.hudi.client.transaction.lock.ZookeeperBasedLockProvider;
|
||||
import org.apache.hudi.common.config.HoodieMetadataConfig;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.engine.EngineType;
|
||||
import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
import org.apache.hudi.common.model.WriteConcurrencyMode;
|
||||
import org.apache.hudi.common.table.HoodieTableConfig;
|
||||
import org.apache.hudi.common.table.marker.MarkerType;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.config.HoodieWriteConfig.Builder;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.EnumSource;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
@@ -44,9 +50,11 @@ import java.util.function.Function;
|
||||
import static org.apache.hudi.config.HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.ASYNC_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.AUTO_CLEAN;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY;
|
||||
import static org.apache.hudi.config.HoodieCompactionConfig.INLINE_COMPACT;
|
||||
import static org.apache.hudi.config.HoodieWriteConfig.TABLE_SERVICES_ENABLED;
|
||||
import static org.apache.hudi.config.HoodieWriteConfig.WRITE_CONCURRENCY_MODE;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class TestHoodieWriteConfig {
|
||||
@@ -114,89 +122,195 @@ public class TestHoodieWriteConfig {
|
||||
EngineType.JAVA, MarkerType.DIRECT));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultLockProviderWhenAsyncServicesEnabled() {
|
||||
@ParameterizedTest
|
||||
@EnumSource(HoodieTableType.class)
|
||||
public void testAutoConcurrencyConfigAdjustmentWithTableServices(HoodieTableType tableType) {
|
||||
final String inProcessLockProviderClassName = InProcessLockProvider.class.getCanonicalName();
|
||||
|
||||
// Any async clustering enabled should use InProcess lock provider
|
||||
// as default when no other lock provider is set.
|
||||
|
||||
// With metadata table enabled by default, any async table service enabled should
|
||||
// use InProcess lock provider as default when no other lock provider is set.
|
||||
// 1. Async clustering
|
||||
HoodieWriteConfig writeConfig = createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "true");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
});
|
||||
assertTrue(writeConfig.areAnyTableServicesAsync());
|
||||
assertEquals(inProcessLockProviderClassName, writeConfig.getLockProviderClass());
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "true");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
}), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL,
|
||||
HoodieFailedWritesCleaningPolicy.LAZY, inProcessLockProviderClassName);
|
||||
|
||||
// 2. Async clean
|
||||
writeConfig = createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "true");
|
||||
}
|
||||
});
|
||||
assertTrue(writeConfig.areAnyTableServicesAsync());
|
||||
assertEquals(inProcessLockProviderClassName, writeConfig.getLockProviderClass());
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "true");
|
||||
}
|
||||
}), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL,
|
||||
HoodieFailedWritesCleaningPolicy.LAZY, inProcessLockProviderClassName);
|
||||
|
||||
// 3. Async compaction
|
||||
writeConfig = createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "false");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
});
|
||||
assertTrue(writeConfig.areAnyTableServicesAsync());
|
||||
assertEquals(inProcessLockProviderClassName, writeConfig.getLockProviderClass());
|
||||
// 3. Async compaction configured
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "false");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
}), true,
|
||||
tableType == HoodieTableType.MERGE_ON_READ,
|
||||
tableType == HoodieTableType.MERGE_ON_READ
|
||||
? WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL
|
||||
: WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
tableType == HoodieTableType.MERGE_ON_READ
|
||||
? HoodieFailedWritesCleaningPolicy.LAZY
|
||||
: HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
tableType == HoodieTableType.MERGE_ON_READ
|
||||
? inProcessLockProviderClassName
|
||||
: HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue());
|
||||
|
||||
// 4. All inline services
|
||||
writeConfig = createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
});
|
||||
assertFalse(writeConfig.areAnyTableServicesAsync());
|
||||
assertTrue(writeConfig.areAnyTableServicesExecutedInline());
|
||||
assertEquals(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue(), writeConfig.getLockProviderClass());
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
}), Option.of(true), Option.of(false), Option.of(true),
|
||||
WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue());
|
||||
}
|
||||
|
||||
// 5. User override for the lock provider should always take the precedence
|
||||
writeConfig = HoodieWriteConfig.newBuilder()
|
||||
@ParameterizedTest
|
||||
@EnumSource(HoodieTableType.class)
|
||||
public void testAutoConcurrencyConfigAdjustmentWithUserConfigs(HoodieTableType tableType) {
|
||||
// 1. User override for the lock provider should always take the precedence
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
|
||||
.withPath("/tmp")
|
||||
.withLockConfig(HoodieLockConfig.newBuilder()
|
||||
.withLockProvider(FileSystemBasedLockProviderTestClass.class)
|
||||
.build())
|
||||
.build();
|
||||
assertEquals(FileSystemBasedLockProviderTestClass.class.getName(), writeConfig.getLockProviderClass());
|
||||
|
||||
// 6. User can set the lock provider via properties
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), ZookeeperBasedLockProvider.class.getName());
|
||||
writeConfig = HoodieWriteConfig.newBuilder()
|
||||
.withPath("/tmp")
|
||||
.withProperties(properties)
|
||||
.build();
|
||||
assertEquals(ZookeeperBasedLockProvider.class.getName(), writeConfig.getLockProviderClass());
|
||||
verifyConcurrencyControlRelatedConfigs(writeConfig,
|
||||
true, tableType == HoodieTableType.MERGE_ON_READ,
|
||||
WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
FileSystemBasedLockProviderTestClass.class.getName());
|
||||
|
||||
// Default config should have default lock provider
|
||||
writeConfig = createWriteConfig(Collections.emptyMap());
|
||||
if (!writeConfig.areAnyTableServicesAsync()) {
|
||||
assertEquals(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue(), writeConfig.getLockProviderClass());
|
||||
// 2. User can set the lock provider via properties
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "false");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "true");
|
||||
put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(),
|
||||
ZookeeperBasedLockProvider.class.getName());
|
||||
}
|
||||
}), true, true,
|
||||
WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
ZookeeperBasedLockProvider.class.getName());
|
||||
|
||||
// 3. Default config should have default lock provider
|
||||
writeConfig = createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
}
|
||||
});
|
||||
if (writeConfig.areAnyTableServicesAsync()) {
|
||||
verifyConcurrencyControlRelatedConfigs(writeConfig,
|
||||
true, true,
|
||||
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL,
|
||||
HoodieFailedWritesCleaningPolicy.LAZY,
|
||||
InProcessLockProvider.class.getName());
|
||||
} else {
|
||||
assertEquals(inProcessLockProviderClassName, writeConfig.getLockProviderClass());
|
||||
verifyConcurrencyControlRelatedConfigs(writeConfig,
|
||||
true, false,
|
||||
WriteConcurrencyMode.valueOf(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue());
|
||||
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(HoodieTableType.class)
|
||||
public void testAutoConcurrencyConfigAdjustmentWithNoTableService(HoodieTableType tableType) {
|
||||
// 1. No table service, concurrency control configs should not be overwritten
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(TABLE_SERVICES_ENABLED.key(), "false");
|
||||
}
|
||||
}), false, tableType == HoodieTableType.MERGE_ON_READ,
|
||||
WriteConcurrencyMode.fromValue(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue());
|
||||
|
||||
// 2. No table service, with optimistic concurrency control,
|
||||
// failed write clean policy should be updated accordingly
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(TABLE_SERVICES_ENABLED.key(), "false");
|
||||
put(WRITE_CONCURRENCY_MODE.key(),
|
||||
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value());
|
||||
put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(),
|
||||
FileSystemBasedLockProviderTestClass.class.getName());
|
||||
}
|
||||
}), false, tableType == HoodieTableType.MERGE_ON_READ,
|
||||
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL,
|
||||
HoodieFailedWritesCleaningPolicy.LAZY,
|
||||
FileSystemBasedLockProviderTestClass.class.getName());
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@EnumSource(HoodieTableType.class)
|
||||
public void testAutoConcurrencyConfigAdjustmentWithMetadataTableDisabled(HoodieTableType tableType) {
|
||||
// 1. Metadata table disabled, with async table services, concurrency control configs
|
||||
// should not be changed
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(HoodieTableConfig.TYPE.key(), tableType.name());
|
||||
put(HoodieMetadataConfig.ENABLE.key(), "false");
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "true");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
}
|
||||
}), true, true,
|
||||
WriteConcurrencyMode.fromValue(WRITE_CONCURRENCY_MODE.defaultValue()),
|
||||
HoodieFailedWritesCleaningPolicy.valueOf(FAILED_WRITES_CLEANER_POLICY.defaultValue()),
|
||||
HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue());
|
||||
|
||||
// 2. Metadata table disabled, with optimistic concurrency control,
|
||||
// failed write clean policy should be updated accordingly
|
||||
verifyConcurrencyControlRelatedConfigs(createWriteConfig(new HashMap<String, String>() {
|
||||
{
|
||||
put(ASYNC_CLUSTERING_ENABLE.key(), "true");
|
||||
put(INLINE_COMPACT.key(), "true");
|
||||
put(AUTO_CLEAN.key(), "true");
|
||||
put(ASYNC_CLEAN.key(), "false");
|
||||
put(WRITE_CONCURRENCY_MODE.key(),
|
||||
WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.value());
|
||||
put(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(),
|
||||
FileSystemBasedLockProviderTestClass.class.getName());
|
||||
}
|
||||
}), true, true, WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL,
|
||||
HoodieFailedWritesCleaningPolicy.LAZY, FileSystemBasedLockProviderTestClass.class.getName());
|
||||
}
|
||||
|
||||
private HoodieWriteConfig createWriteConfig(Map<String, String> configs) {
|
||||
final Properties properties = new Properties();
|
||||
configs.forEach(properties::setProperty);
|
||||
@@ -253,4 +367,38 @@ public class TestHoodieWriteConfig {
|
||||
mapping.put(k3, v3);
|
||||
return mapping;
|
||||
}
|
||||
|
||||
private void verifyConcurrencyControlRelatedConfigs(
|
||||
HoodieWriteConfig writeConfig, boolean expectedTableServicesEnabled,
|
||||
boolean expectedAnyTableServicesAsync,
|
||||
WriteConcurrencyMode expectedConcurrencyMode,
|
||||
HoodieFailedWritesCleaningPolicy expectedCleanPolicy,
|
||||
String expectedLockProviderName) {
|
||||
verifyConcurrencyControlRelatedConfigs(writeConfig, Option.of(expectedTableServicesEnabled),
|
||||
Option.of(expectedAnyTableServicesAsync), Option.empty(), expectedConcurrencyMode,
|
||||
expectedCleanPolicy, expectedLockProviderName);
|
||||
}
|
||||
|
||||
private void verifyConcurrencyControlRelatedConfigs(
|
||||
HoodieWriteConfig writeConfig, Option<Boolean> expectedTableServicesEnabled,
|
||||
Option<Boolean> expectedAnyTableServicesAsync,
|
||||
Option<Boolean> expectedAnyTableServicesExecutedInline,
|
||||
WriteConcurrencyMode expectedConcurrencyMode,
|
||||
HoodieFailedWritesCleaningPolicy expectedCleanPolicy,
|
||||
String expectedLockProviderName) {
|
||||
if (expectedTableServicesEnabled.isPresent()) {
|
||||
assertEquals(expectedTableServicesEnabled.get(), writeConfig.areTableServicesEnabled());
|
||||
}
|
||||
if (expectedAnyTableServicesAsync.isPresent()) {
|
||||
assertEquals(expectedAnyTableServicesAsync.get(), writeConfig.areAnyTableServicesAsync());
|
||||
}
|
||||
if (expectedAnyTableServicesExecutedInline.isPresent()) {
|
||||
assertEquals(expectedAnyTableServicesExecutedInline.get(),
|
||||
writeConfig.areAnyTableServicesExecutedInline());
|
||||
}
|
||||
|
||||
assertEquals(expectedConcurrencyMode, writeConfig.getWriteConcurrencyMode());
|
||||
assertEquals(expectedCleanPolicy, writeConfig.getFailedWritesCleanPolicy());
|
||||
assertEquals(expectedLockProviderName, writeConfig.getLockProviderClass());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user