1
0

[HUDI-1930] Bootstrap support configure KeyGenerator by type (#3170)

* [HUDI-1930] Bootstrap support configure KeyGenerator by type
This commit is contained in:
wangxianghu
2021-07-03 20:27:37 +08:00
committed by GitHub
parent 4f215e2938
commit 62a1ad8b3a
7 changed files with 73 additions and 9 deletions

View File

@@ -36,6 +36,7 @@ import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy;
import org.apache.hudi.table.upgrade.SparkUpgradeDowngrade;
import org.apache.hudi.utilities.HDFSParquetImporter;
@@ -58,6 +59,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
/**
* This class deals with initializing spark context based on command entered to hudi-cli.
@@ -353,14 +355,20 @@ public class SparkMain {
private static int doBootstrap(JavaSparkContext jsc, String tableName, String tableType, String basePath,
String sourcePath, String recordKeyCols, String partitionFields, String parallelism, String schemaProviderClass,
String bootstrapIndexClass, String selectorClass, String keyGeneratorClass, String fullBootstrapInputProvider,
String bootstrapIndexClass, String selectorClass, String keyGenerator, String fullBootstrapInputProvider,
String payloadClassName, String enableHiveSync, String propsFilePath, List<String> configs) throws IOException {
TypedProperties properties = propsFilePath == null ? UtilHelpers.buildProperties(configs)
: UtilHelpers.readConfig(FSUtils.getFs(propsFilePath, jsc.hadoopConfiguration()), new Path(propsFilePath), configs).getConfig();
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_BASE_PATH_PROP.key(), sourcePath);
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key(), keyGeneratorClass);
if (!StringUtils.isNullOrEmpty(keyGenerator) && KeyGeneratorType.getNames().contains(keyGenerator.toUpperCase(Locale.ROOT))) {
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_TYPE.key(), keyGenerator.toUpperCase(Locale.ROOT));
} else {
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS.key(), keyGenerator);
}
properties.setProperty(HoodieBootstrapConfig.FULL_BOOTSTRAP_INPUT_PROVIDER.key(), fullBootstrapInputProvider);
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_PARALLELISM.key(), parallelism);
properties.setProperty(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR.key(), selectorClass);

View File

@@ -25,6 +25,7 @@ import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.HoodieConfig;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import java.io.File;
import java.io.FileReader;
@@ -60,6 +61,12 @@ public class HoodieBootstrapConfig extends HoodieConfig {
.sinceVersion("0.6.0")
.withDocumentation("Key generator implementation to be used for generating keys from the bootstrapped dataset");
public static final ConfigProperty<String> BOOTSTRAP_KEYGEN_TYPE = ConfigProperty
.key("hoodie.bootstrap.keygen.type")
.defaultValue(KeyGeneratorType.SIMPLE.name())
.sinceVersion("0.9.0")
.withDocumentation("Type of build-in key generator, currently support SIMPLE, COMPLEX, TIMESTAMP, CUSTOM, NON_PARTITION, GLOBAL_DELETE");
public static final ConfigProperty<String> BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS = ConfigProperty
.key("hoodie.bootstrap.partitionpath.translator.class")
.defaultValue(IdentityBootstrapPartitionPathTranslator.class.getName())
@@ -131,6 +138,11 @@ public class HoodieBootstrapConfig extends HoodieConfig {
return this;
}
public Builder withBootstrapKeyGenType(String keyGenType) {
bootstrapConfig.setValue(BOOTSTRAP_KEYGEN_TYPE, keyGenType);
return this;
}
public Builder withBootstrapPartitionPathTranslatorClass(String partitionPathTranslatorClass) {
bootstrapConfig
.setValue(BOOTSTRAP_PARTITION_PATH_TRANSLATOR_CLASS, partitionPathTranslatorClass);

View File

@@ -1126,6 +1126,10 @@ public class HoodieWriteConfig extends HoodieConfig {
return getString(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_CLASS);
}
public String getBootstrapKeyGeneratorType() {
return getString(HoodieBootstrapConfig.BOOTSTRAP_KEYGEN_TYPE);
}
public String getBootstrapModeSelectorRegex() {
return getString(HoodieBootstrapConfig.BOOTSTRAP_MODE_SELECTOR_REGEX);
}

View File

@@ -18,6 +18,10 @@
package org.apache.hudi.keygen.constant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Types of {@link org.apache.hudi.keygen.KeyGenerator}.
*/
@@ -55,5 +59,12 @@ public enum KeyGeneratorType {
/**
* Key generator for deletes using global indices.
*/
GLOBAL_DELETE
GLOBAL_DELETE;
public static List<String> getNames() {
List<String> names = new ArrayList<>(KeyGeneratorType.values().length);
Arrays.stream(KeyGeneratorType.values())
.forEach(x -> names.add(x.name()));
return names;
}
}

View File

@@ -18,6 +18,7 @@
package org.apache.hudi.keygen.factory;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieKeyGeneratorException;
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
@@ -30,6 +31,9 @@ import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
@@ -41,6 +45,9 @@ import java.util.Objects;
* will not be overwritten by {@link KeyGeneratorType}
*/
public class HoodieAvroKeyGeneratorFactory {
private static final Logger LOG = LoggerFactory.getLogger(HoodieAvroKeyGeneratorFactory.class);
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
// keyGenerator class name has higher priority
KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props);
@@ -50,7 +57,12 @@ public class HoodieAvroKeyGeneratorFactory {
private static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) throws IOException {
// Use KeyGeneratorType.SIMPLE as default keyGeneratorType
String keyGeneratorType =
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), KeyGeneratorType.SIMPLE.name());
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), null);
if (StringUtils.isNullOrEmpty(keyGeneratorType)) {
LOG.info("The value of {} is empty, use SIMPLE", HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key());
keyGeneratorType = KeyGeneratorType.SIMPLE.name();
}
KeyGeneratorType keyGeneratorTypeEnum;
try {

View File

@@ -19,6 +19,7 @@
package org.apache.hudi.keygen.factory;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieKeyGeneratorException;
import org.apache.hudi.keygen.BuiltinKeyGenerator;
@@ -32,6 +33,9 @@ import org.apache.hudi.keygen.SimpleKeyGenerator;
import org.apache.hudi.keygen.TimestampBasedKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
@@ -44,6 +48,8 @@ import java.util.Objects;
*/
public class HoodieSparkKeyGeneratorFactory {
private static final Logger LOG = LoggerFactory.getLogger(HoodieSparkKeyGeneratorFactory.class);
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
// keyGenerator class name has higher priority
KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props);
@@ -54,7 +60,12 @@ public class HoodieSparkKeyGeneratorFactory {
private static BuiltinKeyGenerator createKeyGeneratorByType(TypedProperties props) throws IOException {
// Use KeyGeneratorType.SIMPLE as default keyGeneratorType
String keyGeneratorType =
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), KeyGeneratorType.SIMPLE.name());
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key(), null);
if (StringUtils.isNullOrEmpty(keyGeneratorType)) {
LOG.info("The value of {} is empty, use SIMPLE", HoodieWriteConfig.KEYGENERATOR_TYPE_PROP.key());
keyGeneratorType = KeyGeneratorType.SIMPLE.name();
}
KeyGeneratorType keyGeneratorTypeEnum;
try {

View File

@@ -57,10 +57,12 @@ import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieCommitException;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieKeyGeneratorException;
import org.apache.hudi.exception.HoodieMetadataException;
import org.apache.hudi.execution.SparkBoundedInMemoryExecutor;
import org.apache.hudi.io.HoodieBootstrapHandle;
import org.apache.hudi.keygen.KeyGeneratorInterface;
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter;
import org.apache.hudi.table.HoodieSparkTable;
@@ -123,8 +125,6 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
"Ensure Bootstrap Source Path is set");
ValidationUtils.checkArgument(config.getBootstrapModeSelectorClass() != null,
"Ensure Bootstrap Partition Selector is set");
ValidationUtils.checkArgument(config.getBootstrapKeyGeneratorClass() != null,
"Ensure bootstrap key generator class is set");
}
@Override
@@ -390,8 +390,14 @@ public class SparkBootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>
TypedProperties properties = new TypedProperties();
properties.putAll(config.getProps());
KeyGeneratorInterface keyGenerator = (KeyGeneratorInterface) ReflectionUtils.loadClass(config.getBootstrapKeyGeneratorClass(),
properties);
KeyGeneratorInterface keyGenerator;
try {
keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(properties);
} catch (IOException e) {
throw new HoodieKeyGeneratorException("Init keyGenerator failed ", e);
}
BootstrapPartitionPathTranslator translator = (BootstrapPartitionPathTranslator) ReflectionUtils.loadClass(
config.getBootstrapPartitionPathTranslatorClass(), properties);