1
0

[HUDI-1929] Support configure KeyGenerator by type (#2993)

This commit is contained in:
wangxianghu
2021-06-08 21:26:10 +08:00
committed by GitHub
parent f760ec543e
commit 7261f08507
20 changed files with 819 additions and 106 deletions

View File

@@ -37,7 +37,7 @@ import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import org.apache.hudi.metrics.MetricsReporterType;
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
@@ -72,8 +72,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
public static final String PRECOMBINE_FIELD_PROP = "hoodie.datasource.write.precombine.field";
public static final String WRITE_PAYLOAD_CLASS = "hoodie.datasource.write.payload.class";
public static final String DEFAULT_WRITE_PAYLOAD_CLASS = OverwriteWithLatestAvroPayload.class.getName();
public static final String KEYGENERATOR_CLASS_PROP = "hoodie.datasource.write.keygenerator.class";
public static final String DEFAULT_KEYGENERATOR_CLASS = SimpleAvroKeyGenerator.class.getName();
public static final String KEYGENERATOR_TYPE_PROP = "hoodie.datasource.write.keygenerator.type";
public static final String DEFAULT_KEYGENERATOR_TYPE = KeyGeneratorType.SIMPLE.name();
public static final String DEFAULT_ROLLBACK_USING_MARKERS = "false";
public static final String ROLLBACK_USING_MARKERS = "hoodie.rollback.using.markers";
public static final String TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
@@ -1370,8 +1373,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM,
DEFAULT_ROLLBACK_PARALLELISM);
setDefaultOnCondition(props, !props.containsKey(KEYGENERATOR_CLASS_PROP),
KEYGENERATOR_CLASS_PROP, DEFAULT_KEYGENERATOR_CLASS);
setDefaultOnCondition(props, !props.containsKey(KEYGENERATOR_TYPE_PROP),
KEYGENERATOR_TYPE_PROP, DEFAULT_KEYGENERATOR_TYPE);
setDefaultOnCondition(props, !props.containsKey(WRITE_PAYLOAD_CLASS),
WRITE_PAYLOAD_CLASS, DEFAULT_WRITE_PAYLOAD_CLASS);
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_USING_MARKERS), ROLLBACK_USING_MARKERS,

View File

@@ -23,6 +23,8 @@ import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieKeyException;
import org.apache.hudi.exception.HoodieNotSupportedException;
import org.apache.hudi.keygen.parser.AbstractHoodieDateTimeParser;
@@ -152,4 +154,24 @@ public class KeyGenUtils {
}
});
}
/**
* Create a key generator class via reflection, passing in any configs needed.
* <p>
* This method is for user-defined classes. To create hudi's built-in key generators, please set proper
* {@link org.apache.hudi.keygen.constant.KeyGeneratorType} conf, and use the relevant factory, see
* {@link org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory}.
*/
public static KeyGenerator createKeyGeneratorByClassName(TypedProperties props) throws IOException {
KeyGenerator keyGenerator = null;
String keyGeneratorClass = props.getString(HoodieWriteConfig.KEYGENERATOR_CLASS_PROP, null);
if (!StringUtils.isNullOrEmpty(keyGeneratorClass)) {
try {
keyGenerator = (KeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, props);
} catch (Throwable e) {
throw new IOException("Could not load key generator class " + keyGeneratorClass, e);
}
}
return keyGenerator;
}
}

View File

@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.keygen.constant;
/**
* Types of {@link org.apache.hudi.keygen.KeyGenerator}.
*/
public enum KeyGeneratorType {
/**
* Simple key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
SIMPLE,
/**
* Complex key generator, which takes names of fields to be used for recordKey and partitionPath as configs.
*/
COMPLEX,
/**
* Key generator, that relies on timestamps for partitioning field. Still picks record key by name.
*/
TIMESTAMP,
/**
* This is a generic implementation type of KeyGenerator where users can configure record key as a single field or
* a combination of fields. Similarly partition path can be configured to have multiple fields or only one field.
* <p>
* This KeyGenerator expects value for prop "hoodie.datasource.write.partitionpath.field" in a specific format.
* For example:
* properties.put("hoodie.datasource.write.partitionpath.field", "field1:PartitionKeyType1,field2:PartitionKeyType2").
*/
CUSTOM,
/**
* Simple Key generator for unpartitioned Hive Tables.
*/
NON_PARTITION,
/**
* Key generator for deletes using global indices.
*/
GLOBAL_DELETE
}

View File

@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.keygen.factory;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieKeyGeneratorException;
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
import org.apache.hudi.keygen.CustomAvroKeyGenerator;
import org.apache.hudi.keygen.GlobalAvroDeleteKeyGenerator;
import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.keygen.KeyGenerator;
import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator;
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator;
import org.apache.hudi.keygen.constant.KeyGeneratorType;
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
/**
* Factory help to create {@link org.apache.hudi.keygen.KeyGenerator}.
* <p>
* This factory will try {@link HoodieWriteConfig#KEYGENERATOR_CLASS_PROP} firstly, this ensures the class prop
* will not be overwritten by {@link KeyGeneratorType}
*/
public class HoodieAvroKeyGeneratorFactory {
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
// keyGenerator class name has higher priority
KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props);
return Objects.isNull(keyGenerator) ? createAvroKeyGeneratorByType(props) : keyGenerator;
}
private static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) throws IOException {
// Use KeyGeneratorType.SIMPLE as default keyGeneratorType
String keyGeneratorType =
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP, KeyGeneratorType.SIMPLE.name());
KeyGeneratorType keyGeneratorTypeEnum;
try {
keyGeneratorTypeEnum = KeyGeneratorType.valueOf(keyGeneratorType.toUpperCase(Locale.ROOT));
} catch (IllegalArgumentException e) {
throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType);
}
switch (keyGeneratorTypeEnum) {
case SIMPLE:
return new SimpleAvroKeyGenerator(props);
case COMPLEX:
return new ComplexAvroKeyGenerator(props);
case TIMESTAMP:
return new TimestampBasedAvroKeyGenerator(props);
case CUSTOM:
return new CustomAvroKeyGenerator(props);
case NON_PARTITION:
return new NonpartitionedAvroKeyGenerator(props);
case GLOBAL_DELETE:
return new GlobalAvroDeleteKeyGenerator(props);
default:
throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType);
}
}
}