[HUDI-1929] Support configure KeyGenerator by type (#2993)
This commit is contained in:
@@ -37,7 +37,7 @@ import org.apache.hudi.common.util.ReflectionUtils;
|
||||
import org.apache.hudi.common.util.ValidationUtils;
|
||||
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorType;
|
||||
import org.apache.hudi.metrics.MetricsReporterType;
|
||||
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
|
||||
import org.apache.hudi.table.action.compact.CompactionTriggerStrategy;
|
||||
@@ -72,8 +72,11 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
public static final String PRECOMBINE_FIELD_PROP = "hoodie.datasource.write.precombine.field";
|
||||
public static final String WRITE_PAYLOAD_CLASS = "hoodie.datasource.write.payload.class";
|
||||
public static final String DEFAULT_WRITE_PAYLOAD_CLASS = OverwriteWithLatestAvroPayload.class.getName();
|
||||
|
||||
public static final String KEYGENERATOR_CLASS_PROP = "hoodie.datasource.write.keygenerator.class";
|
||||
public static final String DEFAULT_KEYGENERATOR_CLASS = SimpleAvroKeyGenerator.class.getName();
|
||||
public static final String KEYGENERATOR_TYPE_PROP = "hoodie.datasource.write.keygenerator.type";
|
||||
public static final String DEFAULT_KEYGENERATOR_TYPE = KeyGeneratorType.SIMPLE.name();
|
||||
|
||||
public static final String DEFAULT_ROLLBACK_USING_MARKERS = "false";
|
||||
public static final String ROLLBACK_USING_MARKERS = "hoodie.rollback.using.markers";
|
||||
public static final String TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version";
|
||||
@@ -1370,8 +1373,8 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
|
||||
|
||||
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_PARALLELISM), ROLLBACK_PARALLELISM,
|
||||
DEFAULT_ROLLBACK_PARALLELISM);
|
||||
setDefaultOnCondition(props, !props.containsKey(KEYGENERATOR_CLASS_PROP),
|
||||
KEYGENERATOR_CLASS_PROP, DEFAULT_KEYGENERATOR_CLASS);
|
||||
setDefaultOnCondition(props, !props.containsKey(KEYGENERATOR_TYPE_PROP),
|
||||
KEYGENERATOR_TYPE_PROP, DEFAULT_KEYGENERATOR_TYPE);
|
||||
setDefaultOnCondition(props, !props.containsKey(WRITE_PAYLOAD_CLASS),
|
||||
WRITE_PAYLOAD_CLASS, DEFAULT_WRITE_PAYLOAD_CLASS);
|
||||
setDefaultOnCondition(props, !props.containsKey(ROLLBACK_USING_MARKERS), ROLLBACK_USING_MARKERS,
|
||||
|
||||
@@ -23,6 +23,8 @@ import org.apache.hudi.avro.HoodieAvroUtils;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils;
|
||||
import org.apache.hudi.common.util.ReflectionUtils;
|
||||
import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.keygen.parser.AbstractHoodieDateTimeParser;
|
||||
@@ -152,4 +154,24 @@ public class KeyGenUtils {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a key generator class via reflection, passing in any configs needed.
|
||||
* <p>
|
||||
* This method is for user-defined classes. To create hudi's built-in key generators, please set proper
|
||||
* {@link org.apache.hudi.keygen.constant.KeyGeneratorType} conf, and use the relevant factory, see
|
||||
* {@link org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory}.
|
||||
*/
|
||||
public static KeyGenerator createKeyGeneratorByClassName(TypedProperties props) throws IOException {
|
||||
KeyGenerator keyGenerator = null;
|
||||
String keyGeneratorClass = props.getString(HoodieWriteConfig.KEYGENERATOR_CLASS_PROP, null);
|
||||
if (!StringUtils.isNullOrEmpty(keyGeneratorClass)) {
|
||||
try {
|
||||
keyGenerator = (KeyGenerator) ReflectionUtils.loadClass(keyGeneratorClass, props);
|
||||
} catch (Throwable e) {
|
||||
throw new IOException("Could not load key generator class " + keyGeneratorClass, e);
|
||||
}
|
||||
}
|
||||
return keyGenerator;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen.constant;
|
||||
|
||||
/**
 * Enumerates the kinds of {@link org.apache.hudi.keygen.KeyGenerator} that can be selected by type.
 */
public enum KeyGeneratorType {

  /**
   * Simple key generator: the names of the fields used for recordKey and partitionPath are taken from configs.
   */
  SIMPLE,

  /**
   * Complex key generator: the names of the fields used for recordKey and partitionPath are taken from configs.
   */
  COMPLEX,

  /**
   * Key generator whose partitioning field is timestamp based. The record key is still picked by field name.
   */
  TIMESTAMP,

  /**
   * Generic key generator type. The record key may be configured as one field or a combination of fields,
   * and the partition path may likewise be built from one field or several.
   * <p>
   * The value of prop "hoodie.datasource.write.partitionpath.field" is expected in a specific format.
   * For example:
   * properties.put("hoodie.datasource.write.partitionpath.field", "field1:PartitionKeyType1,field2:PartitionKeyType2").
   */
  CUSTOM,

  /**
   * Simple key generator for unpartitioned Hive tables.
   */
  NON_PARTITION,

  /**
   * Key generator used for deletes with global indices.
   */
  GLOBAL_DELETE
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen.factory;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieKeyGeneratorException;
|
||||
import org.apache.hudi.keygen.ComplexAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.CustomAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.GlobalAvroDeleteKeyGenerator;
|
||||
import org.apache.hudi.keygen.KeyGenUtils;
|
||||
import org.apache.hudi.keygen.KeyGenerator;
|
||||
import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Factory help to create {@link org.apache.hudi.keygen.KeyGenerator}.
|
||||
* <p>
|
||||
* This factory will try {@link HoodieWriteConfig#KEYGENERATOR_CLASS_PROP} firstly, this ensures the class prop
|
||||
* will not be overwritten by {@link KeyGeneratorType}
|
||||
*/
|
||||
public class HoodieAvroKeyGeneratorFactory {
|
||||
public static KeyGenerator createKeyGenerator(TypedProperties props) throws IOException {
|
||||
// keyGenerator class name has higher priority
|
||||
KeyGenerator keyGenerator = KeyGenUtils.createKeyGeneratorByClassName(props);
|
||||
return Objects.isNull(keyGenerator) ? createAvroKeyGeneratorByType(props) : keyGenerator;
|
||||
}
|
||||
|
||||
private static KeyGenerator createAvroKeyGeneratorByType(TypedProperties props) throws IOException {
|
||||
// Use KeyGeneratorType.SIMPLE as default keyGeneratorType
|
||||
String keyGeneratorType =
|
||||
props.getString(HoodieWriteConfig.KEYGENERATOR_TYPE_PROP, KeyGeneratorType.SIMPLE.name());
|
||||
|
||||
KeyGeneratorType keyGeneratorTypeEnum;
|
||||
try {
|
||||
keyGeneratorTypeEnum = KeyGeneratorType.valueOf(keyGeneratorType.toUpperCase(Locale.ROOT));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType);
|
||||
}
|
||||
|
||||
switch (keyGeneratorTypeEnum) {
|
||||
case SIMPLE:
|
||||
return new SimpleAvroKeyGenerator(props);
|
||||
case COMPLEX:
|
||||
return new ComplexAvroKeyGenerator(props);
|
||||
case TIMESTAMP:
|
||||
return new TimestampBasedAvroKeyGenerator(props);
|
||||
case CUSTOM:
|
||||
return new CustomAvroKeyGenerator(props);
|
||||
case NON_PARTITION:
|
||||
return new NonpartitionedAvroKeyGenerator(props);
|
||||
case GLOBAL_DELETE:
|
||||
return new GlobalAvroDeleteKeyGenerator(props);
|
||||
default:
|
||||
throw new HoodieKeyGeneratorException("Unsupported keyGenerator Type " + keyGeneratorType);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user