1
0

[HUDI-3204] fix problem that spark on TimestampKeyGenerator has no re… (#4714)

This commit is contained in:
Yann Byron
2022-02-15 12:38:38 +08:00
committed by GitHub
parent 27bd7b538e
commit cb6ca7f0d1
16 changed files with 337 additions and 73 deletions

View File

@@ -65,29 +65,6 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
protected final boolean encodePartitionPath;
/**
* Supported configs.
*/
public static class Config {
// One value from TimestampType above
public static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type";
public static final String INPUT_TIME_UNIT =
"hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit";
// This prop can now accept a list of input date formats.
public static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP =
"hoodie.deltastreamer.keygen.timebased.input.dateformat";
public static final String TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex";
public static final String TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.timezone";
public static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP =
"hoodie.deltastreamer.keygen.timebased.output.dateformat";
//still keeping this prop for backward compatibility so that functionality for existing users does not break.
public static final String TIMESTAMP_TIMEZONE_FORMAT_PROP =
"hoodie.deltastreamer.keygen.timebased.timezone";
public static final String TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.timezone";
static final String DATE_TIME_PARSER_PROP = "hoodie.deltastreamer.keygen.datetime.parser.class";
}
public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException {
this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()),
config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
@@ -99,12 +76,12 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
TimestampBasedAvroKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException {
super(config, recordKeyField, partitionPathField);
String dateTimeParserClass = config.getString(Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParser.class.getName());
String dateTimeParserClass = config.getString(KeyGeneratorOptions.Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParser.class.getName());
this.parser = KeyGenUtils.createDateTimeParser(config, dateTimeParserClass);
this.inputDateTimeZone = parser.getInputDateTimeZone();
this.outputDateTimeZone = parser.getOutputDateTimeZone();
this.outputDateFormat = parser.getOutputDateFormat();
this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
this.timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP));
switch (this.timestampType) {
case EPOCHMILLISECONDS:
@@ -114,7 +91,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
timeUnit = SECONDS;
break;
case SCALAR:
String timeUnitStr = config.getString(Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString());
String timeUnitStr = config.getString(KeyGeneratorOptions.Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString());
timeUnit = TimeUnit.valueOf(timeUnitStr.toUpperCase());
break;
default:
@@ -148,7 +125,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
// {KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP} won't be null here; it has already been checked
// during the initialization of inputFormatter.
String delimiter = parser.getConfigInputDateFormatDelimiter();
String format = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0];
String format = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0];
// if both input and output timeZone are not configured, use GMT.
if (null != inputDateTimeZone) {
@@ -200,7 +177,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
timeMs = convertLongTimeToMillis(((BigDecimal) partitionVal).longValue());
} else if (partitionVal instanceof CharSequence) {
if (!inputFormatter.isPresent()) {
throw new HoodieException("Missing inputformatter. Ensure " + Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!");
throw new HoodieException("Missing inputformatter. Ensure " + KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!");
}
DateTime parsedDateTime = inputFormatter.get().parseDateTime(partitionVal.toString());
if (this.outputDateTimeZone == null) {
@@ -224,7 +201,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
private long convertLongTimeToMillis(Long partitionVal) {
if (timeUnit == null) {
// should not be possible
throw new RuntimeException(Config.INPUT_TIME_UNIT + " is not specified but scalar it supplied as time value");
throw new RuntimeException(KeyGeneratorOptions.Config.INPUT_TIME_UNIT + " is not specified but scalar it supplied as time value");
}
return MILLISECONDS.convert(partitionVal, timeUnit);
}

View File

@@ -19,7 +19,7 @@ package org.apache.hudi.keygen.parser;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
@@ -36,7 +36,7 @@ public abstract class BaseHoodieDateTimeParser implements Serializable {
}
private String initInputDateFormatDelimiter() {
String inputDateFormatDelimiter = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim();
String inputDateFormatDelimiter = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim();
inputDateFormatDelimiter = inputDateFormatDelimiter.isEmpty() ? "," : inputDateFormatDelimiter;
return inputDateFormatDelimiter;
}
@@ -45,7 +45,7 @@ public abstract class BaseHoodieDateTimeParser implements Serializable {
* Returns the output date format in which the partition paths will be created for the hudi dataset.
*/
public String getOutputDateFormat() {
return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
}
/**

View File

@@ -20,8 +20,8 @@ package org.apache.hudi.keygen.parser;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.TimestampType;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config;
import org.apache.hudi.keygen.KeyGenUtils;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
@@ -42,13 +42,13 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
public HoodieDateTimeParser(TypedProperties config) {
super(config);
KeyGenUtils.checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
KeyGenUtils.checkRequiredProperties(config, Arrays.asList(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
this.inputDateTimeZone = getInputDateTimeZone();
}
private DateTimeFormatter getInputDateFormatter() {
if (this.configInputDateFormatList.isEmpty()) {
throw new IllegalArgumentException(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required");
throw new IllegalArgumentException(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required");
}
DateTimeFormatter formatter = new DateTimeFormatterBuilder()
@@ -72,16 +72,16 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
@Override
public String getOutputDateFormat() {
return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
}
@Override
public Option<DateTimeFormatter> getInputFormatter() {
TimestampType timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
TimestampType timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP));
if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) {
KeyGenUtils.checkRequiredProperties(config,
Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
this.configInputDateFormatList = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "");
Collections.singletonList(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
this.configInputDateFormatList = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "");
return Option.of(getInputDateFormatter());
}
@@ -91,10 +91,10 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
@Override
public DateTimeZone getInputDateTimeZone() {
String inputTimeZone;
if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
inputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
} else {
inputTimeZone = config.getString(Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, "");
inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, "");
}
return !inputTimeZone.trim().isEmpty() ? DateTimeZone.forTimeZone(TimeZone.getTimeZone(inputTimeZone)) : null;
}
@@ -102,10 +102,10 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
@Override
public DateTimeZone getOutputDateTimeZone() {
String outputTimeZone;
if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
outputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
} else {
outputTimeZone = config.getString(Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "");
outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "");
}
return !outputTimeZone.trim().isEmpty() ? DateTimeZone.forTimeZone(TimeZone.getTimeZone(outputTimeZone)) : null;
}