[HUDI-3204] fix problem that spark on TimestampKeyGenerator has no re… (#4714)
This commit is contained in:
@@ -65,29 +65,6 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
|
||||
protected final boolean encodePartitionPath;
|
||||
|
||||
/**
|
||||
* Supported configs.
|
||||
*/
|
||||
public static class Config {
|
||||
|
||||
// One value from TimestampType above
|
||||
public static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type";
|
||||
public static final String INPUT_TIME_UNIT =
|
||||
"hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit";
|
||||
//This prop can now accept list of input date formats.
|
||||
public static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP =
|
||||
"hoodie.deltastreamer.keygen.timebased.input.dateformat";
|
||||
public static final String TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex";
|
||||
public static final String TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.timezone";
|
||||
public static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP =
|
||||
"hoodie.deltastreamer.keygen.timebased.output.dateformat";
|
||||
//still keeping this prop for backward compatibility so that functionality for existing users does not break.
|
||||
public static final String TIMESTAMP_TIMEZONE_FORMAT_PROP =
|
||||
"hoodie.deltastreamer.keygen.timebased.timezone";
|
||||
public static final String TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.timezone";
|
||||
static final String DATE_TIME_PARSER_PROP = "hoodie.deltastreamer.keygen.datetime.parser.class";
|
||||
}
|
||||
|
||||
public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException {
|
||||
this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()),
|
||||
config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()));
|
||||
@@ -99,12 +76,12 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
|
||||
TimestampBasedAvroKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException {
|
||||
super(config, recordKeyField, partitionPathField);
|
||||
String dateTimeParserClass = config.getString(Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParser.class.getName());
|
||||
String dateTimeParserClass = config.getString(KeyGeneratorOptions.Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParser.class.getName());
|
||||
this.parser = KeyGenUtils.createDateTimeParser(config, dateTimeParserClass);
|
||||
this.inputDateTimeZone = parser.getInputDateTimeZone();
|
||||
this.outputDateTimeZone = parser.getOutputDateTimeZone();
|
||||
this.outputDateFormat = parser.getOutputDateFormat();
|
||||
this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
|
||||
this.timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP));
|
||||
|
||||
switch (this.timestampType) {
|
||||
case EPOCHMILLISECONDS:
|
||||
@@ -114,7 +91,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
timeUnit = SECONDS;
|
||||
break;
|
||||
case SCALAR:
|
||||
String timeUnitStr = config.getString(Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString());
|
||||
String timeUnitStr = config.getString(KeyGeneratorOptions.Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString());
|
||||
timeUnit = TimeUnit.valueOf(timeUnitStr.toUpperCase());
|
||||
break;
|
||||
default:
|
||||
@@ -148,7 +125,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
// {Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP} won't be null, it has been checked in the initialization process of
|
||||
// inputFormatter
|
||||
String delimiter = parser.getConfigInputDateFormatDelimiter();
|
||||
String format = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0];
|
||||
String format = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0];
|
||||
|
||||
// if both input and output timeZone are not configured, use GMT.
|
||||
if (null != inputDateTimeZone) {
|
||||
@@ -200,7 +177,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
timeMs = convertLongTimeToMillis(((BigDecimal) partitionVal).longValue());
|
||||
} else if (partitionVal instanceof CharSequence) {
|
||||
if (!inputFormatter.isPresent()) {
|
||||
throw new HoodieException("Missing inputformatter. Ensure " + Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!");
|
||||
throw new HoodieException("Missing inputformatter. Ensure " + KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!");
|
||||
}
|
||||
DateTime parsedDateTime = inputFormatter.get().parseDateTime(partitionVal.toString());
|
||||
if (this.outputDateTimeZone == null) {
|
||||
@@ -224,7 +201,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
private long convertLongTimeToMillis(Long partitionVal) {
|
||||
if (timeUnit == null) {
|
||||
// should not be possible
|
||||
throw new RuntimeException(Config.INPUT_TIME_UNIT + " is not specified but scalar it supplied as time value");
|
||||
throw new RuntimeException(KeyGeneratorOptions.Config.INPUT_TIME_UNIT + " is not specified but scalar it supplied as time value");
|
||||
}
|
||||
return MILLISECONDS.convert(partitionVal, timeUnit);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ package org.apache.hudi.keygen.parser;
|
||||
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
|
||||
@@ -36,7 +36,7 @@ public abstract class BaseHoodieDateTimeParser implements Serializable {
|
||||
}
|
||||
|
||||
private String initInputDateFormatDelimiter() {
|
||||
String inputDateFormatDelimiter = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim();
|
||||
String inputDateFormatDelimiter = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim();
|
||||
inputDateFormatDelimiter = inputDateFormatDelimiter.isEmpty() ? "," : inputDateFormatDelimiter;
|
||||
return inputDateFormatDelimiter;
|
||||
}
|
||||
@@ -45,7 +45,7 @@ public abstract class BaseHoodieDateTimeParser implements Serializable {
|
||||
* Returns the output date format in which the partition paths will be created for the hudi dataset.
|
||||
*/
|
||||
public String getOutputDateFormat() {
|
||||
return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
|
||||
return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -20,8 +20,8 @@ package org.apache.hudi.keygen.parser;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.TimestampType;
|
||||
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config;
|
||||
import org.apache.hudi.keygen.KeyGenUtils;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
@@ -42,13 +42,13 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
|
||||
|
||||
public HoodieDateTimeParser(TypedProperties config) {
|
||||
super(config);
|
||||
KeyGenUtils.checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
|
||||
KeyGenUtils.checkRequiredProperties(config, Arrays.asList(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP));
|
||||
this.inputDateTimeZone = getInputDateTimeZone();
|
||||
}
|
||||
|
||||
private DateTimeFormatter getInputDateFormatter() {
|
||||
if (this.configInputDateFormatList.isEmpty()) {
|
||||
throw new IllegalArgumentException(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required");
|
||||
throw new IllegalArgumentException(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required");
|
||||
}
|
||||
|
||||
DateTimeFormatter formatter = new DateTimeFormatterBuilder()
|
||||
@@ -72,16 +72,16 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
|
||||
|
||||
@Override
|
||||
public String getOutputDateFormat() {
|
||||
return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
|
||||
return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<DateTimeFormatter> getInputFormatter() {
|
||||
TimestampType timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP));
|
||||
TimestampType timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP));
|
||||
if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) {
|
||||
KeyGenUtils.checkRequiredProperties(config,
|
||||
Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
|
||||
this.configInputDateFormatList = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "");
|
||||
Collections.singletonList(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP));
|
||||
this.configInputDateFormatList = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "");
|
||||
return Option.of(getInputDateFormatter());
|
||||
}
|
||||
|
||||
@@ -91,10 +91,10 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
|
||||
@Override
|
||||
public DateTimeZone getInputDateTimeZone() {
|
||||
String inputTimeZone;
|
||||
if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
|
||||
inputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
|
||||
if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
|
||||
inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
|
||||
} else {
|
||||
inputTimeZone = config.getString(Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, "");
|
||||
inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, "");
|
||||
}
|
||||
return !inputTimeZone.trim().isEmpty() ? DateTimeZone.forTimeZone(TimeZone.getTimeZone(inputTimeZone)) : null;
|
||||
}
|
||||
@@ -102,10 +102,10 @@ public class HoodieDateTimeParser extends BaseHoodieDateTimeParser {
|
||||
@Override
|
||||
public DateTimeZone getOutputDateTimeZone() {
|
||||
String outputTimeZone;
|
||||
if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
|
||||
outputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
|
||||
if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) {
|
||||
outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT");
|
||||
} else {
|
||||
outputTimeZone = config.getString(Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "");
|
||||
outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "");
|
||||
}
|
||||
return !outputTimeZone.trim().isEmpty() ? DateTimeZone.forTimeZone(TimeZone.getTimeZone(outputTimeZone)) : null;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user