1
0

[HUDI-3977] Flink hudi table with date type partition path throws HoodieNotSupportedException (#5432)

This commit is contained in:
Danny Chan
2022-04-27 13:19:55 +08:00
committed by GitHub
parent 6ec039ba42
commit e1ccf2e00b
8 changed files with 119 additions and 7 deletions

View File

@@ -367,13 +367,14 @@ public class FlinkOptions extends HoodieConfig {
public static final String PARTITION_FORMAT_HOUR = "yyyyMMddHH";
public static final String PARTITION_FORMAT_DAY = "yyyyMMdd";
public static final String PARTITION_FORMAT_DASHED_DAY = "yyyy-MM-dd";
public static final ConfigOption<String> PARTITION_FORMAT = ConfigOptions
.key("write.partition.format")
.stringType()
.noDefaultValue()
.withDescription("Partition path format, only valid when 'write.datetime.partitioning' is true, default is:\n"
+ "1) 'yyyyMMddHH' for timestamp(3) WITHOUT TIME ZONE, LONG, FLOAT, DOUBLE, DECIMAL;\n"
+ "2) 'yyyyMMdd' for DAY and INT.");
+ "2) 'yyyyMMdd' for DATE and INT.");
public static final ConfigOption<Integer> INDEX_BOOTSTRAP_TASKS = ConfigOptions
.key("write.index_bootstrap.tasks")

View File

@@ -53,6 +53,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
@@ -243,6 +244,11 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
* <p>The UTC timezone is used as default.
*/
public static void setupTimestampKeygenOptions(Configuration conf, DataType fieldType) {
if (conf.contains(FlinkOptions.KEYGEN_CLASS_NAME)) {
// the keygen clazz has been set up explicitly, skipping
return;
}
conf.setString(FlinkOptions.KEYGEN_CLASS_NAME, TimestampBasedAvroKeyGenerator.class.getName());
LOG.info("Table option [{}] is reset to {} because datetime partitioning turns on",
FlinkOptions.KEYGEN_CLASS_NAME.key(), TimestampBasedAvroKeyGenerator.class.getName());
@@ -257,13 +263,17 @@ public class HoodieTableFactory implements DynamicTableSourceFactory, DynamicTab
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP,
TimestampBasedAvroKeyGenerator.TimestampType.EPOCHMILLISECONDS.name());
}
String partitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_HOUR);
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat);
String outputPartitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_HOUR);
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputPartitionFormat);
} else {
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP,
TimestampBasedAvroKeyGenerator.TimestampType.DATE_STRING.name());
String partitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_DAY);
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat);
TimestampBasedAvroKeyGenerator.TimestampType.SCALAR.name());
conf.setString(KeyGeneratorOptions.Config.INPUT_TIME_UNIT, TimeUnit.DAYS.toString());
String outputPartitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_DAY);
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputPartitionFormat);
// the input date format has no effect here; it is set only to pass validation
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, FlinkOptions.PARTITION_FORMAT_DAY);
}
conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "UTC");
}