[HUDI-2909] Handle logical type in TimestampBasedKeyGenerator (#4203)
* [HUDI-2909] Handle logical type in TimestampBasedKeyGenerator. The timestamp-based key generator was returning different values for the row-writer and non-row-writer paths. This patch fixes that, and the fix is guarded by a config flag (`hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled`).
This commit is contained in:
@@ -31,6 +31,7 @@ import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieIOException;
|
||||
import org.apache.hudi.keygen.KeyGenerator;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
@@ -68,7 +69,9 @@ public abstract class SparkFullBootstrapDataProviderBase extends FullRecordBoots
|
||||
Option.empty());
|
||||
return genericRecords.toJavaRDD().map(gr -> {
|
||||
String orderingVal = HoodieAvroUtils.getNestedFieldValAsString(
|
||||
gr, props.getString("hoodie.datasource.write.precombine.field"), false);
|
||||
gr, props.getString("hoodie.datasource.write.precombine.field"), false, props.getBoolean(
|
||||
KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(),
|
||||
Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())));
|
||||
try {
|
||||
return DataSourceUtils.createHoodieRecord(gr, orderingVal, keyGenerator.getKey(gr),
|
||||
props.getString("hoodie.datasource.write.payload.class"));
|
||||
|
||||
@@ -237,7 +237,9 @@ object HoodieSparkSqlWriter {
|
||||
val hoodieAllIncomingRecords = genericRecords.map(gr => {
|
||||
val processedRecord = getProcessedRecord(partitionColumns, gr, dropPartitionColumns)
|
||||
val hoodieRecord = if (shouldCombine) {
|
||||
val orderingVal = HoodieAvroUtils.getNestedFieldVal(gr, hoodieConfig.getString(PRECOMBINE_FIELD), false)
|
||||
val orderingVal = HoodieAvroUtils.getNestedFieldVal(gr, hoodieConfig.getString(PRECOMBINE_FIELD), false, parameters.getOrElse(
|
||||
DataSourceWriteOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(),
|
||||
DataSourceWriteOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue()).toBoolean)
|
||||
.asInstanceOf[Comparable[_]]
|
||||
DataSourceUtils.createHoodieRecord(processedRecord,
|
||||
orderingVal, keyGenerator.getKey(gr),
|
||||
|
||||
@@ -84,6 +84,7 @@ object HoodieWriterUtils {
|
||||
hoodieConfig.setDefaultValue(ENABLE_ROW_WRITER)
|
||||
hoodieConfig.setDefaultValue(RECONCILE_SCHEMA)
|
||||
hoodieConfig.setDefaultValue(DROP_PARTITION_COLUMNS)
|
||||
hoodieConfig.setDefaultValue(KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED)
|
||||
Map() ++ hoodieConfig.getProps.asScala ++ globalProps ++ DataSourceOptionsHelper.translateConfigurations(parameters)
|
||||
}
|
||||
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
|
||||
package org.apache.spark.sql.hudi.command
|
||||
|
||||
import java.util.concurrent.TimeUnit.{MICROSECONDS, MILLISECONDS}
|
||||
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.util.PartitionPathEncodeUtils
|
||||
@@ -27,7 +25,10 @@ import org.apache.hudi.keygen._
|
||||
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory
|
||||
import org.apache.spark.sql.Row
|
||||
import org.apache.spark.sql.types.{StructType, TimestampType}
|
||||
import org.joda.time.format.{DateTimeFormat, DateTimeFormatter}
|
||||
import org.joda.time.format.DateTimeFormat
|
||||
|
||||
import java.sql.Timestamp
|
||||
import java.util.concurrent.TimeUnit.{MICROSECONDS, MILLISECONDS}
|
||||
|
||||
/**
|
||||
* A complex key generator for sql command which do some process for the
|
||||
@@ -96,7 +97,11 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props)
|
||||
val timeMs = if (rowType) { // In RowType, the partitionPathValue is the time format string, convert to millis
|
||||
SqlKeyGenerator.sqlTimestampFormat.parseMillis(_partitionValue)
|
||||
} else {
|
||||
MILLISECONDS.convert(_partitionValue.toLong, MICROSECONDS)
|
||||
if (isConsistentLogicalTimestampEnabled) {
|
||||
Timestamp.valueOf(_partitionValue).getTime
|
||||
} else {
|
||||
MILLISECONDS.convert(_partitionValue.toLong, MICROSECONDS)
|
||||
}
|
||||
}
|
||||
val timestampFormat = PartitionPathEncodeUtils.escapePathName(
|
||||
SqlKeyGenerator.timestampTimeFormat.print(timeMs))
|
||||
|
||||
Reference in New Issue
Block a user