[HUDI-3125] spark-sql write timestamp directly (#4471)
This commit is contained in:
@@ -27,6 +27,8 @@ import org.apache.spark.sql.types.DataTypes;
|
||||
import org.apache.spark.sql.types.StructField;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
|
||||
import java.sql.Timestamp;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
@@ -106,7 +108,8 @@ public class RowKeyGeneratorHelper {
|
||||
if (fieldPos == -1 || row.isNullAt(fieldPos)) {
|
||||
val = HUDI_DEFAULT_PARTITION_PATH;
|
||||
} else {
|
||||
val = row.getAs(field).toString();
|
||||
Object data = row.get(fieldPos);
|
||||
val = convertToTimestampIfInstant(data).toString();
|
||||
if (val.isEmpty()) {
|
||||
val = HUDI_DEFAULT_PARTITION_PATH;
|
||||
}
|
||||
@@ -115,11 +118,12 @@ public class RowKeyGeneratorHelper {
|
||||
val = field + "=" + val;
|
||||
}
|
||||
} else { // nested
|
||||
Object nestedVal = getNestedFieldVal(row, partitionPathPositions.get(field));
|
||||
if (nestedVal.toString().contains(NULL_RECORDKEY_PLACEHOLDER) || nestedVal.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
|
||||
Object data = getNestedFieldVal(row, partitionPathPositions.get(field));
|
||||
data = convertToTimestampIfInstant(data);
|
||||
if (data.toString().contains(NULL_RECORDKEY_PLACEHOLDER) || data.toString().contains(EMPTY_RECORDKEY_PLACEHOLDER)) {
|
||||
val = hiveStylePartitioning ? field + "=" + HUDI_DEFAULT_PARTITION_PATH : HUDI_DEFAULT_PARTITION_PATH;
|
||||
} else {
|
||||
val = hiveStylePartitioning ? field + "=" + nestedVal.toString() : nestedVal.toString();
|
||||
val = hiveStylePartitioning ? field + "=" + data.toString() : data.toString();
|
||||
}
|
||||
}
|
||||
return val;
|
||||
@@ -266,4 +270,11 @@ public class RowKeyGeneratorHelper {
|
||||
}
|
||||
return positions;
|
||||
}
|
||||
|
||||
private static Object convertToTimestampIfInstant(Object data) {
|
||||
if (data instanceof Instant) {
|
||||
return Timestamp.from((Instant) data);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ package org.apache.hudi
|
||||
|
||||
import java.nio.ByteBuffer
|
||||
import java.sql.{Date, Timestamp}
|
||||
import java.time.Instant
|
||||
|
||||
import org.apache.avro.Conversions.DecimalConversion
|
||||
import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis}
|
||||
@@ -301,9 +302,17 @@ object AvroConversionHelper {
|
||||
}.orNull
|
||||
}
|
||||
case TimestampType => (item: Any) =>
|
||||
// Convert time to microseconds since spark-avro by default converts TimestampType to
|
||||
// Avro Logical TimestampMicros
|
||||
Option(item).map(_.asInstanceOf[Timestamp].getTime * 1000).orNull
|
||||
if (item == null) {
|
||||
null
|
||||
} else {
|
||||
val timestamp = item match {
|
||||
case i: Instant => Timestamp.from(i)
|
||||
case t: Timestamp => t
|
||||
}
|
||||
// Convert time to microseconds since spark-avro by default converts TimestampType to
|
||||
// Avro Logical TimestampMicros
|
||||
timestamp.getTime * 1000
|
||||
}
|
||||
case DateType => (item: Any) =>
|
||||
Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull
|
||||
case ArrayType(elementType, _) =>
|
||||
|
||||
Reference in New Issue
Block a user