[HUDI-2559] Converting commit timestamp format to millisecs (#4024)
- Adds support for generating commit timestamps with millisecond granularity.
- Older commit timestamps (with second granularity) will be suffixed with 999 and parsed using the millisecond format.
This commit is contained in:
committed by
GitHub
parent
89452063b4
commit
fc9ca6a07a
@@ -20,7 +20,6 @@ package org.apache.spark.sql.hudi
|
||||
import scala.collection.JavaConverters._
|
||||
import java.net.URI
|
||||
import java.util.{Date, Locale, Properties}
|
||||
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
import org.apache.hadoop.fs.Path
|
||||
|
||||
@@ -31,8 +30,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.model.HoodieRecord
|
||||
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline
|
||||
|
||||
import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstantTimeGenerator}
|
||||
import org.apache.spark.SPARK_VERSION
|
||||
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
|
||||
import org.apache.spark.sql.catalyst.TableIdentifier
|
||||
@@ -50,7 +48,6 @@ import java.text.SimpleDateFormat
|
||||
import scala.collection.immutable.Map
|
||||
|
||||
object HoodieSqlUtils extends SparkAdapterSupport {
|
||||
private val defaultDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
|
||||
private val defaultDateFormat = new SimpleDateFormat("yyyy-MM-dd")
|
||||
|
||||
def isHoodieTable(table: CatalogTable): Boolean = {
|
||||
@@ -293,13 +290,15 @@ object HoodieSqlUtils extends SparkAdapterSupport {
|
||||
* 3、yyyyMMddHHmmss
|
||||
*/
|
||||
def formatQueryInstant(queryInstant: String): String = {
|
||||
if (queryInstant.length == 19) { // for yyyy-MM-dd HH:mm:ss
|
||||
HoodieActiveTimeline.formatInstantTime(defaultDateTimeFormat.parse(queryInstant))
|
||||
} else if (queryInstant.length == 14) { // for yyyyMMddHHmmss
|
||||
HoodieActiveTimeline.parseInstantTime(queryInstant) // validate the format
|
||||
val instantLength = queryInstant.length
|
||||
if (instantLength == 19 || instantLength == 23) { // for yyyy-MM-dd HH:mm:ss[.SSS]
|
||||
HoodieInstantTimeGenerator.getInstantForDateString(queryInstant)
|
||||
} else if (instantLength == HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT
|
||||
|| instantLength == HoodieInstantTimeGenerator.MILLIS_INSTANT_ID_LENGTH) { // for yyyyMMddHHmmss[SSS]
|
||||
HoodieActiveTimeline.parseDateFromInstantTime(queryInstant) // validate the format
|
||||
queryInstant
|
||||
} else if (queryInstant.length == 10) { // for yyyy-MM-dd
|
||||
HoodieActiveTimeline.formatInstantTime(defaultDateFormat.parse(queryInstant))
|
||||
} else if (instantLength == 10) { // for yyyy-MM-dd
|
||||
HoodieActiveTimeline.formatDate(defaultDateFormat.parse(queryInstant))
|
||||
} else {
|
||||
throw new IllegalArgumentException(s"Unsupported query instant time format: $queryInstant,"
|
||||
+ s"Supported time format are: 'yyyy-MM-dd: HH:mm:ss' or 'yyyy-MM-dd' or 'yyyyMMddHHmmss'")
|
||||
|
||||
@@ -179,10 +179,10 @@ class HoodieStreamSource(
|
||||
startOffset match {
|
||||
case INIT_OFFSET => startOffset.commitTime
|
||||
case HoodieSourceOffset(commitTime) =>
|
||||
val time = HoodieActiveTimeline.parseInstantTime(commitTime).getTime
|
||||
val time = HoodieActiveTimeline.parseDateFromInstantTime(commitTime).getTime
|
||||
// As we consume the data between (start, end], start is not included,
|
||||
// so we +1s to the start commit time here.
|
||||
HoodieActiveTimeline.formatInstantTime(new Date(time + 1000))
|
||||
HoodieActiveTimeline.formatDate(new Date(time + 1000))
|
||||
case _=> throw new IllegalStateException("UnKnow offset type.")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user