1
0

[HUDI-2559] Converting commit timestamp format to millisecs (#4024)

- Adds support for generating commit timestamps with millisecond granularity.
- Older commit timestamps (second granularity) are suffixed with 999 and parsed using the millisecond format.
This commit is contained in:
Sivabalan Narayanan
2021-11-22 11:44:38 -05:00
committed by GitHub
parent 89452063b4
commit fc9ca6a07a
19 changed files with 132 additions and 66 deletions

View File

@@ -20,7 +20,6 @@ package org.apache.spark.sql.hudi
import scala.collection.JavaConverters._
import java.net.URI
import java.util.{Date, Locale, Properties}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
@@ -31,8 +30,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline
import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieInstantTimeGenerator}
import org.apache.spark.SPARK_VERSION
import org.apache.spark.sql.{Column, DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
@@ -50,7 +48,6 @@ import java.text.SimpleDateFormat
import scala.collection.immutable.Map
object HoodieSqlUtils extends SparkAdapterSupport {
private val defaultDateTimeFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
private val defaultDateFormat = new SimpleDateFormat("yyyy-MM-dd")
def isHoodieTable(table: CatalogTable): Boolean = {
@@ -293,13 +290,15 @@ object HoodieSqlUtils extends SparkAdapterSupport {
* 3、yyyyMMddHHmmss
*/
def formatQueryInstant(queryInstant: String): String = {
if (queryInstant.length == 19) { // for yyyy-MM-dd HH:mm:ss
HoodieActiveTimeline.formatInstantTime(defaultDateTimeFormat.parse(queryInstant))
} else if (queryInstant.length == 14) { // for yyyyMMddHHmmss
HoodieActiveTimeline.parseInstantTime(queryInstant) // validate the format
val instantLength = queryInstant.length
if (instantLength == 19 || instantLength == 23) { // for yyyy-MM-dd HH:mm:ss[.SSS]
HoodieInstantTimeGenerator.getInstantForDateString(queryInstant)
} else if (instantLength == HoodieInstantTimeGenerator.SECS_INSTANT_TIMESTAMP_FORMAT
|| instantLength == HoodieInstantTimeGenerator.MILLIS_INSTANT_ID_LENGTH) { // for yyyyMMddHHmmss[SSS]
HoodieActiveTimeline.parseDateFromInstantTime(queryInstant) // validate the format
queryInstant
} else if (queryInstant.length == 10) { // for yyyy-MM-dd
HoodieActiveTimeline.formatInstantTime(defaultDateFormat.parse(queryInstant))
} else if (instantLength == 10) { // for yyyy-MM-dd
HoodieActiveTimeline.formatDate(defaultDateFormat.parse(queryInstant))
} else {
throw new IllegalArgumentException(s"Unsupported query instant time format: $queryInstant,"
+ s"Supported time format are: 'yyyy-MM-dd: HH:mm:ss' or 'yyyy-MM-dd' or 'yyyyMMddHHmmss'")

View File

@@ -179,10 +179,10 @@ class HoodieStreamSource(
startOffset match {
case INIT_OFFSET => startOffset.commitTime
case HoodieSourceOffset(commitTime) =>
val time = HoodieActiveTimeline.parseInstantTime(commitTime).getTime
val time = HoodieActiveTimeline.parseDateFromInstantTime(commitTime).getTime
// We consume the data in the range (start, end], i.e. start is excluded,
// so we add 1 second to the start commit time here.
HoodieActiveTimeline.formatInstantTime(new Date(time + 1000))
HoodieActiveTimeline.formatDate(new Date(time + 1000))
case _=> throw new IllegalStateException("UnKnow offset type.")
}
}