[HUDI-1587] Add latency and freshness support (#2541)
Save min and max of event time in each commit and compute the latency and freshness metrics.
This commit is contained in:
@@ -18,24 +18,22 @@
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner
|
||||
import org.apache.hudi.exception.HoodieException
|
||||
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
|
||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder}
|
||||
import org.apache.hadoop.conf.Configuration
|
||||
import org.apache.hudi.common.model.HoodiePayloadProps
|
||||
import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext}
|
||||
import org.apache.hudi.common.fs.FSUtils
|
||||
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner
|
||||
import org.apache.hudi.config.HoodiePayloadConfig
|
||||
import org.apache.hudi.exception.HoodieException
|
||||
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
|
||||
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql.avro.{AvroDeserializer, AvroSerializer}
|
||||
import org.apache.spark.sql.catalyst.InternalRow
|
||||
import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection}
|
||||
import org.apache.spark.sql.execution.datasources.PartitionedFile
|
||||
import org.apache.spark.sql.vectorized.ColumnarBatch
|
||||
|
||||
import java.util.Properties
|
||||
import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.collection.mutable
|
||||
@@ -53,9 +51,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
|
||||
private val confBroadcast = sc.broadcast(new SerializableWritable(config))
|
||||
private val preCombineField = tableState.preCombineField
|
||||
private val payloadProps = if (preCombineField.isDefined) {
|
||||
val properties = new Properties()
|
||||
properties.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, preCombineField.get)
|
||||
Some(properties)
|
||||
Some(HoodiePayloadConfig.newBuilder.withPayloadOrderingField(preCombineField.get).build.getProps)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hudi.avro.HoodieAvroUtils
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model.{BaseAvroPayload, DefaultHoodieRecordPayload, EmptyHoodieRecordPayload, HoodieKey, HoodiePayloadProps, OverwriteWithLatestAvroPayload}
|
||||
import org.apache.hudi.common.model._
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil
|
||||
import org.apache.hudi.common.util.Option
|
||||
import org.apache.hudi.config.HoodiePayloadConfig
|
||||
@@ -33,8 +33,6 @@ import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
import org.scalatest.Assertions.fail
|
||||
|
||||
import scala.collection.JavaConverters.mapAsJavaMapConverter
|
||||
|
||||
/**
|
||||
* Tests on the default key generator, payload classes.
|
||||
*/
|
||||
@@ -591,10 +589,9 @@ class TestDataSourceDefaults {
|
||||
}
|
||||
|
||||
@Test def testDefaultHoodieRecordPayloadCombineAndGetUpdateValue() = {
|
||||
val baseOrderingVal: Object = baseRecord.get("favoriteIntNumber")
|
||||
val fieldSchema: Schema = baseRecord.getSchema().getField("favoriteIntNumber").schema()
|
||||
val props = new TypedProperties()
|
||||
props.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, "favoriteIntNumber");
|
||||
val props = HoodiePayloadConfig.newBuilder()
|
||||
.withPayloadOrderingField("favoriteIntNumber").build().getProps;
|
||||
|
||||
val laterRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 2, "001", "f1")
|
||||
|
||||
Reference in New Issue
Block a user