1
0

[HUDI-1587] Add latency and freshness support (#2541)

Save the min and max event time in each commit and use them to compute the latency and freshness metrics.
This commit is contained in:
Raymond Xu
2021-03-03 20:13:12 -08:00
committed by GitHub
parent f11a6c7b2d
commit 899ae70fdb
14 changed files with 283 additions and 26 deletions

View File

@@ -18,24 +18,22 @@
package org.apache.hudi
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder}
import org.apache.hadoop.conf.Configuration
import org.apache.hudi.common.model.HoodiePayloadProps
import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext}
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner
import org.apache.hudi.config.HoodiePayloadConfig
import org.apache.hudi.exception.HoodieException
import org.apache.hudi.hadoop.config.HoodieRealtimeConfig
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.avro.{AvroDeserializer, AvroSerializer}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection}
import org.apache.spark.sql.execution.datasources.PartitionedFile
import org.apache.spark.sql.vectorized.ColumnarBatch
import java.util.Properties
import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext}
import scala.collection.JavaConverters._
import scala.collection.mutable
@@ -53,9 +51,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
private val confBroadcast = sc.broadcast(new SerializableWritable(config))
private val preCombineField = tableState.preCombineField
private val payloadProps = if (preCombineField.isDefined) {
val properties = new Properties()
properties.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, preCombineField.get)
Some(properties)
Some(HoodiePayloadConfig.newBuilder.withPayloadOrderingField(preCombineField.get).build.getProps)
} else {
None
}

View File

@@ -21,7 +21,7 @@ import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hudi.avro.HoodieAvroUtils
import org.apache.hudi.common.config.TypedProperties
import org.apache.hudi.common.model.{BaseAvroPayload, DefaultHoodieRecordPayload, EmptyHoodieRecordPayload, HoodieKey, HoodiePayloadProps, OverwriteWithLatestAvroPayload}
import org.apache.hudi.common.model._
import org.apache.hudi.common.testutils.SchemaTestUtil
import org.apache.hudi.common.util.Option
import org.apache.hudi.config.HoodiePayloadConfig
@@ -33,8 +33,6 @@ import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.{BeforeEach, Test}
import org.scalatest.Assertions.fail
import scala.collection.JavaConverters.mapAsJavaMapConverter
/**
* Tests on the default key generator, payload classes.
*/
@@ -591,10 +589,9 @@ class TestDataSourceDefaults {
}
@Test def testDefaultHoodieRecordPayloadCombineAndGetUpdateValue() = {
val baseOrderingVal: Object = baseRecord.get("favoriteIntNumber")
val fieldSchema: Schema = baseRecord.getSchema().getField("favoriteIntNumber").schema()
val props = new TypedProperties()
props.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, "favoriteIntNumber");
val props = HoodiePayloadConfig.newBuilder()
.withPayloadOrderingField("favoriteIntNumber").build().getProps;
val laterRecord = SchemaTestUtil
.generateAvroRecordFromJson(schema, 2, "001", "f1")