1
0

[HUDI-1233] Deltastreamer Kafka consumption delay reporting indicators (#2074)

This commit is contained in:
liujinhui
2020-09-29 13:44:31 +08:00
committed by GitHub
parent c0c0095fa9
commit 20b9b399c9
7 changed files with 59 additions and 15 deletions

View File

@@ -22,6 +22,7 @@ import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.util.Option;
+import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics;
import org.apache.hudi.utilities.deltastreamer.SourceFormatAdapter;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils;
@@ -46,6 +47,7 @@ import java.util.HashMap;
import java.util.UUID;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.Mockito.mock;
/**
* Tests against {@link AvroKafkaSource}.
@@ -56,6 +58,7 @@ public class TestKafkaSource extends UtilitiesTestBase {
private FilebasedSchemaProvider schemaProvider;
private KafkaTestUtils testUtils;
+  private HoodieDeltaStreamerMetrics metrics = mock(HoodieDeltaStreamerMetrics.class);
@BeforeAll
public static void initClass() throws Exception {
@@ -101,7 +104,7 @@ public class TestKafkaSource extends UtilitiesTestBase {
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
TypedProperties props = createPropsForJsonSource(null, "earliest");
-    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider);
+    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);
// 1. Extract without any checkpoint => get all the data, respecting sourceLimit
@@ -149,11 +152,11 @@ public class TestKafkaSource extends UtilitiesTestBase {
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
TypedProperties earliestProps = createPropsForJsonSource(null, "earliest");
-    Source earliestJsonSource = new JsonKafkaSource(earliestProps, jsc, sparkSession, schemaProvider);
+    Source earliestJsonSource = new JsonKafkaSource(earliestProps, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter earliestKafkaSource = new SourceFormatAdapter(earliestJsonSource);
TypedProperties latestProps = createPropsForJsonSource(null, "latest");
-    Source latestJsonSource = new JsonKafkaSource(latestProps, jsc, sparkSession, schemaProvider);
+    Source latestJsonSource = new JsonKafkaSource(latestProps, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter latestKafkaSource = new SourceFormatAdapter(latestJsonSource);
// 1. Extract with a none data kafka checkpoint
@@ -181,7 +184,7 @@ public class TestKafkaSource extends UtilitiesTestBase {
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
TypedProperties props = createPropsForJsonSource(Long.MAX_VALUE, "earliest");
-    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider);
+    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);
Config.maxEventsFromKafkaSource = 500;
@@ -210,7 +213,7 @@ public class TestKafkaSource extends UtilitiesTestBase {
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
TypedProperties props = createPropsForJsonSource(Long.MAX_VALUE, "earliest");
-    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider);
+    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);
Config.maxEventsFromKafkaSource = 500;
@@ -242,7 +245,7 @@ public class TestKafkaSource extends UtilitiesTestBase {
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
TypedProperties props = createPropsForJsonSource(500L, "earliest");
-    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider);
+    Source jsonSource = new JsonKafkaSource(props, jsc, sparkSession, schemaProvider, metrics);
SourceFormatAdapter kafkaSource = new SourceFormatAdapter(jsonSource);
// 1. Extract without any checkpoint => get all the data, respecting sourceLimit