New Features in DeltaStreamer :
(1) Apply transformation when using delta-streamer to ingest data. (2) Add Hudi Incremental Source for Delta Streamer (3) Allow delta-streamer config-property to be passed as command-line (4) Add Hive Integration to Delta-Streamer and address Review comments (5) Ensure MultiPartKeysValueExtractor handle hive style partition description (6) Reuse same spark session on both source and transformer (7) Support extracting partition fields from _hoodie_partition_path for HoodieIncrSource (8) Reuse Binary Avro coders (9) Add push down filter for Incremental source (10) Add Hoodie DeltaStreamer metrics to track total time taken
This commit is contained in:
committed by
vinoth chandar
parent
c70dbc13e9
commit
3a0044216c
@@ -16,8 +16,8 @@
|
||||
|
||||
package com.uber.hoodie.metrics;
|
||||
|
||||
import com.codahale.metrics.Gauge;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import static com.uber.hoodie.metrics.Metrics.registerGauge;
|
||||
|
||||
import com.codahale.metrics.Timer;
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.uber.hoodie.common.model.HoodieCommitMetadata;
|
||||
@@ -177,18 +177,6 @@ public class HoodieMetrics {
|
||||
return config == null ? null : String.format("%s.%s.%s", tableName, action, metric);
|
||||
}
|
||||
|
||||
void registerGauge(String metricName, final long value) {
|
||||
try {
|
||||
MetricRegistry registry = Metrics.getInstance().getRegistry();
|
||||
registry.register(metricName, (Gauge<Long>) () -> value);
|
||||
} catch (Exception e) {
|
||||
// Here we catch all exception, so the major upsert pipeline will not be affected if the
|
||||
// metrics system
|
||||
// has some issues.
|
||||
logger.error("Failed to send metrics: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* By default, the timer context returns duration with nano seconds. Convert it to millisecond.
|
||||
*/
|
||||
|
||||
@@ -16,17 +16,21 @@
|
||||
|
||||
package com.uber.hoodie.metrics;
|
||||
|
||||
import com.codahale.metrics.Gauge;
|
||||
import com.codahale.metrics.MetricRegistry;
|
||||
import com.google.common.io.Closeables;
|
||||
import com.uber.hoodie.config.HoodieWriteConfig;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import java.io.Closeable;
|
||||
import org.apache.commons.configuration.ConfigurationException;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* This is the main class of the metrics system.
|
||||
*/
|
||||
public class Metrics {
|
||||
private static Logger logger = LogManager.getLogger(Metrics.class);
|
||||
|
||||
private static volatile boolean initialized = false;
|
||||
private static Metrics metrics = null;
|
||||
@@ -72,6 +76,18 @@ public class Metrics {
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
public static void registerGauge(String metricName, final long value) {
|
||||
try {
|
||||
MetricRegistry registry = Metrics.getInstance().getRegistry();
|
||||
registry.register(metricName, (Gauge<Long>) () -> value);
|
||||
} catch (Exception e) {
|
||||
// Here we catch all exception, so the major upsert pipeline will not be affected if the
|
||||
// metrics system
|
||||
// has some issues.
|
||||
logger.error("Failed to send metrics: ", e);
|
||||
}
|
||||
}
|
||||
|
||||
public MetricRegistry getRegistry() {
|
||||
return registry;
|
||||
}
|
||||
|
||||
@@ -75,7 +75,9 @@ public class HoodieTestDataGenerator {
|
||||
+ "{\"name\": \"end_lat\", \"type\": \"double\"},"
|
||||
+ "{\"name\": \"end_lon\", \"type\": \"double\"},"
|
||||
+ "{\"name\":\"fare\",\"type\": \"double\"}]}";
|
||||
public static Schema avroSchema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA));
|
||||
public static Schema avroSchema = new Schema.Parser().parse(TRIP_EXAMPLE_SCHEMA);
|
||||
public static Schema avroSchemaWithMetadataFields = HoodieAvroUtils.addMetadataFields(avroSchema);
|
||||
|
||||
private static Random rand = new Random(46474747);
|
||||
|
||||
private List<KeyPartition> existingKeysList = new ArrayList<>();
|
||||
@@ -100,7 +102,6 @@ public class HoodieTestDataGenerator {
|
||||
*/
|
||||
public static TestRawTripPayload generateRandomValue(HoodieKey key, String commitTime) throws IOException {
|
||||
GenericRecord rec = generateGenericRecord(key.getRecordKey(), "rider-" + commitTime, "driver-" + commitTime, 0.0);
|
||||
HoodieAvroUtils.addCommitMetadataToRecord(rec, commitTime, "-1");
|
||||
return new TestRawTripPayload(rec.toString(), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA);
|
||||
}
|
||||
|
||||
|
||||
@@ -160,7 +160,8 @@ public class TestHoodieCompactor {
|
||||
|
||||
// Write them to corresponding avro logfiles
|
||||
HoodieTestUtils
|
||||
.writeRecordsToLogFiles(fs, metaClient.getBasePath(), HoodieTestDataGenerator.avroSchema, updatedRecords);
|
||||
.writeRecordsToLogFiles(fs, metaClient.getBasePath(), HoodieTestDataGenerator.avroSchemaWithMetadataFields,
|
||||
updatedRecords);
|
||||
|
||||
// Verify that all data file has one log file
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
package com.uber.hoodie.metrics;
|
||||
|
||||
import static com.uber.hoodie.metrics.Metrics.registerGauge;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
@@ -39,7 +40,7 @@ public class TestHoodieMetrics {
|
||||
|
||||
@Test
|
||||
public void testRegisterGauge() {
|
||||
metrics.registerGauge("metric1", 123L);
|
||||
registerGauge("metric1", 123L);
|
||||
assertTrue(Metrics.getInstance().getRegistry().getGauges().get("metric1").getValue().toString().equals("123"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -671,8 +671,8 @@ public class TestMergeOnReadTable {
|
||||
|
||||
// Write them to corresponding avro logfiles
|
||||
HoodieTestUtils
|
||||
.writeRecordsToLogFiles(metaClient.getFs(), metaClient.getBasePath(), HoodieTestDataGenerator.avroSchema,
|
||||
updatedRecords);
|
||||
.writeRecordsToLogFiles(metaClient.getFs(), metaClient.getBasePath(),
|
||||
HoodieTestDataGenerator.avroSchemaWithMetadataFields, updatedRecords);
|
||||
|
||||
// Verify that all data file has one log file
|
||||
metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
|
||||
|
||||
Reference in New Issue
Block a user