[HUDI-1587] Add latency and freshness support (#2541)
Save min and max of event time in each commit and compute the latency and freshness metrics.
This commit is contained in:
@@ -25,6 +25,8 @@ import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import static org.apache.hudi.avro.HoodieAvroUtils.bytesToAvro;
|
||||
@@ -37,6 +39,9 @@ import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldVal;
|
||||
*/
|
||||
public class DefaultHoodieRecordPayload extends OverwriteWithLatestAvroPayload {
|
||||
|
||||
/** Key under which {@link #getMetadata()} stores the event time in the metadata map it returns. */
public static final String METADATA_EVENT_TIME_KEY = "metadata.event_time.key";
|
||||
/** Event time read from the incoming record while merging; stays empty until that happens. */
private Option<Object> eventTime = Option.empty();
|
||||
|
||||
/**
 * Creates a payload from an Avro record and its ordering value.
 * Both arguments are handed unchanged to the parent class constructor.
 *
 * @param record      the Avro record carried by this payload
 * @param orderingVal the ordering value passed to the parent class
 */
public DefaultHoodieRecordPayload(GenericRecord record, Comparable orderingVal) {
|
||||
super(record, orderingVal);
|
||||
}
|
||||
@@ -71,6 +76,10 @@ public class DefaultHoodieRecordPayload extends OverwriteWithLatestAvroPayload {
|
||||
|
||||
/*
|
||||
* We reached a point where the value on disk is older than the incoming record.
|
||||
*/
|
||||
eventTime = Option.ofNullable(getNestedFieldVal(incomingRecord, properties.getProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP), true));
|
||||
|
||||
/*
|
||||
* Now check if the incoming record is a delete record.
|
||||
*/
|
||||
if (isDeleteRecord(incomingRecord)) {
|
||||
@@ -79,4 +88,13 @@ public class DefaultHoodieRecordPayload extends OverwriteWithLatestAvroPayload {
|
||||
return Option.of(incomingRecord);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Option<Map<String, String>> getMetadata() {
|
||||
Map<String, String> metadata = new HashMap<>();
|
||||
if (eventTime.isPresent()) {
|
||||
metadata.put(METADATA_EVENT_TIME_KEY, String.valueOf(eventTime.get()));
|
||||
}
|
||||
return metadata.isEmpty() ? Option.empty() : Option.of(metadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,14 +18,16 @@
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.util.Option;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.fasterxml.jackson.annotation.PropertyAccessor;
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@@ -323,6 +325,20 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
return totalUpsertTime;
|
||||
}
|
||||
|
||||
public Pair<Option<Long>, Option<Long>> getMinAndMaxEventTime() {
|
||||
long minEventTime = Long.MAX_VALUE;
|
||||
long maxEventTime = Long.MIN_VALUE;
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : partitionToWriteStats.entrySet()) {
|
||||
for (HoodieWriteStat writeStat : entry.getValue()) {
|
||||
minEventTime = writeStat.getMinEventTime() != null ? Math.min(writeStat.getMinEventTime(), minEventTime) : minEventTime;
|
||||
maxEventTime = writeStat.getMaxEventTime() != null ? Math.max(writeStat.getMaxEventTime(), maxEventTime) : maxEventTime;
|
||||
}
|
||||
}
|
||||
return Pair.of(
|
||||
minEventTime == Long.MAX_VALUE ? Option.empty() : Option.of(minEventTime),
|
||||
maxEventTime == Long.MIN_VALUE ? Option.empty() : Option.of(maxEventTime));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
|
||||
@@ -24,9 +24,20 @@ package org.apache.hudi.common.model;
|
||||
*/
|
||||
public class HoodiePayloadProps {

  /**
   * Property for payload ordering field; to be used to merge incoming record with that in storage.
   * Implementations of {@link HoodieRecordPayload} can leverage if required.
   *
   * @see DefaultHoodieRecordPayload
   */
  public static final String PAYLOAD_ORDERING_FIELD_PROP = "hoodie.payload.ordering.field";

  /**
   * Default payload ordering field name. Declared {@code final} so this shared constant cannot be
   * reassigned at runtime.
   */
  public static final String DEFAULT_PAYLOAD_ORDERING_FIELD_VAL = "ts";

  /**
   * Property for payload event time field; to be used to extract source event time info.
   *
   * @see DefaultHoodieRecordPayload
   */
  public static final String PAYLOAD_EVENT_TIME_FIELD_PROP = "hoodie.payload.event.time.field";

  /**
   * Default payload event time field name. Declared {@code final} so this shared constant cannot
   * be reassigned at runtime.
   */
  public static final String DEFAULT_PAYLOAD_EVENT_TIME_FIELD_VAL = "ts";
}
|
||||
|
||||
@@ -143,6 +143,18 @@ public class HoodieWriteStat implements Serializable {
|
||||
*/
|
||||
private long fileSizeInBytes;
|
||||
|
||||
/**
|
||||
* The earliest of incoming records' event times (Epoch ms) for calculating latency.
|
||||
*/
|
||||
@Nullable
|
||||
private Long minEventTime;
|
||||
|
||||
/**
|
||||
* The latest of incoming records' event times (Epoch ms) for calculating freshness.
|
||||
*/
|
||||
@Nullable
|
||||
private Long maxEventTime;
|
||||
|
||||
@Nullable
|
||||
@JsonIgnore
|
||||
private RuntimeStats runtimeStats;
|
||||
@@ -303,6 +315,30 @@ public class HoodieWriteStat implements Serializable {
|
||||
this.fileSizeInBytes = fileSizeInBytes;
|
||||
}
|
||||
|
||||
public Long getMinEventTime() {
|
||||
return minEventTime;
|
||||
}
|
||||
|
||||
public void setMinEventTime(Long minEventTime) {
|
||||
if (this.minEventTime == null) {
|
||||
this.minEventTime = minEventTime;
|
||||
} else {
|
||||
this.minEventTime = Math.min(minEventTime, this.minEventTime);
|
||||
}
|
||||
}
|
||||
|
||||
public Long getMaxEventTime() {
|
||||
return maxEventTime;
|
||||
}
|
||||
|
||||
public void setMaxEventTime(Long maxEventTime) {
|
||||
if (this.maxEventTime == null) {
|
||||
this.maxEventTime = maxEventTime;
|
||||
} else {
|
||||
this.maxEventTime = Math.max(maxEventTime, this.maxEventTime);
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public RuntimeStats getRuntimeStats() {
|
||||
return runtimeStats;
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.format.DateTimeParseException;
|
||||
import java.util.Objects;
|
||||
|
||||
public class DateTimeUtils {
|
||||
|
||||
/**
|
||||
* Parse input String to a {@link java.time.Instant}.
|
||||
* @param s Input String should be Epoch time in millisecond or ISO-8601 format.
|
||||
*/
|
||||
public static Instant parseDateTime(String s) throws DateTimeParseException {
|
||||
ValidationUtils.checkArgument(Objects.nonNull(s), "Input String cannot be null.");
|
||||
try {
|
||||
return Instant.ofEpochMilli(Long.parseLong(s));
|
||||
} catch (NumberFormatException e) {
|
||||
return Instant.parse(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,12 +18,16 @@
|
||||
|
||||
package org.apache.hudi.common.model;
|
||||
|
||||
import org.apache.hudi.common.util.Option;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.Schema.Type;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
@@ -31,6 +35,7 @@ import java.util.Properties;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Unit tests {@link DefaultHoodieRecordPayload}.
|
||||
@@ -50,6 +55,7 @@ public class TestDefaultHoodieRecordPayload {
|
||||
));
|
||||
props = new Properties();
|
||||
props.setProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, "ts");
|
||||
props.setProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP, "ts");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -104,4 +110,36 @@ public class TestDefaultHoodieRecordPayload {
|
||||
assertFalse(payload2.combineAndGetUpdateValue(record1, schema, props).isPresent());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetEmptyMetadata() {
|
||||
GenericRecord record = new GenericData.Record(schema);
|
||||
record.put("id", "1");
|
||||
record.put("partition", "partition0");
|
||||
record.put("ts", 0L);
|
||||
record.put("_hoodie_is_deleted", false);
|
||||
DefaultHoodieRecordPayload payload = new DefaultHoodieRecordPayload(Option.of(record));
|
||||
assertFalse(payload.getMetadata().isPresent());
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(longs = {1L, 1612542030000L})
|
||||
public void testGetEventTimeInMetadata(long eventTime) throws IOException {
|
||||
GenericRecord record1 = new GenericData.Record(schema);
|
||||
record1.put("id", "1");
|
||||
record1.put("partition", "partition0");
|
||||
record1.put("ts", 0L);
|
||||
record1.put("_hoodie_is_deleted", false);
|
||||
|
||||
GenericRecord record2 = new GenericData.Record(schema);
|
||||
record2.put("id", "1");
|
||||
record2.put("partition", "partition0");
|
||||
record2.put("ts", eventTime);
|
||||
record2.put("_hoodie_is_deleted", false);
|
||||
|
||||
DefaultHoodieRecordPayload payload2 = new DefaultHoodieRecordPayload(record2, eventTime);
|
||||
payload2.combineAndGetUpdateValue(record1, schema, props);
|
||||
assertTrue(payload2.getMetadata().isPresent());
|
||||
assertEquals(eventTime,
|
||||
Long.parseLong(payload2.getMetadata().get().get(DefaultHoodieRecordPayload.METADATA_EVENT_TIME_KEY)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.common.util;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
|
||||
import java.time.format.DateTimeParseException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
public class TestDateTimeUtils {
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(strings = {"0", "1612542030000", "2020-01-01T01:01:00Z", "1970-01-01T00:00:00.123456Z"})
|
||||
public void testParseStringIntoInstant(String s) {
|
||||
assertDoesNotThrow(() -> {
|
||||
DateTimeUtils.parseDateTime(s);
|
||||
});
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(strings = {"#", "0L", ""})
|
||||
public void testParseDateTimeThrowsException(String s) {
|
||||
assertThrows(DateTimeParseException.class, () -> {
|
||||
DateTimeUtils.parseDateTime(s);
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseDateTimeWithNull() {
|
||||
assertThrows(IllegalArgumentException.class, () -> {
|
||||
DateTimeUtils.parseDateTime(null);
|
||||
});
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user