[HUDI-2170] [HUDI-1763] Always choose the latest record for HoodieRecordPayload (#3401)
This commit is contained in:
@@ -50,7 +50,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
|
||||
public HoodieJsonPayload preCombine(HoodieJsonPayload oldValue) {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -36,8 +36,8 @@ public class EmptyHoodieRecordPayload implements HoodieRecordPayload<EmptyHoodie
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload another) {
|
||||
return another;
|
||||
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload oldValue) {
|
||||
return oldValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -37,6 +37,10 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
|
||||
// java serializable
|
||||
private final byte[] recordBytes;
|
||||
|
||||
/**
 * Builds a payload from an Avro record by delegating to the {@code Option}-based constructor.
 *
 * <p>NOTE(review): this (record, ordering value) signature presumably exists so the payload can be
 * instantiated through the same two-argument shape as other payload classes - confirm against callers.
 *
 * @param record      Avro record to wrap; passed on as {@code Option.of(record)}
 * @param orderingVal accepted but never read here; only the record is forwarded
 */
public HoodieAvroPayload(GenericRecord record, Comparable<?> orderingVal) {
|
||||
this(Option.of(record));
|
||||
}
|
||||
|
||||
public HoodieAvroPayload(Option<GenericRecord> record) {
|
||||
if (record.isPresent()) {
|
||||
this.recordBytes = HoodieAvroUtils.avroToBytes(record.get());
|
||||
@@ -46,7 +50,7 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
|
||||
public HoodieAvroPayload preCombine(HoodieAvroPayload oldValue) {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -42,18 +42,20 @@ public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Seri
|
||||
*/
|
||||
@Deprecated
|
||||
@PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED)
|
||||
T preCombine(T another);
|
||||
T preCombine(T oldValue);
|
||||
|
||||
/**
|
||||
* When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to insert/upsert by taking in a property map.
|
||||
* Implementation can leverage the property to decide their business logic to do preCombine.
|
||||
* @param another instance of another {@link HoodieRecordPayload} to be combined with.
|
||||
*
|
||||
* @param oldValue instance of the old {@link HoodieRecordPayload} to be combined with.
|
||||
* @param properties Payload related properties. For example pass the ordering field(s) name to extract from value in storage.
|
||||
*
|
||||
* @return the combined value
|
||||
*/
|
||||
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
|
||||
default T preCombine(T another, Properties properties) {
|
||||
return preCombine(another);
|
||||
default T preCombine(T oldValue, Properties properties) {
|
||||
return preCombine(oldValue);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -47,10 +47,14 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
|
||||
}
|
||||
|
||||
@Override
|
||||
public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload another) {
|
||||
// pick the payload with greatest ordering value
|
||||
if (another.orderingVal.compareTo(orderingVal) > 0) {
|
||||
return another;
|
||||
public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload oldValue) {
|
||||
if (oldValue.recordBytes.length == 0) {
|
||||
// use natural order for delete record
|
||||
return this;
|
||||
}
|
||||
if (oldValue.orderingVal.compareTo(orderingVal) > 0) {
|
||||
// pick the payload with greatest ordering value
|
||||
return oldValue;
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -83,6 +83,8 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
private final HoodieTableMetaClient hoodieTableMetaClient;
|
||||
// Merge strategy to use when combining records from log
|
||||
private final String payloadClassFQN;
|
||||
// preCombine field
|
||||
private final String preCombineField;
|
||||
// simple key gen fields
|
||||
private Option<Pair<String, String>> simpleKeyGenFields = Option.empty();
|
||||
// Log File Paths
|
||||
@@ -123,6 +125,7 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
this.hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build();
|
||||
// load class from the payload fully qualified class name
|
||||
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
|
||||
this.preCombineField = this.hoodieTableMetaClient.getTableConfig().getPreCombineField();
|
||||
HoodieTableConfig tableConfig = this.hoodieTableMetaClient.getTableConfig();
|
||||
if (!tableConfig.populateMetaFields()) {
|
||||
this.simpleKeyGenFields = Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()));
|
||||
@@ -316,9 +319,9 @@ public abstract class AbstractHoodieLogRecordScanner {
|
||||
|
||||
protected HoodieRecord<?> createHoodieRecord(IndexedRecord rec) {
|
||||
if (!simpleKeyGenFields.isPresent()) {
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.withOperationField);
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.preCombineField, this.withOperationField);
|
||||
} else {
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.simpleKeyGenFields.get(), this.withOperationField);
|
||||
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.preCombineField, this.simpleKeyGenFields.get(), this.withOperationField);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -40,13 +40,13 @@ public class HoodieFileSliceReader<T extends HoodieRecordPayload> implements Ite
|
||||
|
||||
public static <R extends IndexedRecord, T> HoodieFileSliceReader getFileSliceReader(
|
||||
HoodieFileReader<R> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema, String payloadClass,
|
||||
Option<Pair<String,String>> simpleKeyGenFieldsOpt) throws IOException {
|
||||
String preCombineField, Option<Pair<String,String>> simpleKeyGenFieldsOpt) throws IOException {
|
||||
Iterator<R> baseIterator = baseFileReader.getRecordIterator(schema);
|
||||
while (baseIterator.hasNext()) {
|
||||
GenericRecord record = (GenericRecord) baseIterator.next();
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = simpleKeyGenFieldsOpt.isPresent()
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField())
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, scanner.isWithOperationField());
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, preCombineField, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField())
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, preCombineField, scanner.isWithOperationField());
|
||||
scanner.processNextRecord(hoodieRecord);
|
||||
}
|
||||
return new HoodieFileSliceReader(scanner.iterator());
|
||||
|
||||
@@ -27,6 +27,7 @@ import org.apache.hudi.common.util.collection.BitCaskDiskMap.FileEntry;
|
||||
import org.apache.hudi.common.util.collection.Pair;
|
||||
import org.apache.hudi.exception.HoodieCorruptedDataException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -113,26 +114,42 @@ public class SpillableMapUtils {
|
||||
/**
|
||||
* Utility method to convert an Avro GenericRecord to a HoodieRecord using the given payload class.
|
||||
*/
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, boolean withOperationField) {
|
||||
return convertToHoodieRecordPayload(rec, payloadClazz, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD), withOperationField);
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, String preCombineField, boolean withOperationField) {
|
||||
return convertToHoodieRecordPayload(rec, payloadClazz, preCombineField, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD), withOperationField);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to convert an Avro GenericRecord to a HoodieRecord using the given payload class and record key/partition path field names.
|
||||
*/
|
||||
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz,
|
||||
Pair<String, String> recordKeyPartitionPathPair,
|
||||
String preCombineField, Pair<String, String> recordKeyPartitionPathPair,
|
||||
boolean withOperationField) {
|
||||
String recKey = rec.get(recordKeyPartitionPathPair.getLeft()).toString();
|
||||
String partitionPath = rec.get(recordKeyPartitionPathPair.getRight()).toString();
|
||||
Object preCombineVal = getPreCombineVal(rec, preCombineField);
|
||||
HoodieOperation operation = withOperationField
|
||||
? HoodieOperation.fromName(getNullableValAsString(rec, HoodieRecord.OPERATION_METADATA_FIELD)) : null;
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class), operation);
|
||||
ReflectionUtils.loadPayload(payloadClazz, new Object[] {rec, preCombineVal}, GenericRecord.class, Comparable.class), operation);
|
||||
|
||||
return (R) hoodieRecord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the preCombine value with given field name.
|
||||
*
|
||||
* @param rec The avro record
|
||||
* @param preCombineField The preCombine field name
|
||||
* @return the preCombine field value or 0 if the field does not exist in the avro schema
|
||||
*/
|
||||
private static Object getPreCombineVal(GenericRecord rec, String preCombineField) {
|
||||
if (preCombineField == null) {
|
||||
return 0;
|
||||
}
|
||||
Schema.Field field = rec.getSchema().getField(preCombineField);
|
||||
return field == null ? 0 : rec.get(field.pos());
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to convert bytes to HoodieRecord using schema and payload class.
|
||||
*/
|
||||
|
||||
@@ -133,11 +133,9 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
|
||||
Option<GenericRecord> baseRecord = baseFileReader.getRecordByKey(key);
|
||||
if (baseRecord.isPresent()) {
|
||||
hoodieRecord = tableConfig.populateMetaFields()
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), false)
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(
|
||||
baseRecord.get(),
|
||||
tableConfig.getPayloadClass(),
|
||||
Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()), false);
|
||||
? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), tableConfig.getPreCombineField(), false)
|
||||
: SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), tableConfig.getPreCombineField(),
|
||||
Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()), false);
|
||||
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,6 +70,10 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
|
||||
private int type = 0;
|
||||
private Map<String, HoodieMetadataFileInfo> filesystemMetadata = null;
|
||||
|
||||
/**
 * Builds a metadata payload from an Avro record by delegating to the {@code Option}-based constructor.
 *
 * <p>NOTE(review): this (record, ordering value) signature presumably exists so the payload can be
 * instantiated through the same two-argument shape as other payload classes - confirm against callers.
 *
 * @param record      Avro record to wrap; passed on as {@code Option.of(record)}
 * @param orderingVal accepted but never read here; only the record is forwarded
 */
public HoodieMetadataPayload(GenericRecord record, Comparable<?> orderingVal) {
|
||||
this(Option.of(record));
|
||||
}
|
||||
|
||||
public HoodieMetadataPayload(Option<GenericRecord> record) {
|
||||
if (record.isPresent()) {
|
||||
// This can be simplified using SpecificData.deepcopy once this bug is fixed
|
||||
|
||||
@@ -44,7 +44,7 @@ public class AvroBinaryTestPayload implements HoodieRecordPayload {
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieRecordPayload preCombine(HoodieRecordPayload another) {
|
||||
public HoodieRecordPayload preCombine(HoodieRecordPayload oldValue) {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
@@ -233,7 +233,7 @@ public class HoodieTestDataGenerator {
|
||||
public static RawTripTestPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException {
|
||||
GenericRecord rec = generateGenericRecord(key.getRecordKey(), key.getPartitionPath(), "rider-" + instantTime, "driver-" + instantTime, 0,
|
||||
true, false);
|
||||
return new RawTripTestPayload(Option.of(rec.toString()), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA, true);
|
||||
return new RawTripTestPayload(Option.of(rec.toString()), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA, true, 0L);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -574,7 +574,7 @@ public class HoodieTestDataGenerator {
|
||||
|
||||
public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException {
|
||||
RawTripTestPayload payload =
|
||||
new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true);
|
||||
new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true, 0L);
|
||||
return new HoodieRecord(key, payload);
|
||||
}
|
||||
|
||||
|
||||
@@ -53,9 +53,10 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
|
||||
private byte[] jsonDataCompressed;
|
||||
private int dataSize;
|
||||
private boolean isDeleted;
|
||||
private Comparable orderingVal;
|
||||
|
||||
public RawTripTestPayload(Option<String> jsonData, String rowKey, String partitionPath, String schemaStr,
|
||||
Boolean isDeleted) throws IOException {
|
||||
Boolean isDeleted, Comparable orderingVal) throws IOException {
|
||||
if (jsonData.isPresent()) {
|
||||
this.jsonDataCompressed = compressData(jsonData.get());
|
||||
this.dataSize = jsonData.get().length();
|
||||
@@ -63,10 +64,11 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
|
||||
this.rowKey = rowKey;
|
||||
this.partitionPath = partitionPath;
|
||||
this.isDeleted = isDeleted;
|
||||
this.orderingVal = orderingVal;
|
||||
}
|
||||
|
||||
public RawTripTestPayload(String jsonData, String rowKey, String partitionPath, String schemaStr) throws IOException {
|
||||
this(Option.of(jsonData), rowKey, partitionPath, schemaStr, false);
|
||||
this(Option.of(jsonData), rowKey, partitionPath, schemaStr, false, 0L);
|
||||
}
|
||||
|
||||
public RawTripTestPayload(String jsonData) throws IOException {
|
||||
@@ -105,8 +107,13 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
|
||||
}
|
||||
|
||||
@Override
|
||||
public RawTripTestPayload preCombine(RawTripTestPayload another) {
|
||||
return another;
|
||||
public RawTripTestPayload preCombine(RawTripTestPayload oldValue) {
|
||||
if (oldValue.orderingVal.compareTo(orderingVal) > 0) {
|
||||
// pick the payload with greatest ordering value
|
||||
return oldValue;
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user