1
0

[HUDI-2170] [HUDI-1763] Always choose the latest record for HoodieRecordPayload (#3401)

This commit is contained in:
swuferhong
2021-08-11 10:20:55 +08:00
committed by GitHub
parent d1b4aa59bf
commit 5448cdde7e
17 changed files with 110 additions and 46 deletions

View File

@@ -50,7 +50,7 @@ public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload>
}
@Override
public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
public HoodieJsonPayload preCombine(HoodieJsonPayload oldValue) {
return this;
}

View File

@@ -36,8 +36,8 @@ public class EmptyHoodieRecordPayload implements HoodieRecordPayload<EmptyHoodie
}
@Override
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload another) {
return another;
public EmptyHoodieRecordPayload preCombine(EmptyHoodieRecordPayload oldValue) {
return oldValue;
}
@Override

View File

@@ -37,6 +37,10 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
// java serializable
private final byte[] recordBytes;
/**
 * Creates a payload from an Avro record plus an ordering value.
 *
 * <p>The ordering value is accepted but not used: the record is wrapped in an {@code Option}
 * and construction is delegated to the single-argument constructor.
 * NOTE(review): presumably this overload exists so the payload can be instantiated reflectively
 * through a (GenericRecord, Comparable) constructor signature - confirm against the callers.
 *
 * @param record the Avro record backing this payload
 * @param orderingVal ordering value; ignored by this payload
 */
public HoodieAvroPayload(GenericRecord record, Comparable<?> orderingVal) {
this(Option.of(record));
}
public HoodieAvroPayload(Option<GenericRecord> record) {
if (record.isPresent()) {
this.recordBytes = HoodieAvroUtils.avroToBytes(record.get());
@@ -46,7 +50,7 @@ public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload>
}
@Override
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
public HoodieAvroPayload preCombine(HoodieAvroPayload oldValue) {
return this;
}

View File

@@ -42,18 +42,20 @@ public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Seri
*/
@Deprecated
@PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED)
T preCombine(T another);
T preCombine(T oldValue);
/**
* When more than one HoodieRecord have the same HoodieKey, this function combines them before attempting to insert/upsert by taking in a property map.
* Implementation can leverage the property to decide their business logic to do preCombine.
* @param another instance of another {@link HoodieRecordPayload} to be combined with.
*
* @param oldValue instance of the old {@link HoodieRecordPayload} to be combined with.
* @param properties Payload related properties. For example pass the ordering field(s) name to extract from value in storage.
*
* @return the combined value
*/
@PublicAPIMethod(maturity = ApiMaturityLevel.STABLE)
default T preCombine(T another, Properties properties) {
return preCombine(another);
default T preCombine(T oldValue, Properties properties) {
return preCombine(oldValue);
}
/**

View File

@@ -47,10 +47,14 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
}
@Override
public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload another) {
// pick the payload with greatest ordering value
if (another.orderingVal.compareTo(orderingVal) > 0) {
return another;
public OverwriteWithLatestAvroPayload preCombine(OverwriteWithLatestAvroPayload oldValue) {
if (oldValue.recordBytes.length == 0) {
// use natural order for delete record
return this;
}
if (oldValue.orderingVal.compareTo(orderingVal) > 0) {
// pick the payload with greatest ordering value
return oldValue;
} else {
return this;
}

View File

@@ -83,6 +83,8 @@ public abstract class AbstractHoodieLogRecordScanner {
private final HoodieTableMetaClient hoodieTableMetaClient;
// Merge strategy to use when combining records from log
private final String payloadClassFQN;
// preCombine field
private final String preCombineField;
// simple key gen fields
private Option<Pair<String, String>> simpleKeyGenFields = Option.empty();
// Log File Paths
@@ -123,6 +125,7 @@ public abstract class AbstractHoodieLogRecordScanner {
this.hoodieTableMetaClient = HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(basePath).build();
// load class from the payload fully qualified class name
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
this.preCombineField = this.hoodieTableMetaClient.getTableConfig().getPreCombineField();
HoodieTableConfig tableConfig = this.hoodieTableMetaClient.getTableConfig();
if (!tableConfig.populateMetaFields()) {
this.simpleKeyGenFields = Option.of(Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()));
@@ -316,9 +319,9 @@ public abstract class AbstractHoodieLogRecordScanner {
protected HoodieRecord<?> createHoodieRecord(IndexedRecord rec) {
if (!simpleKeyGenFields.isPresent()) {
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.withOperationField);
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.preCombineField, this.withOperationField);
} else {
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.simpleKeyGenFields.get(), this.withOperationField);
return SpillableMapUtils.convertToHoodieRecordPayload((GenericRecord) rec, this.payloadClassFQN, this.preCombineField, this.simpleKeyGenFields.get(), this.withOperationField);
}
}

View File

@@ -40,13 +40,13 @@ public class HoodieFileSliceReader<T extends HoodieRecordPayload> implements Ite
public static <R extends IndexedRecord, T> HoodieFileSliceReader getFileSliceReader(
HoodieFileReader<R> baseFileReader, HoodieMergedLogRecordScanner scanner, Schema schema, String payloadClass,
Option<Pair<String,String>> simpleKeyGenFieldsOpt) throws IOException {
String preCombineField, Option<Pair<String,String>> simpleKeyGenFieldsOpt) throws IOException {
Iterator<R> baseIterator = baseFileReader.getRecordIterator(schema);
while (baseIterator.hasNext()) {
GenericRecord record = (GenericRecord) baseIterator.next();
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = simpleKeyGenFieldsOpt.isPresent()
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField())
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, scanner.isWithOperationField());
? SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, preCombineField, simpleKeyGenFieldsOpt.get(), scanner.isWithOperationField())
: SpillableMapUtils.convertToHoodieRecordPayload(record, payloadClass, preCombineField, scanner.isWithOperationField());
scanner.processNextRecord(hoodieRecord);
}
return new HoodieFileSliceReader(scanner.iterator());

View File

@@ -27,6 +27,7 @@ import org.apache.hudi.common.util.collection.BitCaskDiskMap.FileEntry;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieCorruptedDataException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import java.io.IOException;
@@ -113,26 +114,42 @@ public class SpillableMapUtils {
/**
* Utility method to convert an Avro record to a HoodieRecord using the payload class; the record key and partition path are read from the Hudi metadata fields.
*/
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, boolean withOperationField) {
return convertToHoodieRecordPayload(rec, payloadClazz, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD), withOperationField);
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz, String preCombineField, boolean withOperationField) {
return convertToHoodieRecordPayload(rec, payloadClazz, preCombineField, Pair.of(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD), withOperationField);
}
/**
* Utility method to convert an Avro record to a HoodieRecord using the payload class and the given record key / partition path field names.
*/
public static <R> R convertToHoodieRecordPayload(GenericRecord rec, String payloadClazz,
Pair<String, String> recordKeyPartitionPathPair,
String preCombineField, Pair<String, String> recordKeyPartitionPathPair,
boolean withOperationField) {
String recKey = rec.get(recordKeyPartitionPathPair.getLeft()).toString();
String partitionPath = rec.get(recordKeyPartitionPathPair.getRight()).toString();
Object preCombineVal = getPreCombineVal(rec, preCombineField);
HoodieOperation operation = withOperationField
? HoodieOperation.fromName(getNullableValAsString(rec, HoodieRecord.OPERATION_METADATA_FIELD)) : null;
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath),
ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.of(rec)}, Option.class), operation);
ReflectionUtils.loadPayload(payloadClazz, new Object[] {rec, preCombineVal}, GenericRecord.class, Comparable.class), operation);
return (R) hoodieRecord;
}
/**
 * Looks up the ordering (preCombine) value of an avro record.
 *
 * @param rec             The avro record to read from
 * @param preCombineField The preCombine field name, may be {@code null}
 * @return the value stored under the preCombine field, or 0 when the field name is
 *         {@code null} or the record's schema has no field with that name
 */
private static Object getPreCombineVal(GenericRecord rec, String preCombineField) {
  if (preCombineField != null) {
    Schema.Field orderingField = rec.getSchema().getField(preCombineField);
    if (orderingField != null) {
      // Read by position, using the index the schema assigns to the field.
      return rec.get(orderingField.pos());
    }
  }
  // No usable preCombine field: fall back to the constant ordering value 0.
  return 0;
}
/**
* Utility method to convert bytes to HoodieRecord using schema and payload class.
*/

View File

@@ -133,11 +133,9 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata {
Option<GenericRecord> baseRecord = baseFileReader.getRecordByKey(key);
if (baseRecord.isPresent()) {
hoodieRecord = tableConfig.populateMetaFields()
? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), false)
: SpillableMapUtils.convertToHoodieRecordPayload(
baseRecord.get(),
tableConfig.getPayloadClass(),
Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()), false);
? SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), tableConfig.getPreCombineField(), false)
: SpillableMapUtils.convertToHoodieRecordPayload(baseRecord.get(), tableConfig.getPayloadClass(), tableConfig.getPreCombineField(),
Pair.of(tableConfig.getRecordKeyFieldProp(), tableConfig.getPartitionFieldProp()), false);
metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer()));
}
}

View File

@@ -70,6 +70,10 @@ public class HoodieMetadataPayload implements HoodieRecordPayload<HoodieMetadata
private int type = 0;
private Map<String, HoodieMetadataFileInfo> filesystemMetadata = null;
/**
 * Creates a metadata payload from an Avro record plus an ordering value.
 *
 * <p>The ordering value is accepted but not used: the record is wrapped in an {@code Option}
 * and construction is delegated to the single-argument constructor.
 * NOTE(review): presumably this overload exists so the payload can be instantiated reflectively
 * through a (GenericRecord, Comparable) constructor signature - confirm against the callers.
 *
 * @param record the Avro record backing this payload
 * @param orderingVal ordering value; ignored by this payload
 */
public HoodieMetadataPayload(GenericRecord record, Comparable<?> orderingVal) {
this(Option.of(record));
}
public HoodieMetadataPayload(Option<GenericRecord> record) {
if (record.isPresent()) {
// This can be simplified using SpecificData.deepcopy once this bug is fixed

View File

@@ -44,7 +44,7 @@ public class AvroBinaryTestPayload implements HoodieRecordPayload {
}
@Override
public HoodieRecordPayload preCombine(HoodieRecordPayload another) {
public HoodieRecordPayload preCombine(HoodieRecordPayload oldValue) {
return this;
}

View File

@@ -233,7 +233,7 @@ public class HoodieTestDataGenerator {
public static RawTripTestPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException {
GenericRecord rec = generateGenericRecord(key.getRecordKey(), key.getPartitionPath(), "rider-" + instantTime, "driver-" + instantTime, 0,
true, false);
return new RawTripTestPayload(Option.of(rec.toString()), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA, true);
return new RawTripTestPayload(Option.of(rec.toString()), key.getRecordKey(), key.getPartitionPath(), TRIP_EXAMPLE_SCHEMA, true, 0L);
}
/**
@@ -574,7 +574,7 @@ public class HoodieTestDataGenerator {
public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException {
RawTripTestPayload payload =
new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true);
new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true, 0L);
return new HoodieRecord(key, payload);
}

View File

@@ -53,9 +53,10 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
private byte[] jsonDataCompressed;
private int dataSize;
private boolean isDeleted;
private Comparable orderingVal;
public RawTripTestPayload(Option<String> jsonData, String rowKey, String partitionPath, String schemaStr,
Boolean isDeleted) throws IOException {
Boolean isDeleted, Comparable orderingVal) throws IOException {
if (jsonData.isPresent()) {
this.jsonDataCompressed = compressData(jsonData.get());
this.dataSize = jsonData.get().length();
@@ -63,10 +64,11 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
this.rowKey = rowKey;
this.partitionPath = partitionPath;
this.isDeleted = isDeleted;
this.orderingVal = orderingVal;
}
public RawTripTestPayload(String jsonData, String rowKey, String partitionPath, String schemaStr) throws IOException {
this(Option.of(jsonData), rowKey, partitionPath, schemaStr, false);
this(Option.of(jsonData), rowKey, partitionPath, schemaStr, false, 0L);
}
public RawTripTestPayload(String jsonData) throws IOException {
@@ -105,8 +107,13 @@ public class RawTripTestPayload implements HoodieRecordPayload<RawTripTestPayloa
}
@Override
public RawTripTestPayload preCombine(RawTripTestPayload another) {
return another;
/**
 * Chooses between this payload and {@code oldValue} by comparing ordering values.
 *
 * @param oldValue the other payload to combine with
 * @return {@code oldValue} only when its ordering value compares strictly greater than this
 *         payload's ordering value; otherwise (smaller or equal) this payload
 */
public RawTripTestPayload preCombine(RawTripTestPayload oldValue) {
  // Ties resolve in favour of this payload, so the comparison must be strict.
  return oldValue.orderingVal.compareTo(orderingVal) > 0 ? oldValue : this;
}
@Override