[HUDI-115] Adding DefaultHoodieRecordPayload to honor ordering with combineAndGetUpdateValue (#2311)
* Added ability to pass in `properties` to payload methods, so they can perform table/record-specific merges. * Added default methods so existing payload classes remain backwards compatible. * Added DefaultHoodieRecordPayload to honor ordering while merging two records. * Fixed default payload based on feedback.
This commit is contained in:
committed by
GitHub
parent
5388c7f7a3
commit
33d338f392
@@ -39,13 +39,14 @@ import org.apache.hudi.common.util.StringUtils;
|
||||
import org.apache.hudi.common.util.TablePathUtils;
|
||||
import org.apache.hudi.config.HoodieCompactionConfig;
|
||||
import org.apache.hudi.config.HoodieIndexConfig;
|
||||
import org.apache.hudi.config.HoodiePayloadConfig;
|
||||
import org.apache.hudi.config.HoodieWriteConfig;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.exception.TableNotFoundException;
|
||||
import org.apache.hudi.hive.HiveSyncConfig;
|
||||
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.HoodieIndex.IndexType;
|
||||
import org.apache.hudi.keygen.KeyGenerator;
|
||||
import org.apache.hudi.keygen.parser.AbstractHoodieDateTimeParser;
|
||||
import org.apache.hudi.table.BulkInsertPartitioner;
|
||||
@@ -177,10 +178,12 @@ public class DataSourceUtils {
|
||||
}
|
||||
|
||||
return builder.forTable(tblName)
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
|
||||
.withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(IndexType.BLOOM).build())
|
||||
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
|
||||
.withPayloadClass(parameters.get(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY()))
|
||||
.withInlineCompaction(inlineCompact).build())
|
||||
.withPayloadConfig(HoodiePayloadConfig.newBuilder().withPayloadOrderingField(parameters.get(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY()))
|
||||
.build())
|
||||
// override above with Hoodie configs specified as options.
|
||||
.withProps(parameters).build();
|
||||
}
|
||||
|
||||
@@ -205,7 +205,6 @@ object DataSourceWriteOptions {
|
||||
val PRECOMBINE_FIELD_OPT_KEY = "hoodie.datasource.write.precombine.field"
|
||||
val DEFAULT_PRECOMBINE_FIELD_OPT_VAL = "ts"
|
||||
|
||||
|
||||
/**
|
||||
* Payload class used. Override this, if you like to roll your own merge logic, when upserting/inserting.
|
||||
* This will render any value set for `PRECOMBINE_FIELD_OPT_VAL` ineffective
|
||||
|
||||
@@ -17,12 +17,14 @@
|
||||
|
||||
package org.apache.hudi
|
||||
|
||||
import org.apache.avro.Schema
|
||||
import org.apache.avro.generic.GenericRecord
|
||||
import org.apache.hudi.avro.HoodieAvroUtils
|
||||
import org.apache.hudi.common.config.TypedProperties
|
||||
import org.apache.hudi.common.model.{EmptyHoodieRecordPayload, HoodieKey, OverwriteWithLatestAvroPayload}
|
||||
import org.apache.hudi.common.model.{BaseAvroPayload, DefaultHoodieRecordPayload, EmptyHoodieRecordPayload, HoodieKey, HoodiePayloadProps, OverwriteWithLatestAvroPayload}
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil
|
||||
import org.apache.hudi.common.util.Option
|
||||
import org.apache.hudi.config.HoodiePayloadConfig
|
||||
import org.apache.hudi.exception.{HoodieException, HoodieKeyException}
|
||||
import org.apache.hudi.keygen._
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities
|
||||
@@ -31,6 +33,8 @@ import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.{BeforeEach, Test}
|
||||
import org.scalatest.Assertions.fail
|
||||
|
||||
import scala.collection.JavaConverters.mapAsJavaMapConverter
|
||||
|
||||
/**
|
||||
* Tests on the default key generator, payload classes.
|
||||
*/
|
||||
@@ -567,6 +571,62 @@ class TestDataSourceDefaults {
|
||||
assertEquals("field2", combinedGR21.get("field1").toString)
|
||||
}
|
||||
|
||||
// Wraps baseRecord and a second generated record in OverwriteWithLatestAvroPayload, each with its
// favoriteIntNumber value as the ordering value, and asserts that the insert value of the
// preCombine result has field1 == "field2".
// NOTE(review): despite the test name, combineAndGetUpdateValue is never called in this method,
// and `props` is populated but never passed anywhere — confirm whether an assertion is missing.
@Test def testOverwriteWithLatestAvroPayloadCombineAndGetUpdateValue() = {
|
||||
val baseOrderingVal: Object = baseRecord.get("favoriteIntNumber")
|
||||
val fieldSchema: Schema = baseRecord.getSchema().getField("favoriteIntNumber").schema()
|
||||
// Names favoriteIntNumber as the payload ordering field (unused below, see note above).
val props = new TypedProperties()
|
||||
props.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, "favoriteIntNumber");
|
||||
|
||||
val basePayload = new OverwriteWithLatestAvroPayload(baseRecord, HoodieAvroUtils.convertValueForSpecificDataTypes(fieldSchema, baseOrderingVal).asInstanceOf[Comparable[_]])
|
||||
|
||||
// Second record; presumably generated with a larger favoriteIntNumber than baseRecord — TODO confirm.
val laterRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 2, "001", "f1")
|
||||
val laterOrderingVal: Object = laterRecord.get("favoriteIntNumber")
|
||||
val newerPayload = new OverwriteWithLatestAvroPayload(laterRecord, HoodieAvroUtils.convertValueForSpecificDataTypes(fieldSchema, laterOrderingVal).asInstanceOf[Comparable[_]])
|
||||
|
||||
// it will provide the record with greatest combine value
|
||||
val preCombinedPayload = basePayload.preCombine(newerPayload)
|
||||
val precombinedGR = preCombinedPayload.getInsertValue(schema).get().asInstanceOf[GenericRecord]
|
||||
assertEquals("field2", precombinedGR.get("field1").toString)
|
||||
}
|
||||
|
||||
// Exercises DefaultHoodieRecordPayload with a "later" and an "earlier" record. preCombine and
// combineAndGetUpdateValue (called in both directions) are each asserted to return a record with
// field1 == "field2" and favoriteIntNumber == laterOrderingVal.
@Test def testDefaultHoodieRecordPayloadCombineAndGetUpdateValue() = {
|
||||
// NOTE(review): baseOrderingVal is not referenced again in this method; only baseRecord's
// schema is used (for fieldSchema) — confirm whether it can be dropped.
val baseOrderingVal: Object = baseRecord.get("favoriteIntNumber")
|
||||
val fieldSchema: Schema = baseRecord.getSchema().getField("favoriteIntNumber").schema()
|
||||
// Names favoriteIntNumber as the payload ordering field; passed to combineAndGetUpdateValue below.
val props = new TypedProperties()
|
||||
props.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP, "favoriteIntNumber");
|
||||
|
||||
val laterRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 2, "001", "f1")
|
||||
val laterOrderingVal: Object = laterRecord.get("favoriteIntNumber")
|
||||
|
||||
val earlierRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 1, "000", "f1")
|
||||
val earlierOrderingVal: Object = earlierRecord.get("favoriteIntNumber")
|
||||
|
||||
// Each payload carries its record plus that record's favoriteIntNumber as the ordering value.
val laterPayload = new DefaultHoodieRecordPayload(laterRecord,
|
||||
HoodieAvroUtils.convertValueForSpecificDataTypes(fieldSchema, laterOrderingVal).asInstanceOf[Comparable[_]])
|
||||
|
||||
val earlierPayload = new DefaultHoodieRecordPayload(earlierRecord,
|
||||
HoodieAvroUtils.convertValueForSpecificDataTypes(fieldSchema, earlierOrderingVal).asInstanceOf[Comparable[_]])
|
||||
|
||||
// it will provide the record with greatest combine value
|
||||
val preCombinedPayload = laterPayload.preCombine(earlierPayload)
|
||||
val precombinedGR = preCombinedPayload.getInsertValue(schema).get().asInstanceOf[GenericRecord]
|
||||
assertEquals("field2", precombinedGR.get("field1").toString)
|
||||
assertEquals(laterOrderingVal, precombinedGR.get("favoriteIntNumber"))
|
||||
|
||||
// Earlier payload combined against the later record: the later record's values are expected.
val earlierWithLater = earlierPayload.combineAndGetUpdateValue(laterRecord, schema, props)
|
||||
val earlierwithLaterGR = earlierWithLater.get().asInstanceOf[GenericRecord]
|
||||
assertEquals("field2", earlierwithLaterGR.get("field1").toString)
|
||||
assertEquals(laterOrderingVal, earlierwithLaterGR.get("favoriteIntNumber"))
|
||||
|
||||
// Later payload combined against the earlier record: the later record's values are still expected.
val laterWithEarlier = laterPayload.combineAndGetUpdateValue(earlierRecord, schema, props)
|
||||
val laterWithEarlierGR = laterWithEarlier.get().asInstanceOf[GenericRecord]
|
||||
assertEquals("field2", laterWithEarlierGR.get("field1").toString)
|
||||
assertEquals(laterOrderingVal, laterWithEarlierGR.get("favoriteIntNumber"))
|
||||
}
|
||||
|
||||
@Test def testEmptyHoodieRecordPayload() = {
|
||||
val emptyPayload1 = new EmptyHoodieRecordPayload(baseRecord, 1)
|
||||
val laterRecord = SchemaTestUtil
|
||||
|
||||
Reference in New Issue
Block a user