[HUDI-2909] Handle logical type in TimestampBasedKeyGenerator (#4203)
* [HUDI-2909] Handle logical type in TimestampBasedKeyGenerator. The timestamp-based key generator was returning different values for the row-writer and non-row-writer paths. This patch fixes that inconsistency; the fix is guarded by a config flag (`hoodie.datasource.write.keygenerator.consistent.logical.timestamp.enabled`).
This commit is contained in:
@@ -49,6 +49,7 @@ import org.apache.hudi.exception.HoodieNotSupportedException;
|
||||
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.keygen.SimpleAvroKeyGenerator;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorType;
|
||||
import org.apache.hudi.metrics.MetricsReporterType;
|
||||
import org.apache.hudi.metrics.datadog.DatadogHttpClient.ApiSite;
|
||||
@@ -889,6 +890,10 @@ public class HoodieWriteConfig extends HoodieConfig {
|
||||
return getString(KEYGENERATOR_CLASS_NAME);
|
||||
}
|
||||
|
||||
public boolean isConsistentLogicalTimestampEnabled() {
|
||||
return getBooleanOrDefault(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED);
|
||||
}
|
||||
|
||||
public Boolean shouldAutoCommit() {
|
||||
return getBoolean(AUTO_COMMIT_ENABLE);
|
||||
}
|
||||
|
||||
@@ -40,11 +40,11 @@ public class ComplexAvroKeyGenerator extends BaseKeyGenerator {
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields());
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields(), isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordPartitionPath(record, getPartitionPathFields(), hiveStylePartitioning, encodePartitionPath);
|
||||
return KeyGenUtils.getRecordPartitionPath(record, getPartitionPathFields(), hiveStylePartitioning, encodePartitionPath, isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ public class GlobalAvroDeleteKeyGenerator extends BaseKeyGenerator {
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields());
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields(), isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -65,7 +65,7 @@ public class KeyGenUtils {
|
||||
|
||||
/**
|
||||
* Extracts the record key fields in strings out of the given record key,
|
||||
* this is the reverse operation of {@link #getRecordKey(GenericRecord, String)}.
|
||||
* this is the reverse operation of {@link #getRecordKey(GenericRecord, String, boolean)}.
|
||||
*
|
||||
* @see SimpleAvroKeyGenerator
|
||||
* @see org.apache.hudi.keygen.ComplexAvroKeyGenerator
|
||||
@@ -89,11 +89,11 @@ public class KeyGenUtils {
|
||||
}
|
||||
}
|
||||
|
||||
public static String getRecordKey(GenericRecord record, List<String> recordKeyFields) {
|
||||
public static String getRecordKey(GenericRecord record, List<String> recordKeyFields, boolean consistentLogicalTimestampEnabled) {
|
||||
boolean keyIsNullEmpty = true;
|
||||
StringBuilder recordKey = new StringBuilder();
|
||||
for (String recordKeyField : recordKeyFields) {
|
||||
String recordKeyValue = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true);
|
||||
String recordKeyValue = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true, consistentLogicalTimestampEnabled);
|
||||
if (recordKeyValue == null) {
|
||||
recordKey.append(recordKeyField + ":" + NULL_RECORDKEY_PLACEHOLDER + ",");
|
||||
} else if (recordKeyValue.isEmpty()) {
|
||||
@@ -112,14 +112,14 @@ public class KeyGenUtils {
|
||||
}
|
||||
|
||||
public static String getRecordPartitionPath(GenericRecord record, List<String> partitionPathFields,
|
||||
boolean hiveStylePartitioning, boolean encodePartitionPath) {
|
||||
boolean hiveStylePartitioning, boolean encodePartitionPath, boolean consistentLogicalTimestampEnabled) {
|
||||
if (partitionPathFields.isEmpty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
StringBuilder partitionPath = new StringBuilder();
|
||||
for (String partitionPathField : partitionPathFields) {
|
||||
String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true);
|
||||
String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true, consistentLogicalTimestampEnabled);
|
||||
if (fieldVal == null || fieldVal.isEmpty()) {
|
||||
partitionPath.append(hiveStylePartitioning ? partitionPathField + "=" + HUDI_DEFAULT_PARTITION_PATH
|
||||
: HUDI_DEFAULT_PARTITION_PATH);
|
||||
@@ -135,8 +135,8 @@ public class KeyGenUtils {
|
||||
return partitionPath.toString();
|
||||
}
|
||||
|
||||
public static String getRecordKey(GenericRecord record, String recordKeyField) {
|
||||
String recordKey = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true);
|
||||
public static String getRecordKey(GenericRecord record, String recordKeyField, boolean consistentLogicalTimestampEnabled) {
|
||||
String recordKey = HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyField, true, consistentLogicalTimestampEnabled);
|
||||
if (recordKey == null || recordKey.isEmpty()) {
|
||||
throw new HoodieKeyException("recordKey value: \"" + recordKey + "\" for field: \"" + recordKeyField + "\" cannot be null or empty.");
|
||||
}
|
||||
@@ -144,8 +144,8 @@ public class KeyGenUtils {
|
||||
}
|
||||
|
||||
public static String getPartitionPath(GenericRecord record, String partitionPathField,
|
||||
boolean hiveStylePartitioning, boolean encodePartitionPath) {
|
||||
String partitionPath = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true);
|
||||
boolean hiveStylePartitioning, boolean encodePartitionPath, boolean consistentLogicalTimestampEnabled) {
|
||||
String partitionPath = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true, consistentLogicalTimestampEnabled);
|
||||
if (partitionPath == null || partitionPath.isEmpty()) {
|
||||
partitionPath = HUDI_DEFAULT_PARTITION_PATH;
|
||||
}
|
||||
|
||||
@@ -57,9 +57,9 @@ public class NonpartitionedAvroKeyGenerator extends BaseKeyGenerator {
|
||||
// 1. if there is only one record key field, the format of record key is just "<value>"
|
||||
// 2. if there are multiple record key fields, the format is "<field1>:<value1>,<field2>:<value2>,..."
|
||||
if (getRecordKeyFieldNames().size() == 1) {
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields().get(0));
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields().get(0), isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields());
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields(), isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
|
||||
public String getEmptyPartition() {
|
||||
|
||||
@@ -47,11 +47,11 @@ public class SimpleAvroKeyGenerator extends BaseKeyGenerator {
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields().get(0));
|
||||
return KeyGenUtils.getRecordKey(record, getRecordKeyFields().get(0), isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
return KeyGenUtils.getPartitionPath(record, getPartitionPathFields().get(0), hiveStylePartitioning, encodePartitionPath);
|
||||
return KeyGenUtils.getPartitionPath(record, getPartitionPathFields().get(0), hiveStylePartitioning, encodePartitionPath, isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@ import org.joda.time.format.DateTimeFormatter;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.math.BigDecimal;
|
||||
import java.sql.Timestamp;
|
||||
import java.util.TimeZone;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@@ -125,7 +126,7 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
|
||||
@Override
|
||||
public String getPartitionPath(GenericRecord record) {
|
||||
Object partitionVal = HoodieAvroUtils.getNestedFieldVal(record, getPartitionPathFields().get(0), true);
|
||||
Object partitionVal = HoodieAvroUtils.getNestedFieldVal(record, getPartitionPathFields().get(0), true, isConsistentLogicalTimestampEnabled());
|
||||
if (partitionVal == null) {
|
||||
partitionVal = getDefaultPartitionVal();
|
||||
}
|
||||
@@ -191,6 +192,8 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
timeMs = convertLongTimeToMillis(((Float) partitionVal).longValue());
|
||||
} else if (partitionVal instanceof Long) {
|
||||
timeMs = convertLongTimeToMillis((Long) partitionVal);
|
||||
} else if (partitionVal instanceof Timestamp && isConsistentLogicalTimestampEnabled()) {
|
||||
timeMs = ((Timestamp) partitionVal).getTime();
|
||||
} else if (partitionVal instanceof Integer) {
|
||||
timeMs = convertLongTimeToMillis(((Integer) partitionVal).longValue());
|
||||
} else if (partitionVal instanceof BigDecimal) {
|
||||
@@ -225,5 +228,4 @@ public class TimestampBasedAvroKeyGenerator extends SimpleAvroKeyGenerator {
|
||||
}
|
||||
return MILLISECONDS.convert(partitionVal, timeUnit);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ public class HoodieTableMetadataKeyGenerator extends BaseKeyGenerator {
|
||||
|
||||
@Override
|
||||
public String getRecordKey(GenericRecord record) {
|
||||
return KeyGenUtils.getRecordKey(record, HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY);
|
||||
return KeyGenUtils.getRecordKey(record, HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY, isConsistentLogicalTimestampEnabled());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user