[HUDI-4039] Make sure all builtin KeyGenerators properly implement Spark specific APIs (#5523)
This set of changes makes sure that all builtin KeyGenerators properly implement Spark-specific APIs in a performant way (minimizing key-generators overhead)
This commit is contained in:
@@ -28,6 +28,7 @@ import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.InternalRow;
|
||||
import org.apache.spark.unsafe.types.UTF8String;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -83,7 +84,6 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
public void testWrongRecordKeyField() {
|
||||
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -98,7 +98,7 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
Assertions.assertEquals(UTF8String.fromString("timestamp=4357686/ts_ms=2020-03-21"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -107,7 +107,7 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "timestamp");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFieldNames().size(), 1);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
@@ -119,9 +119,9 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA,
|
||||
AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(row), partitionPath);
|
||||
Assertions.assertEquals(partitionPath, compositeKeyGenerator.getPartitionPath(row));
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(internalRow, row.schema()), partitionPath);
|
||||
Assertions.assertEquals(UTF8String.fromString(partitionPath), compositeKeyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -130,7 +130,7 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key,timestamp");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "rider,driver");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFieldNames().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 2);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
@@ -144,10 +144,10 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA,
|
||||
AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(row), partitionPath);
|
||||
Assertions.assertEquals(partitionPath, compositeKeyGenerator.getPartitionPath(row));
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(internalRow, row.schema()), partitionPath);
|
||||
Assertions.assertEquals(UTF8String.fromString(partitionPath), compositeKeyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -156,7 +156,7 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key,timestamp");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFieldNames().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
@@ -170,9 +170,9 @@ public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record, HoodieTestDataGenerator.AVRO_SCHEMA,
|
||||
AvroConversionUtils.convertAvroSchemaToStructType(HoodieTestDataGenerator.AVRO_SCHEMA));
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(row), partitionPath);
|
||||
Assertions.assertEquals(partitionPath, compositeKeyGenerator.getPartitionPath(row));
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(compositeKeyGenerator.getPartitionPath(internalRow, row.schema()), partitionPath);
|
||||
Assertions.assertEquals(UTF8String.fromString(partitionPath), compositeKeyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.InternalRow;
|
||||
import org.apache.spark.unsafe.types.UTF8String;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -145,13 +146,13 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
(BuiltinKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(props);
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
Assertions.assertEquals("key1", key.getRecordKey());
|
||||
Assertions.assertEquals("timestamp=4357686", key.getPartitionPath());
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
Assertions.assertEquals("key1", keyGenerator.getRecordKey(row));
|
||||
Assertions.assertEquals("timestamp=4357686", keyGenerator.getPartitionPath(row));
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "timestamp=4357686");
|
||||
Assertions.assertEquals(UTF8String.fromString("timestamp=4357686"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -170,13 +171,13 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "ts_ms=20200321");
|
||||
Assertions.assertEquals("key1", key.getRecordKey());
|
||||
Assertions.assertEquals("ts_ms=20200321", key.getPartitionPath());
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "ts_ms=20200321");
|
||||
Assertions.assertEquals("key1", keyGenerator.getRecordKey(row));
|
||||
Assertions.assertEquals("ts_ms=20200321", keyGenerator.getPartitionPath(row));
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "ts_ms=20200321");
|
||||
Assertions.assertEquals(UTF8String.fromString("ts_ms=20200321"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -202,7 +203,7 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
Assertions.assertTrue(keyGenerator.getPartitionPath(row).isEmpty());
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertTrue(keyGenerator.getPartitionPath(internalRow, row.schema()).isEmpty());
|
||||
Assertions.assertEquals(0, keyGenerator.getPartitionPath(internalRow, row.schema()).numBytes());
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -345,15 +346,15 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
Assertions.assertEquals("_row_key:key1,pii_col:pi", key.getRecordKey());
|
||||
Assertions.assertEquals("timestamp=4357686", key.getPartitionPath());
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
Assertions.assertEquals("_row_key:key1,pii_col:pi", keyGenerator.getRecordKey(row));
|
||||
Assertions.assertEquals("timestamp=4357686", keyGenerator.getPartitionPath(row));
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "timestamp=4357686");
|
||||
Assertions.assertEquals(UTF8String.fromString("timestamp=4357686"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -372,14 +373,14 @@ public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=20200321");
|
||||
Assertions.assertEquals("_row_key:key1,pii_col:pi", key.getRecordKey());
|
||||
Assertions.assertEquals("timestamp=4357686/ts_ms=20200321", key.getPartitionPath());
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=20200321");
|
||||
Assertions.assertEquals("_row_key:key1,pii_col:pi", keyGenerator.getRecordKey(row));
|
||||
Assertions.assertEquals("timestamp=4357686/ts_ms=20200321", keyGenerator.getPartitionPath(row));
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "timestamp=4357686/ts_ms=20200321");
|
||||
Assertions.assertEquals(UTF8String.fromString("timestamp=4357686/ts_ms=20200321"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +68,6 @@ public class TestGlobalDeleteRecordGenerator extends KeyGeneratorTestUtilities {
|
||||
public void testWrongRecordKeyField() {
|
||||
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -78,7 +77,6 @@ public class TestGlobalDeleteRecordGenerator extends KeyGeneratorTestUtilities {
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "");
|
||||
keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType);
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
|
||||
|
||||
@@ -94,7 +94,6 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
public void testWrongRecordKeyField() {
|
||||
NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -103,7 +102,7 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "timestamp");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "");
|
||||
NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(properties);
|
||||
assertEquals(keyGenerator.getRecordKeyFields().size(), 1);
|
||||
assertEquals(keyGenerator.getRecordKeyFieldNames().size(), 1);
|
||||
assertEquals(keyGenerator.getPartitionPathFields().size(), 0);
|
||||
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
@@ -120,7 +119,7 @@ public class TestNonpartitionedKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
properties.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "timestamp,driver");
|
||||
properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "");
|
||||
NonpartitionedKeyGenerator keyGenerator = new NonpartitionedKeyGenerator(properties);
|
||||
assertEquals(keyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(keyGenerator.getRecordKeyFieldNames().size(), 2);
|
||||
assertEquals(keyGenerator.getPartitionPathFields().size(), 0);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
|
||||
@@ -21,11 +21,13 @@ package org.apache.hudi.keygen;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieException;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.InternalRow;
|
||||
import org.apache.spark.unsafe.types.UTF8String;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
@@ -34,6 +36,7 @@ import org.junit.jupiter.params.provider.MethodSource;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
private TypedProperties getCommonProps() {
|
||||
@@ -88,35 +91,37 @@ public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
@Test
|
||||
public void testNullPartitionPathFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongPartitionPathField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongPartitionPathFieldProps());
|
||||
GenericRecord record = getRecord();
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(record), KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(KeyGeneratorTestUtilities.getRow(record)),
|
||||
KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH);
|
||||
// TODO this should throw as well
|
||||
//assertThrows(HoodieException.class, () -> {
|
||||
// keyGenerator.getPartitionPath(record);
|
||||
//});
|
||||
assertThrows(HoodieException.class, () -> {
|
||||
keyGenerator.getPartitionPath(KeyGeneratorTestUtilities.getRow(record));
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getComplexRecordKeyProp());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldSchemaInfoIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -124,15 +129,15 @@ public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(getRecord());
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
Assertions.assertEquals("key1", key.getRecordKey());
|
||||
Assertions.assertEquals("timestamp=4357686", key.getPartitionPath());
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
Assertions.assertEquals("key1", keyGenerator.getRecordKey(row));
|
||||
Assertions.assertEquals("timestamp=4357686", keyGenerator.getPartitionPath(row));
|
||||
|
||||
InternalRow internalRow = KeyGeneratorTestUtilities.getInternalRow(row);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(internalRow, row.schema()), "timestamp=4357686");
|
||||
Assertions.assertEquals(UTF8String.fromString("timestamp=4357686"), keyGenerator.getPartitionPath(internalRow, row.schema()));
|
||||
}
|
||||
|
||||
private static Stream<GenericRecord> nestedColTestRecords() {
|
||||
|
||||
@@ -34,6 +34,7 @@ import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.InternalRow;
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.apache.spark.unsafe.types.UTF8String;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@@ -138,7 +139,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("2020-01-06 12"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
|
||||
// timezone is GMT+8:00, createTime is BigDecimal
|
||||
BigDecimal decimal = new BigDecimal("1578283932000.0001");
|
||||
@@ -186,7 +187,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 08", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("1970-01-01 08", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("1970-01-01 08"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
|
||||
// timestamp is DATE_STRING, timezone is GMT, createTime is null
|
||||
baseRecord.put("createTimeString", null);
|
||||
@@ -198,7 +199,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 12:00:00", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("1970-01-01 12:00:00", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("1970-01-01 12:00:00"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -216,7 +217,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2024-10-04 12", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("2024-10-04 12", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("2024-10-04 12"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
|
||||
// timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
@@ -229,7 +230,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-02 12", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("1970-01-02 12", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("1970-01-02 12"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
|
||||
// timezone is GMT. number of days store integer in mysql
|
||||
baseRecord.put("createTime", 18736L);
|
||||
@@ -260,7 +261,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2021/12/03", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("2021/12/03", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("2021/12/03"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
|
||||
// timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
@@ -274,7 +275,7 @@ public class TestTimestampBasedKeyGenerator {
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970/01/01", keyGen.getPartitionPath(baseRow));
|
||||
internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow);
|
||||
assertEquals("1970/01/01", keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
assertEquals(UTF8String.fromString("1970/01/01"), keyGen.getPartitionPath(internalRow, baseRow.schema()));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -43,7 +43,7 @@ import java.util.stream.Collectors;
|
||||
public class KeyGeneratorTestUtilities {
|
||||
|
||||
public static final String NESTED_COL_SCHEMA = "{\"type\":\"record\", \"name\":\"nested_col\",\"fields\": ["
|
||||
+ "{\"name\": \"prop1\",\"type\": \"string\"},{\"name\": \"prop2\", \"type\": \"long\"}]}";
|
||||
+ "{\"name\": \"prop1\",\"type\": [\"null\", \"string\"]},{\"name\": \"prop2\", \"type\": \"long\"}]}";
|
||||
public static final String EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"ts_ms\", \"type\": \"string\"},"
|
||||
|
||||
Reference in New Issue
Block a user