[HUDI-912] Refactor and relocate KeyGenerator to support more engines (#2200)
* [HUDI-912] Refactor and relocate KeyGenerator to support more engines
* Rename KeyGenerators
This commit is contained in:
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static junit.framework.TestCase.assertEquals;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
|
||||
import org.apache.hudi.keygen.ComplexKeyGenerator;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestComplexKeyGenerator {
|
||||
|
||||
@Test
|
||||
public void testSingleValueKeyGenerator() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 1);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 1);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey = record.get("_row_key").toString();
|
||||
String partitionPath = record.get("timestamp").toString();
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals("_row_key:" + rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleValueKeyGenerator() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key,timestamp");
|
||||
properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "rider,driver");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 2);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey =
|
||||
"_row_key" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + ","
|
||||
+ "timestamp" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString();
|
||||
String partitionPath = record.get("rider").toString() + "/" + record.get("driver").toString();
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals(rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleValueKeyGeneratorNonPartitioned() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key,timestamp");
|
||||
properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "");
|
||||
ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties);
|
||||
assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2);
|
||||
assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0);
|
||||
HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator();
|
||||
GenericRecord record = dataGenerator.generateGenericRecords(1).get(0);
|
||||
String rowKey =
|
||||
"_row_key" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + ","
|
||||
+ "timestamp" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString();
|
||||
String partitionPath = "";
|
||||
HoodieKey hoodieKey = compositeKeyGenerator.getKey(record);
|
||||
assertEquals(rowKey, hoodieKey.getRecordKey());
|
||||
assertEquals(partitionPath, hoodieKey.getPartitionPath());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestComplexKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key, pii_col");
|
||||
} else {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
}
|
||||
properties.put(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutPartitionPathProp() {
|
||||
return getCommonProps(false);
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp,ts_ms");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullPartitionPathFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new ComplexKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
ComplexKeyGenerator keyGenerator = new ComplexKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=2020-03-21");
|
||||
}
|
||||
}
|
||||
@@ -1,224 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestCustomKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key, pii_col");
|
||||
} else {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
}
|
||||
properties.put(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForSimpleKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getImproperPartitionFieldFormatProp() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getInvalidPartitionKeyTypeProps() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp:dummy");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyWithSimplePartitionProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyAndPartitionPathProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp:simple,ts_ms:timestamp");
|
||||
populateNecessaryPropsForTimestampBasedKeyGen(properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropsWithoutRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp:simple");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private void populateNecessaryPropsForTimestampBasedKeyGen(TypedProperties properties) {
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.timestamp.type", "DATE_STRING");
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd");
|
||||
properties.put("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyyMMdd");
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForTimestampBasedKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "ts_ms:timestamp");
|
||||
populateNecessaryPropsForTimestampBasedKeyGen(properties);
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesForNonPartitionedKeyGen() {
|
||||
TypedProperties properties = getCommonProps(false);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleKeyGenerator() {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForSimpleKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTimestampBasedKeyGenerator() {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForTimestampBasedKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "ts_ms=20200321");
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "ts_ms=20200321");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonPartitionedKeyGenerator() {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropertiesForNonPartitionedKeyGen());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertTrue(key.getPartitionPath().isEmpty());
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertTrue(keyGenerator.getPartitionPath(row).isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidPartitionKeyType() {
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getInvalidPartitionKeyTypeProps());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when invalid PartitionKeyType is provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomKeyGenerator.PartitionKeyType.DUMMY"));
|
||||
}
|
||||
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getInvalidPartitionKeyTypeProps());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getPartitionPath(row);
|
||||
Assertions.fail("should fail when invalid PartitionKeyType is provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("No enum constant org.apache.hudi.keygen.CustomKeyGenerator.PartitionKeyType.DUMMY"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoRecordKeyFieldProp() {
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropsWithoutRecordKeyFieldProps());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when record key field is not provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
|
||||
}
|
||||
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getPropsWithoutRecordKeyFieldProps());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getRecordKey(row);
|
||||
Assertions.fail("should fail when record key field is not provided!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Property hoodie.datasource.write.recordkey.field not found"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPartitionFieldsInImproperFormat() {
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getImproperPartitionFieldFormatProp());
|
||||
keyGenerator.getKey(getRecord());
|
||||
Assertions.fail("should fail when partition key field is provided in improper format!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
|
||||
}
|
||||
|
||||
try {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getImproperPartitionFieldFormatProp());
|
||||
GenericRecord record = getRecord();
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
keyGenerator.getPartitionPath(row);
|
||||
Assertions.fail("should fail when partition key field is provided in improper format!");
|
||||
} catch (Exception e) {
|
||||
Assertions.assertTrue(e.getMessage().contains("Unable to find field names for partition path in proper format"));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeyWithSimplePartitionPath() {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyWithSimplePartitionProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeysWithComplexPartitionPath() {
|
||||
KeyGenerator keyGenerator = new CustomKeyGenerator(getComplexRecordKeyAndPartitionPathProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686/ts_ms=20200321");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686/ts_ms=20200321");
|
||||
}
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestGlobalDeleteKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps(boolean getComplexRecordKey) {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
if (getComplexRecordKey) {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key,pii_col");
|
||||
} else {
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
}
|
||||
properties.put(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps(true);
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp,ts_ms");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new GlobalDeleteKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
GlobalDeleteKeyGenerator keyGenerator = new GlobalDeleteKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(record);
|
||||
Assertions.assertEquals(key.getRecordKey(), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "");
|
||||
keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType);
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "_row_key:key1,pii_col:pi");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "");
|
||||
}
|
||||
}
|
||||
@@ -1,124 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.exception.HoodieKeyException;
|
||||
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hudi.testutils.KeyGeneratorTestUtilities;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
public class TestSimpleKeyGenerator extends KeyGeneratorTestUtilities {
|
||||
|
||||
private TypedProperties getCommonProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
properties.put(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "true");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutPartitionPathProp() {
|
||||
return getCommonProps();
|
||||
}
|
||||
|
||||
private TypedProperties getPropertiesWithoutRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongRecordKeyFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_wrong_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getWrongPartitionPathFieldProps() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "_wrong_partition_path");
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getComplexRecordKeyProp() {
|
||||
TypedProperties properties = new TypedProperties();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
properties.put(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key,pii_col");
|
||||
return properties;
|
||||
}
|
||||
|
||||
private TypedProperties getProps() {
|
||||
TypedProperties properties = getCommonProps();
|
||||
properties.put(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "timestamp");
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullPartitionPathFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutPartitionPathProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNullRecordKeyFields() {
|
||||
Assertions.assertThrows(IllegalArgumentException.class, () -> new SimpleKeyGenerator(getPropertiesWithoutRecordKeyProp()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongRecordKeyFieldProps());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWrongPartitionPathField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getWrongPartitionPathFieldProps());
|
||||
GenericRecord record = getRecord();
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(record), KeyGenUtils.DEFAULT_PARTITION_PATH);
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(KeyGeneratorTestUtilities.getRow(record)),
|
||||
KeyGenUtils.DEFAULT_PARTITION_PATH);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexRecordKeyField() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getComplexRecordKeyProp());
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.getRecordKey(getRecord()));
|
||||
Assertions.assertThrows(HoodieKeyException.class, () -> keyGenerator.buildFieldPositionMapIfNeeded(KeyGeneratorTestUtilities.structType));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHappyFlow() {
|
||||
SimpleKeyGenerator keyGenerator = new SimpleKeyGenerator(getProps());
|
||||
GenericRecord record = getRecord();
|
||||
HoodieKey key = keyGenerator.getKey(getRecord());
|
||||
Assertions.assertEquals(key.getRecordKey(), "key1");
|
||||
Assertions.assertEquals(key.getPartitionPath(), "timestamp=4357686");
|
||||
|
||||
Row row = KeyGeneratorTestUtilities.getRow(record);
|
||||
Assertions.assertEquals(keyGenerator.getRecordKey(row), "key1");
|
||||
Assertions.assertEquals(keyGenerator.getPartitionPath(row), "timestamp=4357686");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,366 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.keygen;
|
||||
|
||||
import org.apache.hudi.AvroConversionHelper;
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
import org.apache.hudi.DataSourceWriteOptions;
|
||||
import org.apache.hudi.common.config.TypedProperties;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.testutils.SchemaTestUtil;
|
||||
import org.apache.hudi.exception.HoodieDeltaStreamerException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import scala.Function1;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class TestTimestampBasedKeyGenerator {
|
||||
|
||||
private GenericRecord baseRecord;
|
||||
private TypedProperties properties = new TypedProperties();
|
||||
|
||||
private Schema schema;
|
||||
private StructType structType;
|
||||
private Row baseRow;
|
||||
|
||||
@BeforeEach
|
||||
public void initialize() throws IOException {
|
||||
schema = SchemaTestUtil.getTimestampEvolvedSchema();
|
||||
structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
|
||||
baseRecord = SchemaTestUtil
|
||||
.generateAvroRecordFromJson(schema, 1, "001", "f1");
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
|
||||
properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "field1");
|
||||
properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "createTime");
|
||||
properties.setProperty(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(), "false");
|
||||
}
|
||||
|
||||
private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone, String scalarType) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType);
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, dateFormat);
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, timezone);
|
||||
|
||||
if (scalarType != null) {
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit", scalarType);
|
||||
}
|
||||
|
||||
return properties;
|
||||
}
|
||||
|
||||
private Row genericRecordToRow(GenericRecord baseRecord) {
|
||||
Function1<Object, Object> convertor = AvroConversionHelper.createConverterToRow(schema, structType);
|
||||
Row row = (Row) convertor.apply(baseRecord);
|
||||
int fieldCount = structType.fieldNames().length;
|
||||
Object[] values = new Object[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
values[i] = row.get(i);
|
||||
}
|
||||
return new GenericRowWithSchema(values, structType);
|
||||
}
|
||||
|
||||
private TypedProperties getBaseKeyConfig(String timestampType, String inputFormatList, String inputFormatDelimiterRegex, String inputTimezone, String outputFormat, String outputTimezone) {
|
||||
if (timestampType != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType);
|
||||
}
|
||||
if (inputFormatList != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, inputFormatList);
|
||||
}
|
||||
if (inputFormatDelimiterRegex != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, inputFormatDelimiterRegex);
|
||||
}
|
||||
if (inputTimezone != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, inputTimezone);
|
||||
}
|
||||
if (outputFormat != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputFormat);
|
||||
}
|
||||
if (outputTimezone != null) {
|
||||
properties.setProperty(TimestampBasedKeyGenerator.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, outputTimezone);
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTimestampBasedKeyGenerator() throws IOException {
|
||||
// timezone is GMT+8:00
|
||||
baseRecord.put("createTime", 1578283932000L);
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk1.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk2 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 04", hk2.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
assertEquals("2020-01-06 04", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timestamp is DATE_STRING, timezone is GMT+8:00
|
||||
baseRecord.put("createTime", "2020-01-06 12:12:12");
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk3 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk3.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk4 = keyGen.getKey(baseRecord);
|
||||
assertEquals("2020-01-06 12", hk4.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT+8:00, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null);
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk5 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-01 08", hk5.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 08", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timestamp is DATE_STRING, timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh:mm:ss", "GMT", null);
|
||||
properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk6 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-01 12:00:00", hk6.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-01 12:00:00", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScalar() throws IOException {
|
||||
// timezone is GMT+8:00
|
||||
baseRecord.put("createTime", 20000L);
|
||||
|
||||
// timezone is GMT
|
||||
properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days");
|
||||
TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
assertEquals(hk1.getPartitionPath(), "2024-10-04 12");
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2024-10-04 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
// timezone is GMT, createTime is null
|
||||
baseRecord.put("createTime", null);
|
||||
properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days");
|
||||
keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk2 = keyGen.getKey(baseRecord);
|
||||
assertEquals("1970-01-02 12", hk2.getPartitionPath());
|
||||
|
||||
// test w/ Row
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("1970-01-02 12", keyGen.getPartitionPath(baseRow));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_SingleInputFormat_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"GMT");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_SingleInputFormats_ISO8601WithMsZ_OutputTimezoneAsInputDateTimeZone() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.428Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsZ_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040113", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040113", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsWithOffset_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsWithOffset_OutputTimezoneAsUTC() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.123-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040118", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040118", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsEST() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01T13:01:33.123Z");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"EST");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("2020040109", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("2020040109", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() throws IOException {
|
||||
baseRecord.put("createTime", "2020-04-01 13:01:33.123-05:00");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ",
|
||||
"",
|
||||
"",
|
||||
"yyyyMMddHH",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
Assertions.assertThrows(HoodieDeltaStreamerException.class, () -> keyGen.getKey(baseRecord));
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
Assertions.assertThrows(HoodieDeltaStreamerException.class, () -> keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test_ExpectsMatch_MultipleInputFormats_ShortDate_OutputCustomDate() throws IOException {
|
||||
baseRecord.put("createTime", "20200401");
|
||||
properties = this.getBaseKeyConfig(
|
||||
"DATE_STRING",
|
||||
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ,yyyyMMdd",
|
||||
"",
|
||||
"UTC",
|
||||
"MM/dd/yyyy",
|
||||
"UTC");
|
||||
KeyGenerator keyGen = new TimestampBasedKeyGenerator(properties);
|
||||
HoodieKey hk1 = keyGen.getKey(baseRecord);
|
||||
Assertions.assertEquals("04/01/2020", hk1.getPartitionPath());
|
||||
|
||||
baseRow = genericRecordToRow(baseRecord);
|
||||
assertEquals("04/01/2020", keyGen.getPartitionPath(baseRow));
|
||||
}
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hudi.testutils;
|
||||
|
||||
import org.apache.hudi.AvroConversionHelper;
|
||||
import org.apache.hudi.AvroConversionUtils;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.spark.sql.Row;
|
||||
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
|
||||
import org.apache.spark.sql.types.StructType;
|
||||
|
||||
import scala.Function1;
|
||||
|
||||
public class KeyGeneratorTestUtilities {
|
||||
|
||||
public static String exampleSchema = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"ts_ms\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"pii_col\", \"type\": \"string\"}]}";
|
||||
|
||||
public static final String TEST_STRUCTNAME = "test_struct_name";
|
||||
public static final String TEST_RECORD_NAMESPACE = "test_record_namespace";
|
||||
public static Schema schema = new Schema.Parser().parse(exampleSchema);
|
||||
public static StructType structType = AvroConversionUtils.convertAvroSchemaToStructType(schema);
|
||||
|
||||
public GenericRecord getRecord() {
|
||||
GenericRecord record = new GenericData.Record(new Schema.Parser().parse(exampleSchema));
|
||||
record.put("timestamp", 4357686);
|
||||
record.put("_row_key", "key1");
|
||||
record.put("ts_ms", "2020-03-21");
|
||||
record.put("pii_col", "pi");
|
||||
return record;
|
||||
}
|
||||
|
||||
public static Row getRow(GenericRecord record) {
|
||||
return getRow(record, schema, structType);
|
||||
}
|
||||
|
||||
public static Row getRow(GenericRecord record, Schema schema, StructType structType) {
|
||||
Function1<Object, Object> converterFn = AvroConversionHelper.createConverterToRow(schema, structType);
|
||||
Row row = (Row) converterFn.apply(record);
|
||||
int fieldCount = structType.fieldNames().length;
|
||||
Object[] values = new Object[fieldCount];
|
||||
for (int i = 0; i < fieldCount; i++) {
|
||||
values[i] = row.get(i);
|
||||
}
|
||||
return new GenericRowWithSchema(values, structType);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user