From f468c20c6cb237c1d190989045f397758db8d685 Mon Sep 17 00:00:00 2001 From: Satish Kotha Date: Tue, 25 Aug 2020 13:37:48 -0700 Subject: [PATCH] [HUDI-1226] Fix ComplexKeyGenerator for non-partitioned tables --- .../hudi/keygen/ComplexKeyGenerator.java | 4 ++-- .../org/apache/hudi/keygen/KeyGenUtils.java | 4 ++++ .../test/java/TestComplexKeyGenerator.java | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java index 664824c31..e679e99ad 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java +++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java @@ -37,9 +37,9 @@ public class ComplexKeyGenerator extends BuiltinKeyGenerator { public ComplexKeyGenerator(TypedProperties props) { super(props); this.recordKeyFields = Arrays.stream(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()) - .split(",")).map(String::trim).collect(Collectors.toList()); + .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); this.partitionPathFields = Arrays.stream(props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY()) - .split(",")).map(String::trim).collect(Collectors.toList()); + .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); } @Override diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java index c4ac29b2b..8e9700b27 100644 --- a/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java +++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java @@ -59,6 +59,10 @@ public class KeyGenUtils { public static String getRecordPartitionPath(GenericRecord record, List partitionPathFields, boolean hiveStylePartitioning, boolean encodePartitionPath) { + if (partitionPathFields.isEmpty()) { + return ""; + } + StringBuilder partitionPath = new StringBuilder(); for (String partitionPathField : partitionPathFields) { String fieldVal = HoodieAvroUtils.getNestedFieldValAsString(record, partitionPathField, true); diff --git a/hudi-spark/src/test/java/TestComplexKeyGenerator.java b/hudi-spark/src/test/java/TestComplexKeyGenerator.java index f0671fa71..a5a88c2d7 100644 --- a/hudi-spark/src/test/java/TestComplexKeyGenerator.java +++ b/hudi-spark/src/test/java/TestComplexKeyGenerator.java @@ -64,4 +64,23 @@ public class TestComplexKeyGenerator { assertEquals(partitionPath, hoodieKey.getPartitionPath()); } + @Test + public void testMultipleValueKeyGeneratorNonPartitioned() { + TypedProperties properties = new TypedProperties(); + properties.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key,timestamp"); + properties.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), ""); + ComplexKeyGenerator compositeKeyGenerator = new ComplexKeyGenerator(properties); + assertEquals(compositeKeyGenerator.getRecordKeyFields().size(), 2); + assertEquals(compositeKeyGenerator.getPartitionPathFields().size(), 0); + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); + GenericRecord record = dataGenerator.generateGenericRecords(1).get(0); + String rowKey = + "_row_key" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("_row_key").toString() + "," + + "timestamp" + ComplexKeyGenerator.DEFAULT_RECORD_KEY_SEPARATOR + record.get("timestamp").toString(); + String partitionPath = ""; + HoodieKey hoodieKey = compositeKeyGenerator.getKey(record); + assertEquals(rowKey, hoodieKey.getRecordKey()); + assertEquals(partitionPath, hoodieKey.getPartitionPath()); + } + } \ No newline at end of file