From 83796b3189570182c68a9c41e57b356124c301ca Mon Sep 17 00:00:00 2001 From: Alexander Filipchik Date: Wed, 13 May 2020 18:04:38 -0700 Subject: [PATCH] [HUDI-793] Adding proper default to hudi metadata fields and proper handling to rewrite routine (#1513) * Adding proper default to hudi metadata fields and proper handling to rewrite routine * Handle fields declared with a null default Co-authored-by: Alex Filipchik --- .../org/apache/hudi/avro/HoodieAvroUtils.java | 17 +++++++++++------ .../apache/hudi/avro/TestHoodieAvroUtils.java | 7 ++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index d56b7d92d..bffe8df23 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.avro; +import org.apache.avro.JsonProperties.Null; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.SchemaCompatabilityException; @@ -141,15 +142,15 @@ public class HoodieAvroUtils { List parentFields = new ArrayList<>(); Schema.Field commitTimeField = - new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", (Object) null); + new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); Schema.Field commitSeqnoField = - new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", (Object) null); + new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); Schema.Field recordKeyField = - new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", (Object) null); + new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); Schema.Field partitionPathField = - new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", (Object) null); + new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); Schema.Field fileNameField = - new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", (Object) null); + new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()); parentFields.add(commitTimeField); parentFields.add(commitSeqnoField); @@ -253,7 +254,11 @@ public class HoodieAvroUtils { GenericRecord newRecord = new GenericData.Record(newSchema); for (Schema.Field f : fieldsToWrite) { if (record.get(f.name()) == null) { - newRecord.put(f.name(), f.defaultVal()); + if (f.defaultVal() instanceof Null) { + newRecord.put(f.name(), null); + } else { + newRecord.put(f.name(), f.defaultVal()); + } } else { newRecord.put(f.name(), record.get(f.name())); } diff --git a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java index e2c12661e..9c5e046e9 100644 --- a/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/avro/TestHoodieAvroUtils.java @@ -47,12 +47,13 @@ public class TestHoodieAvroUtils { + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}"; - - private static String SCHEMA_WITH_METADATA_FIELD = "{\"type\": \"record\",\"name\": \"testrec2\",\"fields\": [ " + private static String SCHEMA_WITH_METADATA_FIELD = + "{\"type\": \"record\",\"name\": \"testrec2\",\"fields\": [ " + "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + "{\"name\": \"non_pii_col\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}," - + "{\"name\": \"_hoodie_commit_time\", \"type\": [\"null\", \"string\"]}]}"; + + "{\"name\": \"_hoodie_commit_time\", \"type\": [\"null\", \"string\"]}," + + "{\"name\": \"nullable_field\",\"type\": [\"null\" ,\"string\"],\"default\": null}]}"; @Test public void testPropsPresent() {