diff --git a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java index 9c1dc2273..ae92b00f9 100644 --- a/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java +++ b/hoodie-common/src/main/java/com/uber/hoodie/common/util/HoodieAvroUtils.java @@ -27,6 +27,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; @@ -40,6 +41,7 @@ import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.EncoderFactory; +import org.codehaus.jackson.JsonNode; /** * Helper class to do common stuff across Avro. @@ -108,7 +110,11 @@ public class HoodieAvroUtils { parentFields.add(fileNameField); for (Schema.Field field : schema.getFields()) { if (!isMetadataField(field.name())) { - parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null)); + Schema.Field newField = new Schema.Field(field.name(), field.schema(), field.doc(), null); + for (Map.Entry prop : field.getJsonProps().entrySet()) { + newField.addProp(prop.getKey(), prop.getValue()); + } + parentFields.add(newField); } } diff --git a/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestHoodieAvroUtils.java b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestHoodieAvroUtils.java new file mode 100644 index 000000000..fdae208c1 --- /dev/null +++ b/hoodie-common/src/test/java/com/uber/hoodie/common/util/TestHoodieAvroUtils.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 Uber Technologies, Inc. (hoodie-dev-group@uber.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.uber.hoodie.common.util;
+
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.codehaus.jackson.JsonNode;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests that {@code HoodieAvroUtils.addMetadataFields} keeps the custom JSON properties
+ * declared on the fields of the input schema.
+ */
+public class TestHoodieAvroUtils {
+
+  /**
+   * Record schema with four fields; only {@code pii_col} carries a custom property
+   * ({@code column_category}).
+   */
+  private static final String EXAMPLE_SCHEMA = "{\"type\": \"record\","
+      + "\"name\": \"testrec\","
+      + "\"fields\": [ "
+      + "{\"name\": \"timestamp\",\"type\": \"double\"},"
+      + "{\"name\": \"_row_key\", \"type\": \"string\"},"
+      + "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
+      + "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}";
+
+  /**
+   * Parses {@link #EXAMPLE_SCHEMA}, adds the metadata fields, and checks every non-metadata
+   * field of the result: {@code pii_col} must still expose {@code column_category}, and all
+   * other fields must have no custom properties.
+   */
+  @Test
+  public void testPropsPresent() {
+    Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(EXAMPLE_SCHEMA));
+    boolean piiPresent = false;
+    for (Schema.Field field : schema.getFields()) {
+      // Metadata fields are added by the method under test and are not part of this check.
+      if (HoodieAvroUtils.isMetadataField(field.name())) {
+        continue;
+      }
+
+      Assert.assertNotNull("field name is null", field.name());
+      Map<String, JsonNode> props = field.getJsonProps();
+      Assert.assertNotNull("The property is null", props);
+
+      if ("pii_col".equals(field.name())) {
+        piiPresent = true;
+        // The message names the property that is actually asserted on.
+        Assert.assertTrue("column_category is removed in field 'pii_col'",
+            props.containsKey("column_category"));
+      } else {
+        Assert.assertTrue("The property shows up but not set", props.isEmpty());
+      }
+    }
+    // Guards against the loop passing vacuously when pii_col is missing from the result.
+    Assert.assertTrue("column pii_col doesn't show up", piiPresent);
+  }
+}