[HUDI-727]: Copy default values of fields if not present when rewriting incoming record with new schema (#1427)
This commit is contained in:
@@ -19,7 +19,8 @@
|
||||
package org.apache.hudi.avro;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.codehaus.jackson.JsonNode;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
@@ -30,11 +31,25 @@ import java.util.Map;
|
||||
*/
|
||||
public class TestHoodieAvroUtils {
|
||||
|
||||
private static String EVOLVED_SCHEMA = "{\"type\": \"record\",\"name\": \"testrec1\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"},"
|
||||
+ "{\"name\": \"new_col1\", \"type\": \"string\", \"default\": \"dummy_val\"},"
|
||||
+ "{\"name\": \"new_col2\", \"type\": [\"int\", \"null\"]}]}";
|
||||
|
||||
private static String EXAMPLE_SCHEMA = "{\"type\": \"record\",\"name\": \"testrec\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"}]}";
|
||||
|
||||
|
||||
private static String SCHEMA_WITH_METADATA_FIELD = "{\"type\": \"record\",\"name\": \"testrec2\",\"fields\": [ "
|
||||
+ "{\"name\": \"timestamp\",\"type\": \"double\"},{\"name\": \"_row_key\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"non_pii_col\", \"type\": \"string\"},"
|
||||
+ "{\"name\": \"pii_col\", \"type\": \"string\", \"column_category\": \"user_profile\"},"
|
||||
+ "{\"name\": \"_hoodie_commit_time\", \"type\": [\"null\", \"string\"]}]}";
|
||||
|
||||
@Test
|
||||
public void testPropsPresent() {
|
||||
Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(EXAMPLE_SCHEMA));
|
||||
@@ -45,7 +60,7 @@ public class TestHoodieAvroUtils {
|
||||
}
|
||||
|
||||
Assert.assertNotNull("field name is null", field.name());
|
||||
Map<String, JsonNode> props = field.getJsonProps();
|
||||
Map<String, Object> props = field.getObjectProps();
|
||||
Assert.assertNotNull("The property is null", props);
|
||||
|
||||
if (field.name().equals("pii_col")) {
|
||||
@@ -57,4 +72,39 @@ public class TestHoodieAvroUtils {
|
||||
}
|
||||
Assert.assertTrue("column pii_col doesn't show up", piiPresent);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultValue() {
|
||||
GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EVOLVED_SCHEMA));
|
||||
rec.put("_row_key", "key1");
|
||||
rec.put("non_pii_col", "val1");
|
||||
rec.put("pii_col", "val2");
|
||||
rec.put("timestamp", 3.5);
|
||||
GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(EVOLVED_SCHEMA));
|
||||
Assert.assertEquals(rec1.get("new_col1"), "dummy_val");
|
||||
Assert.assertNull(rec1.get("new_col2"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultValueWithSchemaEvolution() {
|
||||
GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EXAMPLE_SCHEMA));
|
||||
rec.put("_row_key", "key1");
|
||||
rec.put("non_pii_col", "val1");
|
||||
rec.put("pii_col", "val2");
|
||||
rec.put("timestamp", 3.5);
|
||||
GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(EVOLVED_SCHEMA));
|
||||
Assert.assertEquals(rec1.get("new_col1"), "dummy_val");
|
||||
Assert.assertNull(rec1.get("new_col2"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMetadataField() {
|
||||
GenericRecord rec = new GenericData.Record(new Schema.Parser().parse(EXAMPLE_SCHEMA));
|
||||
rec.put("_row_key", "key1");
|
||||
rec.put("non_pii_col", "val1");
|
||||
rec.put("pii_col", "val2");
|
||||
rec.put("timestamp", 3.5);
|
||||
GenericRecord rec1 = HoodieAvroUtils.rewriteRecord(rec, new Schema.Parser().parse(SCHEMA_WITH_METADATA_FIELD));
|
||||
Assert.assertNull(rec1.get("_hoodie_commit_time"));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user