1
0

[HUDI-1522] Add a new pipeline for Flink writer (#2430)

* [HUDI-1522] Add a new pipeline for Flink writer
This commit is contained in:
Danny Chan
2021-01-28 08:53:13 +08:00
committed by GitHub
parent 7b2e658ac0
commit bc0325f6ea
40 changed files with 3613 additions and 302 deletions

View File

@@ -57,10 +57,10 @@
/* overlaps with 'commitsRollback' field. Adding this to track action type for all the instants being rolled back. */
{
"name": "instantsRollback",
"default": null,
"default": [],
"type": {
"type": "array",
"default": null,
"default": [],
"items": "HoodieInstantInfo"
}
}

View File

@@ -217,7 +217,7 @@ public class HoodieAvroUtils {
private static Schema initRecordKeySchema() {
Schema.Field recordKeyField =
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", NullNode.getInstance());
new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
recordKeySchema.setFields(Collections.singletonList(recordKeyField));
return recordKeySchema;
@@ -263,9 +263,9 @@ public class HoodieAvroUtils {
*/
public static Schema appendNullSchemaFields(Schema schema, List<String> newFieldNames) {
List<Field> newFields = schema.getFields().stream()
.map(field -> new Field(field.name(), field.schema(), field.doc(), field.defaultValue())).collect(Collectors.toList());
.map(field -> new Field(field.name(), field.schema(), field.doc(), field.defaultVal())).collect(Collectors.toList());
for (String newField : newFieldNames) {
newFields.add(new Schema.Field(newField, METADATA_FIELD_SCHEMA, "", NullNode.getInstance()));
newFields.add(new Schema.Field(newField, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE));
}
Schema newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
newSchema.setFields(newFields);
@@ -329,7 +329,8 @@ public class HoodieAvroUtils {
private static void copyOldValueOrSetDefault(GenericRecord oldRecord, GenericRecord newRecord, Schema.Field f) {
// cache the result of oldRecord.get() to save CPU expensive hash lookup
Object fieldValue = oldRecord.get(f.name());
Schema oldSchema = oldRecord.getSchema();
Object fieldValue = oldSchema.getField(f.name()) == null ? null : oldRecord.get(f.name());
if (fieldValue == null) {
if (f.defaultVal() instanceof JsonProperties.Null) {
newRecord.put(f.name(), null);
@@ -381,7 +382,7 @@ public class HoodieAvroUtils {
throw new HoodieException("Field " + fn + " not found in log schema. Query cannot proceed! "
+ "Derived Schema Fields: " + new ArrayList<>(schemaFieldsMap.keySet()));
} else {
projectedFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()));
projectedFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal()));
}
}

View File

@@ -79,7 +79,14 @@ public class OverwriteWithLatestAvroPayload extends BaseAvroPayload
* @return {@code true} if record represents a delete record. {@code false} otherwise.
*/
protected boolean isDeleteRecord(GenericRecord genericRecord) {
  final String isDeleteKey = "_hoodie_is_deleted";
  // Newer Avro versions throw from GenericRecord.get(String) when the field name
  // does not exist in the schema, so the schema is consulted first and the marker
  // is only read once the field is known to be declared.
  if (genericRecord.getSchema().getField(isDeleteKey) == null) {
    // No delete-marker field in the schema: the record cannot be a delete record.
    return false;
  }
  // The marker counts only when it is an actual Boolean holding true; a null or
  // non-Boolean value is treated as "not deleted".
  Object deleteMarker = genericRecord.get(isDeleteKey);
  return (deleteMarker instanceof Boolean && (boolean) deleteMarker);
}

View File

@@ -98,8 +98,8 @@ public class HoodieTestDataGenerator {
+ "{\"name\": \"amount\",\"type\": \"double\"},{\"name\": \"currency\", \"type\": \"string\"}]}},";
public static final String FARE_FLATTENED_SCHEMA = "{\"name\": \"fare\", \"type\": \"double\"},"
+ "{\"name\": \"currency\", \"type\": \"string\"},";
public static final String TIP_NESTED_SCHEMA = "{\"name\": \"tip_history\", \"default\": null, \"type\": {\"type\": "
+ "\"array\", \"items\": {\"type\": \"record\", \"default\": null, \"name\": \"tip_history\", \"fields\": ["
public static final String TIP_NESTED_SCHEMA = "{\"name\": \"tip_history\", \"default\": [], \"type\": {\"type\": "
+ "\"array\", \"default\": [], \"items\": {\"type\": \"record\", \"default\": null, \"name\": \"tip_history\", \"fields\": ["
+ "{\"name\": \"amount\", \"type\": \"double\"}, {\"name\": \"currency\", \"type\": \"string\"}]}}},";
public static final String MAP_TYPE_SCHEMA = "{\"name\": \"city_to_state\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},";
public static final String EXTRA_TYPE_SCHEMA = "{\"name\": \"distance_in_meters\", \"type\": \"int\"},"