Moving dependencies off CDH to Apache + Hive2 support
- Tests redone in the process - Main changes are to RealtimeRecordReader and how it treats maps/arrays - Make Hive sync work with Hive 1/2 and CDH environments - Fixes to handle corner cases in Hive queries - Spark Hive integration - Working version across Apache and CDH distributions - Known issue - https://github.com/uber/hudi/issues/439
This commit is contained in:
committed by
vinoth chandar
parent
2b1af18941
commit
a5359662be
@@ -333,4 +333,13 @@ public class HoodieCommitMetadata implements Serializable {
|
||||
mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieCommitMetadata{"
|
||||
+ "partitionToWriteStats=" + partitionToWriteStats
|
||||
+ ", compacted=" + compacted
|
||||
+ ", extraMetadataMap=" + extraMetadataMap
|
||||
+ '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,4 +94,4 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
public void remove() {
  // Intentional no-op: this reader's iterator does not support element removal.
  // NOTE(review): the Iterator contract would normally throw
  // UnsupportedOperationException here — confirm no caller relies on the
  // silent no-op before tightening this.
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,9 +27,11 @@ import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.DeflaterOutputStream;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.Schema.Field;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericDatumWriter;
|
||||
@@ -136,6 +138,26 @@ public class HoodieAvroUtils {
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add null fields to passed in schema. Caller is responsible for ensuring there is no duplicates.
|
||||
* As different query engines have varying constraints regarding treating the case-sensitivity of fields, its best
|
||||
* to let caller determine that.
|
||||
* @param schema Passed in schema
|
||||
* @param newFieldNames Null Field names to be added
|
||||
* @return
|
||||
*/
|
||||
public static Schema appendNullSchemaFields(Schema schema, List<String> newFieldNames) {
|
||||
List<Field> newFields = schema.getFields().stream().map(field -> {
|
||||
return new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue());
|
||||
}).collect(Collectors.toList());
|
||||
for (String newField : newFieldNames) {
|
||||
newFields.add(new Schema.Field(newField, METADATA_FIELD_SCHEMA, "", null));
|
||||
}
|
||||
Schema newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
|
||||
newSchema.setFields(newFields);
|
||||
return newSchema;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the Hoodie commit metadata into the provided Generic Record.
|
||||
*/
|
||||
@@ -155,7 +177,7 @@ public class HoodieAvroUtils {
|
||||
for (Schema.Field f : record.getSchema().getFields()) {
|
||||
newRecord.put(f.name(), record.get(f.name()));
|
||||
}
|
||||
if (!new GenericData().validate(newSchema, newRecord)) {
|
||||
if (!GenericData.get().validate(newSchema, newRecord)) {
|
||||
throw new SchemaCompatabilityException(
|
||||
"Unable to validate the rewritten record " + record + " against schema "
|
||||
+ newSchema);
|
||||
|
||||
Reference in New Issue
Block a user