1
0

Moving dependencies off cdh to apache + Hive2 support

- Tests redone in the process
 - Main changes are to RealtimeRecordReader and how it treats maps/arrays
 - Make hive sync work with Hive 1/2 and CDH environments
 - Fixes to handle corner cases in Hive queries
 - Spark Hive integration - Working version across Apache and CDH versions
 - Known Issue - https://github.com/uber/hudi/issues/439
This commit is contained in:
Vinoth Chandar
2018-07-15 22:34:02 -07:00
committed by vinoth chandar
parent 2b1af18941
commit a5359662be
32 changed files with 1983 additions and 407 deletions

View File

@@ -333,4 +333,13 @@ public class HoodieCommitMetadata implements Serializable {
mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY);
return mapper;
}
@Override
public String toString() {
  // Render the commit metadata's key fields for logging/debugging.
  final StringBuilder sb = new StringBuilder("HoodieCommitMetadata{");
  sb.append("partitionToWriteStats=").append(partitionToWriteStats);
  sb.append(", compacted=").append(compacted);
  sb.append(", extraMetadataMap=").append(extraMetadataMap);
  sb.append('}');
  return sb.toString();
}
}

View File

@@ -94,4 +94,4 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
// No-op: element removal is not supported by this log-format reader's iterator.
// NOTE(review): silently ignoring remove() instead of throwing
// UnsupportedOperationException appears intentional here — confirm with callers.
public void remove() {
}
}
}

View File

@@ -27,9 +27,11 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
@@ -136,6 +138,26 @@ public class HoodieAvroUtils {
return record;
}
/**
 * Returns a copy of the passed-in schema with the given nullable metadata fields appended.
 *
 * <p>The caller is responsible for ensuring {@code newFieldNames} contains no duplicates.
 * Since different query engines vary in how they treat field-name case-sensitivity, that
 * check is deliberately left to the caller.
 *
 * @param schema        the schema to extend
 * @param newFieldNames names of the null fields to be added
 * @return a new record schema containing all original fields plus the appended ones
 */
public static Schema appendNullSchemaFields(Schema schema, List<String> newFieldNames) {
  // An Avro Field can only be attached to one schema, so each existing field is
  // re-created before being placed into the new record — TODO confirm against Avro version.
  List<Field> combinedFields = new ArrayList<>();
  for (Field field : schema.getFields()) {
    combinedFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue()));
  }
  newFieldNames.forEach(
      fieldName -> combinedFields.add(new Schema.Field(fieldName, METADATA_FIELD_SCHEMA, "", null)));
  Schema extendedSchema =
      Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
  extendedSchema.setFields(combinedFields);
  return extendedSchema;
}
/**
* Adds the Hoodie commit metadata into the provided Generic Record.
*/
@@ -155,7 +177,7 @@ public class HoodieAvroUtils {
for (Schema.Field f : record.getSchema().getFields()) {
newRecord.put(f.name(), record.get(f.name()));
}
if (!new GenericData().validate(newSchema, newRecord)) {
if (!GenericData.get().validate(newSchema, newRecord)) {
throw new SchemaCompatabilityException(
"Unable to validate the rewritten record " + record + " against schema "
+ newSchema);