1
0

Reformatting code per Google Code Style all over

This commit is contained in:
Vinoth Chandar
2017-11-12 22:54:56 -08:00
committed by vinoth chandar
parent 5a62480a92
commit e45679f5e2
254 changed files with 21580 additions and 21108 deletions

View File

@@ -17,60 +17,59 @@
package com.uber.hoodie.avro;
import com.uber.hoodie.common.BloomFilter;
import java.util.HashMap;
import org.apache.avro.Schema;
import org.apache.parquet.avro.AvroWriteSupport;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.schema.MessageType;
import java.util.HashMap;
/**
* Wrap AvroWriterSupport for plugging in the bloom filter.
*/
public class HoodieAvroWriteSupport extends AvroWriteSupport {
private BloomFilter bloomFilter;
private String minRecordKey;
private String maxRecordKey;
private BloomFilter bloomFilter;
private String minRecordKey;
private String maxRecordKey;
public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
"com.uber.hoodie.bloomfilter";
public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
"com.uber.hoodie.bloomfilter";
public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
super(schema, avroSchema);
this.bloomFilter = bloomFilter;
public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
super(schema, avroSchema);
this.bloomFilter = bloomFilter;
}
@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
HashMap<String, String> extraMetaData = new HashMap<>();
if (bloomFilter != null) {
extraMetaData
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
if (minRecordKey != null && maxRecordKey != null) {
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
}
}
return new WriteSupport.FinalizedWriteContext(extraMetaData);
}
public void add(String recordKey) {
this.bloomFilter.add(recordKey);
if (minRecordKey != null) {
minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
} else {
minRecordKey = recordKey;
}
@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
HashMap<String, String> extraMetaData = new HashMap<>();
if (bloomFilter != null) {
extraMetaData
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
if (minRecordKey != null && maxRecordKey != null) {
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
}
}
return new WriteSupport.FinalizedWriteContext(extraMetaData);
}
public void add(String recordKey) {
this.bloomFilter.add(recordKey);
if (minRecordKey != null) {
minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
} else {
minRecordKey = recordKey;
}
if (maxRecordKey != null) {
maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
} else {
maxRecordKey = recordKey;
}
if (maxRecordKey != null) {
maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
} else {
maxRecordKey = recordKey;
}
}
}

View File

@@ -17,151 +17,148 @@
package com.uber.hoodie.avro;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.codehaus.jackson.map.ObjectMapper;
/**
* Marjority of this is copied from
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java
* Marjority of this is copied from https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java
* Adjusted for expected behavior of our use cases
*/
public class MercifulJsonConverter {
private final ObjectMapper mapper = new ObjectMapper();
private final Schema baseSchema;
public MercifulJsonConverter(Schema schema) {
this.baseSchema = schema;
private final ObjectMapper mapper = new ObjectMapper();
private final Schema baseSchema;
public MercifulJsonConverter(Schema schema) {
this.baseSchema = schema;
}
public GenericRecord convert(String json) throws IOException {
try {
return convert(mapper.readValue(json, Map.class), baseSchema);
} catch (IOException e) {
throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage());
}
}
private GenericRecord convert(Map<String, Object> raw, Schema schema)
throws IOException {
GenericRecord result = new GenericData.Record(schema);
for (Schema.Field f : schema.getFields()) {
String name = f.name();
Object rawValue = raw.get(name);
if (rawValue != null) {
result.put(f.pos(), typeConvert(rawValue, name, f.schema()));
}
}
return result;
}
public GenericRecord convert(String json) throws IOException {
try {
return convert(mapper.readValue(json, Map.class), baseSchema);
} catch (IOException e) {
throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage());
private Object typeConvert(Object value, String name, Schema schema) throws IOException {
if (isOptional(schema)) {
if (value == null) {
return null;
} else {
schema = getNonNull(schema);
}
} else if (value == null) {
// Always fail on null for non-nullable schemas
throw new JsonConversionException(null, name, schema);
}
switch (schema.getType()) {
case BOOLEAN:
if (value instanceof Boolean) {
return (Boolean) value;
}
}
private GenericRecord convert(Map<String, Object> raw, Schema schema)
throws IOException {
GenericRecord result = new GenericData.Record(schema);
for (Schema.Field f : schema.getFields()) {
String name = f.name();
Object rawValue = raw.get(name);
if (rawValue != null) {
result.put(f.pos(), typeConvert(rawValue, name, f.schema()));
}
break;
case DOUBLE:
if (value instanceof Number) {
return ((Number) value).doubleValue();
}
return result;
}
private Object typeConvert(Object value, String name, Schema schema) throws IOException {
if (isOptional(schema)) {
if (value == null) {
return null;
} else {
schema = getNonNull(schema);
}
} else if (value == null) {
// Always fail on null for non-nullable schemas
throw new JsonConversionException(null, name, schema);
break;
case FLOAT:
if (value instanceof Number) {
return ((Number) value).floatValue();
}
switch (schema.getType()) {
case BOOLEAN:
if (value instanceof Boolean) {
return (Boolean) value;
}
break;
case DOUBLE:
if (value instanceof Number) {
return ((Number) value).doubleValue();
}
break;
case FLOAT:
if (value instanceof Number) {
return ((Number) value).floatValue();
}
break;
case INT:
if (value instanceof Number) {
return ((Number) value).intValue();
}
break;
case LONG:
if (value instanceof Number) {
return ((Number) value).longValue();
}
break;
case STRING:
return value.toString();
case ENUM:
if (schema.getEnumSymbols().contains(value.toString())) {
return new GenericData.EnumSymbol(schema, value.toString());
}
throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()),
schema.getFullName(), schema);
case RECORD:
return convert((Map<String, Object>) value, schema);
case ARRAY:
Schema elementSchema = schema.getElementType();
List listRes = new ArrayList();
for (Object v : (List) value) {
listRes.add(typeConvert(v, name, elementSchema));
}
return listRes;
case MAP:
Schema valueSchema = schema.getValueType();
Map<String, Object> mapRes = new HashMap<String, Object>();
for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) {
mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema));
}
return mapRes;
default:
throw new IllegalArgumentException(
"JsonConverter cannot handle type: " + schema.getType());
break;
case INT:
if (value instanceof Number) {
return ((Number) value).intValue();
}
throw new JsonConversionException(value, name, schema);
}
private boolean isOptional(Schema schema) {
return schema.getType().equals(Schema.Type.UNION) &&
schema.getTypes().size() == 2 &&
(schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
}
private Schema getNonNull(Schema schema) {
List<Schema> types = schema.getTypes();
return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
}
public static class JsonConversionException extends RuntimeException {
private Object value;
private String fieldName;
private Schema schema;
public JsonConversionException(Object value, String fieldName, Schema schema) {
this.value = value;
this.fieldName = fieldName;
this.schema = schema;
break;
case LONG:
if (value instanceof Number) {
return ((Number) value).longValue();
}
@Override
public String toString() {
return String.format("Type conversion error for field %s, %s for %s",
fieldName, value, schema);
break;
case STRING:
return value.toString();
case ENUM:
if (schema.getEnumSymbols().contains(value.toString())) {
return new GenericData.EnumSymbol(schema, value.toString());
}
throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()),
schema.getFullName(), schema);
case RECORD:
return convert((Map<String, Object>) value, schema);
case ARRAY:
Schema elementSchema = schema.getElementType();
List listRes = new ArrayList();
for (Object v : (List) value) {
listRes.add(typeConvert(v, name, elementSchema));
}
return listRes;
case MAP:
Schema valueSchema = schema.getValueType();
Map<String, Object> mapRes = new HashMap<String, Object>();
for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) {
mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema));
}
return mapRes;
default:
throw new IllegalArgumentException(
"JsonConverter cannot handle type: " + schema.getType());
}
throw new JsonConversionException(value, name, schema);
}
private boolean isOptional(Schema schema) {
return schema.getType().equals(Schema.Type.UNION) &&
schema.getTypes().size() == 2 &&
(schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
}
private Schema getNonNull(Schema schema) {
List<Schema> types = schema.getTypes();
return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
}
public static class JsonConversionException extends RuntimeException {
private Object value;
private String fieldName;
private Schema schema;
public JsonConversionException(Object value, String fieldName, Schema schema) {
this.value = value;
this.fieldName = fieldName;
this.schema = schema;
}
@Override
public String toString() {
return String.format("Type conversion error for field %s, %s for %s",
fieldName, value, schema);
}
}
}

View File

@@ -17,84 +17,86 @@
package com.uber.hoodie.common;
import com.uber.hoodie.exception.HoodieIndexException;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;
import javax.xml.bind.DatatypeConverter;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
* A Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}.
*/
public class BloomFilter {
/**
* Used in computing the optimal Bloom filter size. This approximately equals 0.480453.
*/
public static final double LOG2_SQUARED = Math.log(2) * Math.log(2);
private org.apache.hadoop.util.bloom.BloomFilter filter = null;
/**
* Used in computing the optimal Bloom filter size. This approximately equals 0.480453.
*/
public static final double LOG2_SQUARED = Math.log(2) * Math.log(2);
public BloomFilter(int numEntries, double errorRate) {
this(numEntries, errorRate, Hash.MURMUR_HASH);
private org.apache.hadoop.util.bloom.BloomFilter filter = null;
public BloomFilter(int numEntries, double errorRate) {
this(numEntries, errorRate, Hash.MURMUR_HASH);
}
/**
* Create a new Bloom filter with the given configurations.
*/
public BloomFilter(int numEntries, double errorRate, int hashType) {
// Bit size
int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED));
// Number of the hash functions
int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries);
// The filter
this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType);
}
/**
* Create the bloom filter from serialized string.
*/
public BloomFilter(String filterStr) {
this.filter = new org.apache.hadoop.util.bloom.BloomFilter();
byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
try {
this.filter.readFields(dis);
dis.close();
} catch (IOException e) {
throw new HoodieIndexException("Could not deserialize BloomFilter instance", e);
}
}
/**
* Create a new Bloom filter with the given configurations.
*/
public BloomFilter(int numEntries, double errorRate, int hashType) {
// Bit size
int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED));
// Number of the hash functions
int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries);
// The filter
this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType);
public void add(String key) {
if (key == null) {
throw new NullPointerException("Key cannot by null");
}
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
/**
* Create the bloom filter from serialized string.
*/
public BloomFilter(String filterStr) {
this.filter = new org.apache.hadoop.util.bloom.BloomFilter();
byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr);
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
try {
this.filter.readFields(dis);
dis.close();
} catch (IOException e) {
throw new HoodieIndexException("Could not deserialize BloomFilter instance", e);
}
public boolean mightContain(String key) {
if (key == null) {
throw new NullPointerException("Key cannot by null");
}
return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
public void add(String key) {
if (key == null) {
throw new NullPointerException("Key cannot by null");
}
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
public boolean mightContain(String key) {
if (key == null) {
throw new NullPointerException("Key cannot by null");
}
return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
/**
* Serialize the bloom filter as a string.
*/
public String serializeToString() {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
try {
filter.write(dos);
byte[] bytes = baos.toByteArray();
dos.close();
return DatatypeConverter.printBase64Binary(bytes);
} catch (IOException e) {
throw new HoodieIndexException("Could not serialize BloomFilter instance", e);
}
/**
* Serialize the bloom filter as a string.
*/
public String serializeToString() {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
try {
filter.write(dos);
byte[] bytes = baos.toByteArray();
dos.close();
return DatatypeConverter.printBase64Binary(bytes);
} catch (IOException e) {
throw new HoodieIndexException("Could not serialize BloomFilter instance", e);
}
}
}

View File

@@ -18,7 +18,6 @@ package com.uber.hoodie.common;
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
@@ -27,100 +26,102 @@ import java.util.Optional;
* Collects stats about a single partition clean operation
*/
public class HoodieCleanStat implements Serializable {
// Policy used
private final HoodieCleaningPolicy policy;
// Partition path cleaned
private final String partitionPath;
// The patterns that were generated for the delete operation
private final List<String> deletePathPatterns;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
// Earliest commit that was retained in this clean
private final String earliestCommitToRetain;
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
List<String> deletePathPatterns, List<String> successDeleteFiles,
List<String> failedDeleteFiles, String earliestCommitToRetain) {
this.policy = policy;
this.partitionPath = partitionPath;
this.deletePathPatterns = deletePathPatterns;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
this.earliestCommitToRetain = earliestCommitToRetain;
// Policy used
private final HoodieCleaningPolicy policy;
// Partition path cleaned
private final String partitionPath;
// The patterns that were generated for the delete operation
private final List<String> deletePathPatterns;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
// Earliest commit that was retained in this clean
private final String earliestCommitToRetain;
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
List<String> deletePathPatterns, List<String> successDeleteFiles,
List<String> failedDeleteFiles, String earliestCommitToRetain) {
this.policy = policy;
this.partitionPath = partitionPath;
this.deletePathPatterns = deletePathPatterns;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
this.earliestCommitToRetain = earliestCommitToRetain;
}
public HoodieCleaningPolicy getPolicy() {
return policy;
}
public String getPartitionPath() {
return partitionPath;
}
public List<String> getDeletePathPatterns() {
return deletePathPatterns;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
}
public String getEarliestCommitToRetain() {
return earliestCommitToRetain;
}
public static HoodieCleanStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private HoodieCleaningPolicy policy;
private List<String> deletePathPatterns;
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private String partitionPath;
private String earliestCommitToRetain;
public Builder withPolicy(HoodieCleaningPolicy policy) {
this.policy = policy;
return this;
}
public HoodieCleaningPolicy getPolicy() {
return policy;
public Builder withDeletePathPattern(List<String> deletePathPatterns) {
this.deletePathPatterns = deletePathPatterns;
return this;
}
public String getPartitionPath() {
return partitionPath;
public Builder withSuccessfulDeletes(List<String> successDeleteFiles) {
this.successDeleteFiles = successDeleteFiles;
return this;
}
public List<String> getDeletePathPatterns() {
return deletePathPatterns;
public Builder withFailedDeletes(List<String> failedDeleteFiles) {
this.failedDeleteFiles = failedDeleteFiles;
return this;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
public Builder withEarliestCommitRetained(Optional<HoodieInstant> earliestCommitToRetain) {
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ?
earliestCommitToRetain.get().getTimestamp() :
"-1";
return this;
}
public String getEarliestCommitToRetain() {
return earliestCommitToRetain;
}
public static HoodieCleanStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private HoodieCleaningPolicy policy;
private List<String> deletePathPatterns;
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private String partitionPath;
private String earliestCommitToRetain;
public Builder withPolicy(HoodieCleaningPolicy policy) {
this.policy = policy;
return this;
}
public Builder withDeletePathPattern(List<String> deletePathPatterns) {
this.deletePathPatterns = deletePathPatterns;
return this;
}
public Builder withSuccessfulDeletes(List<String> successDeleteFiles) {
this.successDeleteFiles = successDeleteFiles;
return this;
}
public Builder withFailedDeletes(List<String> failedDeleteFiles) {
this.failedDeleteFiles= failedDeleteFiles;
return this;
}
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public Builder withEarliestCommitRetained(Optional<HoodieInstant> earliestCommitToRetain) {
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ?
earliestCommitToRetain.get().getTimestamp() :
"-1";
return this;
}
public HoodieCleanStat build() {
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
}
public HoodieCleanStat build() {
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
}
}
}

View File

@@ -19,13 +19,6 @@ package com.uber.hoodie.common;
import com.uber.hoodie.avro.MercifulJsonConverter;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.exception.HoodieException;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.io.IOUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -34,75 +27,85 @@ import java.util.Optional;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.commons.io.IOUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload> {
private byte[] jsonDataCompressed;
private int dataSize;
public HoodieJsonPayload(String json) throws IOException {
this.jsonDataCompressed = compressData(json);
this.dataSize = json.length();
private byte[] jsonDataCompressed;
private int dataSize;
public HoodieJsonPayload(String json) throws IOException {
this.jsonDataCompressed = compressData(json);
this.dataSize = json.length();
}
@Override
public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
return this;
}
@Override
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema)
throws IOException {
return getInsertValue(schema);
}
@Override
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema);
return Optional.of(jsonConverter.convert(getJsonData()));
}
private String getJsonData() throws IOException {
return unCompressData(jsonDataCompressed);
}
private byte[] compressData(String jsonData) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
DeflaterOutputStream dos =
new DeflaterOutputStream(baos, deflater, true);
try {
dos.write(jsonData.getBytes());
} finally {
dos.flush();
dos.close();
// Its important to call this.
// Deflater takes off-heap native memory and does not release until GC kicks in
deflater.end();
}
return baos.toByteArray();
}
@Override public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
return this;
private String unCompressData(byte[] data) throws IOException {
InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data));
try {
StringWriter sw = new StringWriter(dataSize);
IOUtils.copy(iis, sw);
return sw.toString();
} finally {
iis.close();
}
}
@Override public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException {
return getInsertValue(schema);
private String getFieldFromJsonOrFail(String field) throws IOException {
JsonNode node = new ObjectMapper().readTree(getJsonData());
if (!node.has(field)) {
throw new HoodieException("Field :" + field + " not found in payload => " + node.toString());
}
return node.get(field).getTextValue();
}
@Override public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema);
return Optional.of(jsonConverter.convert(getJsonData()));
}
public String getRowKey(String keyColumnField) throws IOException {
return getFieldFromJsonOrFail(keyColumnField);
}
private String getJsonData() throws IOException {
return unCompressData(jsonDataCompressed);
}
private byte[] compressData(String jsonData) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
DeflaterOutputStream dos =
new DeflaterOutputStream(baos, deflater, true);
try {
dos.write(jsonData.getBytes());
} finally {
dos.flush();
dos.close();
// Its important to call this.
// Deflater takes off-heap native memory and does not release until GC kicks in
deflater.end();
}
return baos.toByteArray();
}
private String unCompressData(byte[] data) throws IOException {
InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data));
try {
StringWriter sw = new StringWriter(dataSize);
IOUtils.copy(iis, sw);
return sw.toString();
} finally {
iis.close();
}
}
private String getFieldFromJsonOrFail(String field) throws IOException {
JsonNode node = new ObjectMapper().readTree(getJsonData());
if(!node.has(field)) {
throw new HoodieException("Field :" + field + " not found in payload => " + node.toString());
}
return node.get(field).getTextValue();
}
public String getRowKey(String keyColumnField) throws IOException {
return getFieldFromJsonOrFail(keyColumnField);
}
public String getPartitionPath(String partitionPathField) throws IOException {
return getFieldFromJsonOrFail(partitionPathField);
}
public String getPartitionPath(String partitionPathField) throws IOException {
return getFieldFromJsonOrFail(partitionPathField);
}
}

View File

@@ -16,81 +16,82 @@
package com.uber.hoodie.common;
import org.apache.hadoop.fs.FileStatus;
import java.io.File;
import java.io.Serializable;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileStatus;
/**
* Collects stats about a single partition clean operation
*/
public class HoodieRollbackStat implements Serializable {
// Partition path
private final String partitionPath;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
// Count of HoodieLogFile to commandBlocks written for a particular rollback
private final Map<FileStatus, Long> commandBlocksCount;
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
this.partitionPath = partitionPath;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
this.commandBlocksCount = commandBlocksCount;
// Partition path
private final String partitionPath;
private final List<String> successDeleteFiles;
// Files that could not be deleted
private final List<String> failedDeleteFiles;
// Count of HoodieLogFile to commandBlocks written for a particular rollback
private final Map<FileStatus, Long> commandBlocksCount;
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
this.partitionPath = partitionPath;
this.successDeleteFiles = successDeleteFiles;
this.failedDeleteFiles = failedDeleteFiles;
this.commandBlocksCount = commandBlocksCount;
}
public Map<FileStatus, Long> getCommandBlocksCount() {
return commandBlocksCount;
}
public String getPartitionPath() {
return partitionPath;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
}
public static HoodieRollbackStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private Map<FileStatus, Long> commandBlocksCount;
private String partitionPath;
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
//noinspection Convert2MethodRef
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
return this;
}
public Map<FileStatus, Long> getCommandBlocksCount() {
return commandBlocksCount;
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
this.commandBlocksCount = commandBlocksCount;
return this;
}
public String getPartitionPath() {
return partitionPath;
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public List<String> getSuccessDeleteFiles() {
return successDeleteFiles;
}
public List<String> getFailedDeleteFiles() {
return failedDeleteFiles;
}
public static HoodieRollbackStat.Builder newBuilder() {
return new Builder();
}
public static class Builder {
private List<String> successDeleteFiles;
private List<String> failedDeleteFiles;
private Map<FileStatus, Long> commandBlocksCount;
private String partitionPath;
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
//noinspection Convert2MethodRef
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
return this;
}
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
this.commandBlocksCount = commandBlocksCount;
return this;
}
public Builder withPartitionPath(String partitionPath) {
this.partitionPath = partitionPath;
return this;
}
public HoodieRollbackStat build() {
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
}
public HoodieRollbackStat build() {
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles,
commandBlocksCount);
}
}
}

View File

@@ -17,5 +17,5 @@
package com.uber.hoodie.common.model;
public enum ActionType {
commit, savepoint, compaction, clean, rollback;
commit, savepoint, compaction, clean, rollback;
}

View File

@@ -17,13 +17,7 @@
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
@JsonIgnoreProperties(ignoreUnknown = true)
public class CompactionWriteStat implements Serializable {
@@ -34,7 +28,8 @@ public class CompactionWriteStat implements Serializable {
private long totalLogFiles;
private long totalRecordsToBeUpdate;
public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles, long totalLogRecords,
public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles,
long totalLogRecords,
long totalRecordsToUpdate) {
this.writeStat = writeStat;
this.partitionPath = partitionPath;
@@ -58,6 +53,7 @@ public class CompactionWriteStat implements Serializable {
public long getTotalRecordsToBeUpdate() {
return totalRecordsToBeUpdate;
}
public HoodieWriteStat getHoodieWriteStat() {
return writeStat;
}

View File

@@ -19,79 +19,75 @@
package com.uber.hoodie.common.model;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Within a file group, a slice is a combination of data file written at a commit time
* and list of log files, containing changes to the data file from that commit time
* Within a file group, a slice is a combination of data file written at a commit time and list of
* log files, containing changes to the data file from that commit time
*/
public class FileSlice implements Serializable {
/**
* id of the slice
*/
private String fileId;
/**
* id of the slice
*/
private String fileId;
/**
* Point in the timeline, at which the slice was created
*/
private String baseCommitTime;
/**
* Point in the timeline, at which the slice was created
*/
private String baseCommitTime;
/**
* data file, with the compacted data, for this slice
*
*/
private HoodieDataFile dataFile;
/**
* data file, with the compacted data, for this slice
*/
private HoodieDataFile dataFile;
/**
* List of appendable log files with real time data
* - Sorted with greater log version first
* - Always empty for copy_on_write storage.
*/
private final TreeSet<HoodieLogFile> logFiles;
/**
* List of appendable log files with real time data - Sorted with greater log version first -
* Always empty for copy_on_write storage.
*/
private final TreeSet<HoodieLogFile> logFiles;
public FileSlice(String baseCommitTime, String fileId) {
this.fileId = fileId;
this.baseCommitTime = baseCommitTime;
this.dataFile = null;
this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator());
}
public FileSlice(String baseCommitTime, String fileId) {
this.fileId = fileId;
this.baseCommitTime = baseCommitTime;
this.dataFile = null;
this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator());
}
public void setDataFile(HoodieDataFile dataFile) {
this.dataFile = dataFile;
}
public void setDataFile(HoodieDataFile dataFile) {
this.dataFile = dataFile;
}
public void addLogFile(HoodieLogFile logFile) {
this.logFiles.add(logFile);
}
public void addLogFile(HoodieLogFile logFile) {
this.logFiles.add(logFile);
}
public Stream<HoodieLogFile> getLogFiles() {
return logFiles.stream();
}
public Stream<HoodieLogFile> getLogFiles() {
return logFiles.stream();
}
public String getBaseCommitTime() {
return baseCommitTime;
}
public String getBaseCommitTime() {
return baseCommitTime;
}
public String getFileId() {
return fileId;
}
public String getFileId() {
return fileId;
}
public Optional<HoodieDataFile> getDataFile() {
return Optional.ofNullable(dataFile);
}
public Optional<HoodieDataFile> getDataFile() {
return Optional.ofNullable(dataFile);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("FileSlice {");
sb.append("baseCommitTime=").append(baseCommitTime);
sb.append(", dataFile='").append(dataFile).append('\'');
sb.append(", logFiles='").append(logFiles).append('\'');
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("FileSlice {");
sb.append("baseCommitTime=").append(baseCommitTime);
sb.append(", dataFile='").append(dataFile).append('\'');
sb.append(", logFiles='").append(logFiles).append('\'');
sb.append('}');
return sb.toString();
}
}

View File

@@ -23,18 +23,18 @@ import org.apache.hadoop.fs.Path;
public class HoodieArchivedLogFile extends HoodieLogFile {
public static final String ARCHIVE_EXTENSION = ".archive";
public static final String ARCHIVE_EXTENSION = ".archive";
public HoodieArchivedLogFile(FileStatus fileStatus) {
super(fileStatus);
}
public HoodieArchivedLogFile(FileStatus fileStatus) {
super(fileStatus);
}
public HoodieArchivedLogFile(Path logPath) {
super(logPath);
}
public HoodieArchivedLogFile(Path logPath) {
super(logPath);
}
@Override
public String toString() {
return "HoodieArchivedLogFile {" + super.getPath() + '}';
}
@Override
public String toString() {
return "HoodieArchivedLogFile {" + super.getPath() + '}';
}
}

View File

@@ -17,40 +17,37 @@
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.HoodieAvroUtils;
import java.io.IOException;
import java.util.Optional;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
/**
* This is a payload to wrap a existing Hoodie Avro Record.
* Useful to create a HoodieRecord over existing GenericRecords in a hoodie datasets (useful in compactions)
*
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over
* existing GenericRecords in a hoodie datasets (useful in compactions)
*/
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
private final Optional<GenericRecord> record;
public HoodieAvroPayload(Optional<GenericRecord> record) {
this.record = record;
}
private final Optional<GenericRecord> record;
@Override
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
return this;
}
public HoodieAvroPayload(Optional<GenericRecord> record) {
this.record = record;
}
@Override
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException {
return getInsertValue(schema);
}
@Override
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
return this;
}
@Override
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema));
}
@Override
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException {
return getInsertValue(schema);
}
@Override
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema));
}
}

View File

@@ -17,6 +17,6 @@
package com.uber.hoodie.common.model;
public enum HoodieCleaningPolicy {
KEEP_LATEST_FILE_VERSIONS,
KEEP_LATEST_COMMITS
KEEP_LATEST_FILE_VERSIONS,
KEEP_LATEST_COMMITS
}

View File

@@ -17,8 +17,13 @@
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.DeserializationFeature;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
@@ -27,196 +32,195 @@ import org.codehaus.jackson.annotate.JsonMethod;
import org.codehaus.jackson.map.DeserializationConfig.Feature;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* All the metadata that gets stored along with a commit.
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieCommitMetadata implements Serializable {
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
private Map<String, String> extraMetadataMap;
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
public HoodieCommitMetadata() {
extraMetadataMap = new HashMap<>();
partitionToWriteStats = new HashMap<>();
private Map<String, String> extraMetadataMap;
public HoodieCommitMetadata() {
extraMetadataMap = new HashMap<>();
partitionToWriteStats = new HashMap<>();
}
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
if (!partitionToWriteStats.containsKey(partitionPath)) {
partitionToWriteStats.put(partitionPath, new ArrayList<>());
}
partitionToWriteStats.get(partitionPath).add(stat);
}
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
if (!partitionToWriteStats.containsKey(partitionPath)) {
partitionToWriteStats.put(partitionPath, new ArrayList<>());
public void addMetadata(String metaKey, String value) {
extraMetadataMap.put(metaKey, value);
}
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
return partitionToWriteStats.get(partitionPath);
}
public Map<String, String> getExtraMetadata() {
return extraMetadataMap;
}
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
return partitionToWriteStats;
}
public String getMetadata(String metaKey) {
return extraMetadataMap.get(metaKey);
}
public HashMap<String, String> getFileIdAndRelativePaths() {
HashMap<String, String> filePaths = new HashMap<>();
// list all partitions paths
for (Map.Entry<String, List<HoodieWriteStat>> entry : getPartitionToWriteStats().entrySet()) {
for (HoodieWriteStat stat : entry.getValue()) {
filePaths.put(stat.getFileId(), stat.getPath());
}
}
return filePaths;
}
public HashMap<String, String> getFileIdAndFullPaths(String basePath) {
HashMap<String, String> fullPaths = new HashMap<>();
for (Map.Entry<String, String> entry : getFileIdAndRelativePaths().entrySet()) {
String fullPath =
(entry.getValue() != null) ? (new Path(basePath, entry.getValue())).toString() : null;
fullPaths.put(entry.getKey(), fullPath);
}
return fullPaths;
}
public String toJsonString() throws IOException {
if (partitionToWriteStats.containsKey(null)) {
log.info("partition path is null for " + partitionToWriteStats.get(null));
partitionToWriteStats.remove(null);
}
ObjectMapper mapper = new ObjectMapper();
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
return mapper.defaultPrettyPrintingWriter().writeValueAsString(this);
}
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
if (jsonStr == null || jsonStr.isEmpty()) {
// For empty commit file (no data or somethings bad happen).
return new HoodieCommitMetadata();
}
ObjectMapper mapper = new ObjectMapper();
mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
return mapper.readValue(jsonStr, HoodieCommitMetadata.class);
}
// Here the functions are named "fetch" instead of "get", to get avoid of the json conversion.
public long fetchTotalPartitionsWritten() {
return partitionToWriteStats.size();
}
public long fetchTotalFilesInsert() {
long totalFilesInsert = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
totalFilesInsert++;
}
partitionToWriteStats.get(partitionPath).add(stat);
}
}
return totalFilesInsert;
}
public void addMetadata(String metaKey, String value) {
extraMetadataMap.put(metaKey, value);
}
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
return partitionToWriteStats.get(partitionPath);
}
public Map<String, String> getExtraMetadata() { return extraMetadataMap; }
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
return partitionToWriteStats;
}
public String getMetadata(String metaKey) {
return extraMetadataMap.get(metaKey);
}
public HashMap<String, String> getFileIdAndRelativePaths() {
HashMap<String, String> filePaths = new HashMap<>();
// list all partitions paths
for (Map.Entry<String, List<HoodieWriteStat>> entry: getPartitionToWriteStats().entrySet()) {
for (HoodieWriteStat stat: entry.getValue()) {
filePaths.put(stat.getFileId(), stat.getPath());
}
public long fetchTotalFilesUpdated() {
long totalFilesUpdated = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
totalFilesUpdated++;
}
return filePaths;
}
}
return totalFilesUpdated;
}
public HashMap<String, String> getFileIdAndFullPaths(String basePath) {
HashMap<String, String> fullPaths = new HashMap<>();
for (Map.Entry<String, String> entry: getFileIdAndRelativePaths().entrySet()) {
String fullPath = (entry.getValue() != null) ? (new Path(basePath, entry.getValue())).toString() : null;
fullPaths.put(entry.getKey(), fullPath);
} return fullPaths;
public long fetchTotalUpdateRecordsWritten() {
long totalUpdateRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalUpdateRecordsWritten += stat.getNumUpdateWrites();
}
}
return totalUpdateRecordsWritten;
}
public String toJsonString() throws IOException {
if(partitionToWriteStats.containsKey(null)) {
log.info("partition path is null for " + partitionToWriteStats.get(null));
partitionToWriteStats.remove(null);
public long fetchTotalInsertRecordsWritten() {
long totalInsertRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
totalInsertRecordsWritten += stat.getNumWrites();
}
ObjectMapper mapper = new ObjectMapper();
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
return mapper.defaultPrettyPrintingWriter().writeValueAsString(this);
}
}
return totalInsertRecordsWritten;
}
public long fetchTotalRecordsWritten() {
long totalRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalRecordsWritten += stat.getNumWrites();
}
}
return totalRecordsWritten;
}
public long fetchTotalBytesWritten() {
long totalBytesWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalBytesWritten += stat.getTotalWriteBytes();
}
}
return totalBytesWritten;
}
public long fetchTotalWriteErrors() {
long totalWriteErrors = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalWriteErrors += stat.getTotalWriteErrors();
}
}
return totalWriteErrors;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
if (jsonStr == null || jsonStr.isEmpty()) {
// For empty commit file (no data or somethings bad happen).
return new HoodieCommitMetadata();
}
ObjectMapper mapper = new ObjectMapper();
mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
return mapper.readValue(jsonStr, HoodieCommitMetadata.class);
}
HoodieCommitMetadata that = (HoodieCommitMetadata) o;
// Here the functions are named "fetch" instead of "get", to get avoid of the json conversion.
public long fetchTotalPartitionsWritten() {
return partitionToWriteStats.size();
}
return partitionToWriteStats != null ?
partitionToWriteStats.equals(that.partitionToWriteStats) :
that.partitionToWriteStats == null;
public long fetchTotalFilesInsert() {
long totalFilesInsert = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
totalFilesInsert ++;
}
}
}
return totalFilesInsert;
}
}
public long fetchTotalFilesUpdated() {
long totalFilesUpdated = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
totalFilesUpdated ++;
}
}
}
return totalFilesUpdated;
}
@Override
public int hashCode() {
return partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
}
public long fetchTotalUpdateRecordsWritten() {
long totalUpdateRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalUpdateRecordsWritten += stat.getNumUpdateWrites();
}
}
return totalUpdateRecordsWritten;
}
public long fetchTotalInsertRecordsWritten() {
long totalInsertRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
totalInsertRecordsWritten += stat.getNumWrites();
}
}
}
return totalInsertRecordsWritten;
}
public long fetchTotalRecordsWritten() {
long totalRecordsWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalRecordsWritten += stat.getNumWrites();
}
}
return totalRecordsWritten;
}
public long fetchTotalBytesWritten() {
long totalBytesWritten = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalBytesWritten += stat.getTotalWriteBytes();
}
}
return totalBytesWritten;
}
public long fetchTotalWriteErrors() {
long totalWriteErrors = 0;
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
for (HoodieWriteStat stat : stats) {
totalWriteErrors += stat.getTotalWriteErrors();
}
}
return totalWriteErrors;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieCommitMetadata that = (HoodieCommitMetadata) o;
return partitionToWriteStats != null ?
partitionToWriteStats.equals(that.partitionToWriteStats) :
that.partitionToWriteStats == null;
}
@Override
public int hashCode() {
return partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
}
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
}
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
}
}

View File

@@ -16,15 +16,12 @@
package com.uber.hoodie.common.model;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BinaryOperator;
import java.util.function.Supplier;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.codehaus.jackson.annotate.JsonAutoDetect;
@@ -33,9 +30,11 @@ import org.codehaus.jackson.map.DeserializationConfig.Feature;
import org.codehaus.jackson.map.ObjectMapper;
/**
* Place holder for the compaction specific meta-data, uses all the details used in a normal HoodieCommitMetadata
* Place holder for the compaction specific meta-data, uses all the details used in a normal
* HoodieCommitMetadata
*/
public class HoodieCompactionMetadata extends HoodieCommitMetadata {
private static volatile Logger log = LogManager.getLogger(HoodieCompactionMetadata.class);
protected HashMap<String, List<CompactionWriteStat>> partitionToCompactionWriteStats;
@@ -60,7 +59,7 @@ public class HoodieCompactionMetadata extends HoodieCommitMetadata {
}
public String toJsonString() throws IOException {
if(partitionToCompactionWriteStats.containsKey(null)) {
if (partitionToCompactionWriteStats.containsKey(null)) {
log.info("partition path is null for " + partitionToCompactionWriteStats.get(null));
partitionToCompactionWriteStats.remove(null);
}

View File

@@ -17,56 +17,54 @@
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import java.io.Serializable;
import java.util.Comparator;
import org.apache.hadoop.fs.FileStatus;
public class HoodieDataFile implements Serializable {
private FileStatus fileStatus;
public HoodieDataFile(FileStatus fileStatus) {
this.fileStatus = fileStatus;
}
private FileStatus fileStatus;
public String getFileId() {
return FSUtils.getFileId(fileStatus.getPath().getName());
}
public HoodieDataFile(FileStatus fileStatus) {
this.fileStatus = fileStatus;
}
public String getCommitTime() {
return FSUtils.getCommitTime(fileStatus.getPath().getName());
}
public String getFileId() {
return FSUtils.getFileId(fileStatus.getPath().getName());
}
public String getPath() {
return fileStatus.getPath().toString();
}
public String getCommitTime() {
return FSUtils.getCommitTime(fileStatus.getPath().getName());
}
public String getFileName() {
return fileStatus.getPath().getName();
}
public String getPath() {
return fileStatus.getPath().toString();
}
public FileStatus getFileStatus() {
return fileStatus;
}
public String getFileName() {
return fileStatus.getPath().getName();
}
public static Comparator<HoodieDataFile> getCommitTimeComparator() {
return (o1, o2) -> {
// reverse the order
return o2.getCommitTime().compareTo(o1.getCommitTime());
};
}
public FileStatus getFileStatus() {
return fileStatus;
}
public long getFileSize() {
return fileStatus.getLen();
}
public static Comparator<HoodieDataFile> getCommitTimeComparator() {
return (o1, o2) -> {
// reverse the order
return o2.getCommitTime().compareTo(o1.getCommitTime());
};
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieDataFile {");
sb.append("fileStatus=").append(fileStatus);
sb.append('}');
return sb.toString();
}
public long getFileSize() {
return fileStatus.getLen();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieDataFile {");
sb.append("fileStatus=").append(fileStatus);
sb.append('}');
return sb.toString();
}
}

View File

@@ -24,22 +24,22 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieDeltaWriteStat extends HoodieWriteStat {
private int logVersion;
private long logOffset;
private int logVersion;
private long logOffset;
public void setLogVersion(int logVersion) {
this.logVersion = logVersion;
}
public void setLogVersion(int logVersion) {
this.logVersion = logVersion;
}
public int getLogVersion() {
return logVersion;
}
public int getLogVersion() {
return logVersion;
}
public void setLogOffset(long logOffset) {
this.logOffset = logOffset;
}
public void setLogOffset(long logOffset) {
this.logOffset = logOffset;
}
public long getLogOffset() {
return logOffset;
}
public long getLogOffset() {
return logOffset;
}
}

View File

@@ -17,15 +17,15 @@
package com.uber.hoodie.common.model;
public enum HoodieFileFormat {
PARQUET(".parquet"), HOODIE_LOG(".log");
PARQUET(".parquet"), HOODIE_LOG(".log");
private final String extension;
private final String extension;
HoodieFileFormat(String extension) {
this.extension = extension;
}
HoodieFileFormat(String extension) {
this.extension = extension;
}
public String getFileExtension() {
return extension;
}
public String getFileExtension() {
return extension;
}
}

View File

@@ -20,9 +20,6 @@ package com.uber.hoodie.common.model;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import org.apache.commons.lang3.tuple.Pair;
import java.io.Serializable;
import java.util.Comparator;
import java.util.List;
@@ -35,212 +32,184 @@ import java.util.stream.Stream;
*/
public class HoodieFileGroup implements Serializable {
public static Comparator<String> getReverseCommitTimeComparator() {
return (o1, o2) -> {
// reverse the order
return o2.compareTo(o1);
};
public static Comparator<String> getReverseCommitTimeComparator() {
return (o1, o2) -> {
// reverse the order
return o2.compareTo(o1);
};
}
/**
* Partition containing the file group.
*/
private final String partitionPath;
/**
* uniquely identifies the file group
*/
private final String id;
/**
* Slices of files in this group, sorted with greater commit first.
*/
private final TreeMap<String, FileSlice> fileSlices;
/**
* Timeline, based on which all getter work
*/
private final HoodieTimeline timeline;
/**
* The last completed instant, that acts as a high watermark for all getters
*/
private final Optional<HoodieInstant> lastInstant;
public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) {
this.partitionPath = partitionPath;
this.id = id;
this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator());
this.timeline = timeline;
this.lastInstant = timeline.lastInstant();
}
/**
* Add a new datafile into the file group
*/
public void addDataFile(HoodieDataFile dataFile) {
if (!fileSlices.containsKey(dataFile.getCommitTime())) {
fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id));
}
fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile);
}
/**
* Partition containing the file group.
*/
private final String partitionPath;
/**
* uniquely identifies the file group
*/
private final String id;
/**
* Slices of files in this group, sorted with greater commit first.
*/
private final TreeMap<String, FileSlice> fileSlices;
/**
* Timeline, based on which all getter work
*/
private final HoodieTimeline timeline;
/**
* The last completed instant, that acts as a high watermark for all
* getters
*/
private final Optional<HoodieInstant> lastInstant;
public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) {
this.partitionPath = partitionPath;
this.id = id;
this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator());
this.timeline = timeline;
this.lastInstant = timeline.lastInstant();
/**
* Add a new log file into the group
*/
public void addLogFile(HoodieLogFile logFile) {
if (!fileSlices.containsKey(logFile.getBaseCommitTime())) {
fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id));
}
fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile);
}
/**
* Add a new datafile into the file group
*
* @param dataFile
*/
public void addDataFile(HoodieDataFile dataFile) {
if (!fileSlices.containsKey(dataFile.getCommitTime())) {
fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id));
}
fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile);
public String getId() {
return id;
}
public String getPartitionPath() {
return partitionPath;
}
/**
* A FileSlice is considered committed, if one of the following is true - There is a committed
* data file - There are some log files, that are based off a commit or delta commit
*/
private boolean isFileSliceCommitted(FileSlice slice) {
String maxCommitTime = lastInstant.get().getTimestamp();
return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) &&
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL);
}
/**
* Provides a stream of committed file slices, sorted reverse base commit time.
*/
public Stream<FileSlice> getAllFileSlices() {
if (!timeline.empty()) {
return fileSlices.entrySet().stream()
.map(sliceEntry -> sliceEntry.getValue())
.filter(slice -> isFileSliceCommitted(slice));
}
return Stream.empty();
}
/**
* Add a new log file into the group
*
* @param logFile
*/
public void addLogFile(HoodieLogFile logFile) {
if (!fileSlices.containsKey(logFile.getBaseCommitTime())) {
fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id));
}
fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile);
/**
* Gets the latest slice - this can contain either
*
* - just the log files without data file - (or) data file with 0 or more log files
*/
public Optional<FileSlice> getLatestFileSlice() {
// there should always be one
return getAllFileSlices().findFirst();
}
/**
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
*/
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
return getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
return getAllFileSlices()
.filter(slice -> commitRange.contains(slice.getBaseCommitTime()))
.findFirst();
}
/**
* Stream of committed data files, sorted reverse commit time
*/
public Stream<HoodieDataFile> getAllDataFiles() {
return getAllFileSlices()
.filter(slice -> slice.getDataFile().isPresent())
.map(slice -> slice.getDataFile().get());
}
/**
* Get the latest committed data file
*/
public Optional<HoodieDataFile> getLatestDataFile() {
return getAllDataFiles().findFirst();
}
/**
* Get the latest data file, that is <= max commit time
*/
public Optional<HoodieDataFile> getLatestDataFileBeforeOrOn(String maxCommitTime) {
return getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
/**
* Get the latest data file, that is contained within the provided commit range.
*/
public Optional<HoodieDataFile> getLatestDataFileInRange(List<String> commitRange) {
return getAllDataFiles()
.filter(dataFile -> commitRange.contains(dataFile.getCommitTime()))
.findFirst();
}
/**
* Obtain the latest log file (based on latest committed data file), currently being appended to
*
* @return logfile if present, empty if no log file has been opened already.
*/
public Optional<HoodieLogFile> getLatestLogFile() {
Optional<FileSlice> latestSlice = getLatestFileSlice();
if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) {
return latestSlice.get().getLogFiles().findFirst();
}
return Optional.empty();
}
public String getId() {
return id;
}
public String getPartitionPath() {
return partitionPath;
}
/**
* A FileSlice is considered committed, if one of the following is true
* - There is a committed data file
* - There are some log files, that are based off a commit or delta commit
*
* @param slice
* @return
*/
private boolean isFileSliceCommitted(FileSlice slice) {
String maxCommitTime = lastInstant.get().getTimestamp();
return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) &&
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL);
}
/**
* Provides a stream of committed file slices, sorted reverse base commit time.
*
* @return
*/
public Stream<FileSlice> getAllFileSlices() {
if (!timeline.empty()) {
return fileSlices.entrySet().stream()
.map(sliceEntry -> sliceEntry.getValue())
.filter(slice -> isFileSliceCommitted(slice));
}
return Stream.empty();
}
/**
* Gets the latest slice - this can contain either
*
* - just the log files without data file
* - (or) data file with 0 or more log files
*
* @return
*/
public Optional<FileSlice> getLatestFileSlice() {
// there should always be one
return getAllFileSlices().findFirst();
}
/**
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
*
* @param maxCommitTime
* @return
*/
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
return getAllFileSlices()
.filter(slice ->
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
return getAllFileSlices()
.filter(slice -> commitRange.contains(slice.getBaseCommitTime()))
.findFirst();
}
/**
* Stream of committed data files, sorted reverse commit time
*
* @return
*/
public Stream<HoodieDataFile> getAllDataFiles() {
return getAllFileSlices()
.filter(slice -> slice.getDataFile().isPresent())
.map(slice -> slice.getDataFile().get());
}
/**
* Get the latest committed data file
*
* @return
*/
public Optional<HoodieDataFile> getLatestDataFile() {
return getAllDataFiles().findFirst();
}
/**
* Get the latest data file, that is <= max commit time
*
* @param maxCommitTime
* @return
*/
public Optional<HoodieDataFile> getLatestDataFileBeforeOrOn(String maxCommitTime) {
return getAllDataFiles()
.filter(dataFile ->
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
maxCommitTime,
HoodieTimeline.LESSER_OR_EQUAL))
.findFirst();
}
/**
* Get the latest data file, that is contained within the provided commit range.
*
* @param commitRange
* @return
*/
public Optional<HoodieDataFile> getLatestDataFileInRange(List<String> commitRange) {
return getAllDataFiles()
.filter(dataFile -> commitRange.contains(dataFile.getCommitTime()))
.findFirst();
}
/**
* Obtain the latest log file (based on latest committed data file),
* currently being appended to
*
* @return logfile if present, empty if no log file has been opened already.
*/
public Optional<HoodieLogFile> getLatestLogFile() {
Optional<FileSlice> latestSlice = getLatestFileSlice();
if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) {
return latestSlice.get().getLogFiles().findFirst();
}
return Optional.empty();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieFileGroup {");
sb.append("id=").append(id);
sb.append(", fileSlices='").append(fileSlices).append('\'');
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieFileGroup {");
sb.append("id=").append(id);
sb.append(", fileSlices='").append(fileSlices).append('\'');
sb.append('}');
return sb.toString();
}
}

View File

@@ -17,57 +17,58 @@
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
* HoodieKey consists of
*
* - recordKey : a recordKey that acts as primary key for a record
* - partitionPath : path to the partition that contains the record
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the
* partition that contains the record
*/
public class HoodieKey implements Serializable {
private final String recordKey;
private final String recordKey;
private final String partitionPath;
private final String partitionPath;
public HoodieKey(String recordKey, String partitionPath) {
this.recordKey = recordKey;
this.partitionPath = partitionPath;
public HoodieKey(String recordKey, String partitionPath) {
this.recordKey = recordKey;
this.partitionPath = partitionPath;
}
public String getRecordKey() {
return recordKey;
}
public String getPartitionPath() {
return partitionPath;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
public String getRecordKey() {
return recordKey;
if (o == null || getClass() != o.getClass()) {
return false;
}
HoodieKey otherKey = (HoodieKey) o;
return Objects.equal(recordKey, otherKey.recordKey) &&
Objects.equal(partitionPath, otherKey.partitionPath);
}
public String getPartitionPath() {
return partitionPath;
}
@Override
public int hashCode() {
return Objects.hashCode(recordKey, partitionPath);
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieKey otherKey = (HoodieKey) o;
return Objects.equal(recordKey, otherKey.recordKey) &&
Objects.equal(partitionPath, otherKey.partitionPath);
}
@Override
public int hashCode() {
return Objects.hashCode(recordKey, partitionPath);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieKey {");
sb.append(" recordKey=").append(recordKey);
sb.append(" partitionPath=").append(partitionPath);
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieKey {");
sb.append(" recordKey=").append(recordKey);
sb.append(" partitionPath=").append(partitionPath);
sb.append('}');
return sb.toString();
}
}

View File

@@ -19,13 +19,13 @@
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.io.Serializable;
import java.util.Comparator;
import java.util.Optional;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* Abstracts a single log file. Contains methods to extract metadata like the fileId, version and
@@ -34,73 +34,74 @@ import java.util.Optional;
* Also contains logic to roll-over the log file
*/
public class HoodieLogFile implements Serializable {
public static final String DELTA_EXTENSION = ".log";
private final Path path;
private Optional<FileStatus> fileStatus;
public static final String DELTA_EXTENSION = ".log";
public HoodieLogFile(FileStatus fileStatus) {
this(fileStatus.getPath());
this.fileStatus = Optional.of(fileStatus);
}
private final Path path;
private Optional<FileStatus> fileStatus;
public HoodieLogFile(Path logPath) {
this.path = logPath;
this.fileStatus = Optional.empty();
}
public HoodieLogFile(FileStatus fileStatus) {
this(fileStatus.getPath());
this.fileStatus = Optional.of(fileStatus);
}
public String getFileId() {
return FSUtils.getFileIdFromLogPath(path);
}
public HoodieLogFile(Path logPath) {
this.path = logPath;
this.fileStatus = Optional.empty();
}
public String getBaseCommitTime() {
return FSUtils.getBaseCommitTimeFromLogPath(path);
}
public String getFileId() {
return FSUtils.getFileIdFromLogPath(path);
}
public int getLogVersion() {
return FSUtils.getFileVersionFromLog(path);
}
public String getBaseCommitTime() {
return FSUtils.getBaseCommitTimeFromLogPath(path);
}
public String getFileExtension() {
return FSUtils.getFileExtensionFromLog(path);
}
public int getLogVersion() {
return FSUtils.getFileVersionFromLog(path);
}
public Path getPath() {
return path;
}
public String getFileExtension() {
return FSUtils.getFileExtensionFromLog(path);
}
public String getFileName() {
return path.getName();
}
public Path getPath() {
return path;
}
public Optional<FileStatus> getFileStatus() {
return fileStatus;
}
public String getFileName() {
return path.getName();
}
public Optional<Long> getFileSize() {
return fileStatus.map(FileStatus::getLen);
}
public Optional<FileStatus> getFileStatus() {
return fileStatus;
}
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
String fileId = getFileId();
String baseCommitTime = getBaseCommitTime();
String extension = "." + FSUtils.getFileExtensionFromLog(path);
int newVersion = FSUtils
.computeNextLogVersion(fs, path.getParent(), fileId,
extension, baseCommitTime);
return new HoodieLogFile(new Path(path.getParent(),
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
}
public Optional<Long> getFileSize() {
return fileStatus.map(FileStatus::getLen);
}
public static Comparator<HoodieLogFile> getLogVersionComparator() {
return (o1, o2) -> {
// reverse the order
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
};
}
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
String fileId = getFileId();
String baseCommitTime = getBaseCommitTime();
String extension = "." + FSUtils.getFileExtensionFromLog(path);
int newVersion = FSUtils
.computeNextLogVersion(fs, path.getParent(), fileId,
extension, baseCommitTime);
return new HoodieLogFile(new Path(path.getParent(),
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
}
@Override
public String toString() {
return "HoodieLogFile {" + path + '}';
}
public static Comparator<HoodieLogFile> getLogVersionComparator() {
return (o1, o2) -> {
// reverse the order
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
};
}
@Override
public String toString() {
return "HoodieLogFile {" + path + '}';
}
}

View File

@@ -17,7 +17,8 @@
package com.uber.hoodie.common.model;
import com.uber.hoodie.exception.HoodieException;
import java.io.IOException;
import java.util.Properties;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -25,117 +26,119 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.Properties;
/**
* The metadata that goes into the meta file in each partition
*/
public class HoodiePartitionMetadata {
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
public static final String COMMIT_TIME_KEY = "commitTime";
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
public static final String COMMIT_TIME_KEY = "commitTime";
/**
* Contents of the metadata
*/
private final Properties props;
/**
* Contents of the metadata
*/
private final Properties props;
/**
* Path to the partition, about which we have the metadata
*/
private final Path partitionPath;
/**
* Path to the partition, about which we have the metadata
*/
private final Path partitionPath;
private final FileSystem fs;
private final FileSystem fs;
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
/**
* Construct metadata from existing partition
*/
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
this.fs = fs;
this.props = new Properties();
this.partitionPath = partitionPath;
/**
* Construct metadata from existing partition
*/
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
this.fs = fs;
this.props = new Properties();
this.partitionPath = partitionPath;
}
/**
* Construct metadata object to be written out.
*/
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath,
Path partitionPath) {
this(fs, partitionPath);
props.setProperty(COMMIT_TIME_KEY, commitTime);
props
.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
}
public int getPartitionDepth() {
if (!props.containsKey(PARTITION_DEPTH_KEY)) {
throw new HoodieException("Could not find partitionDepth in partition metafile");
}
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
}
/**
* Construct metadata object to be written out.
*/
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
this(fs, partitionPath);
props.setProperty(COMMIT_TIME_KEY, commitTime);
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
}
/**
* Write the metadata safely into partition atomically.
*/
public void trySave(int taskPartitionId) {
Path tmpMetaPath = new Path(partitionPath,
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
boolean metafileExists = false;
public int getPartitionDepth() {
if (!props.containsKey(PARTITION_DEPTH_KEY)) {
throw new HoodieException("Could not find partitionDepth in partition metafile");
}
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
}
/**
* Write the metadata safely into partition atomically.
*
* @param taskPartitionId
*/
public void trySave(int taskPartitionId) {
Path tmpMetaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
boolean metafileExists = false;
try {
metafileExists = fs.exists(metaPath);
if (!metafileExists) {
// write to temporary file
FSDataOutputStream os = fs.create(tmpMetaPath, true);
props.store(os, "partition metadata");
os.hsync();
os.hflush();
os.close();
// move to actual path
fs.rename(tmpMetaPath, metaPath);
}
} catch (IOException ioe) {
log.warn(
"Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), "
+
partitionPath, ioe);
} finally {
if (!metafileExists) {
try {
metafileExists = fs.exists(metaPath);
if (!metafileExists) {
// write to temporary file
FSDataOutputStream os = fs.create(tmpMetaPath, true);
props.store(os, "partition metadata");
os.hsync();
os.hflush();
os.close();
// move to actual path
fs.rename(tmpMetaPath, metaPath);
}
// clean up tmp file, if still lying around
if (fs.exists(tmpMetaPath)) {
fs.delete(tmpMetaPath, false);
}
} catch (IOException ioe) {
log.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " +
partitionPath, ioe);
} finally {
if (!metafileExists) {
try {
// clean up tmp file, if still lying around
if (fs.exists(tmpMetaPath)) {
fs.delete(tmpMetaPath, false);
}
} catch (IOException ioe) {
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
}
}
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
}
}
}
}
/**
* Read out the metadata for this partition
*/
public void readFromFS() {
try {
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
FSDataInputStream is = fs.open(metaFile);
props.load(is);
} catch (IOException ioe) {
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
}
/**
* Read out the metadata for this partition
*/
public void readFromFS() {
try {
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
FSDataInputStream is = fs.open(metaFile);
props.load(is);
} catch (IOException ioe) {
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath,
ioe);
}
}
// methods related to partition meta data
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) {
try {
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
} catch (IOException ioe) {
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe);
}
// methods related to partition meta data
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) {
try {
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
} catch (IOException ioe) {
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath,
ioe);
}
}
}

View File

@@ -17,7 +17,6 @@
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
import java.util.Optional;
@@ -26,129 +25,131 @@ import java.util.Optional;
*/
public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable {
public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
/**
* Identifies the record across the table
*/
private HoodieKey key;
/**
* Identifies the record across the table
*/
private HoodieKey key;
/**
* Actual payload of the record
*/
private T data;
/**
* Actual payload of the record
*/
private T data;
/**
* Current location of record on storage. Filled in by looking up index
*/
private HoodieRecordLocation currentLocation;
/**
* Current location of record on storage. Filled in by looking up index
*/
private HoodieRecordLocation currentLocation;
/**
* New location of record on storage, after written
*/
private HoodieRecordLocation newLocation;
/**
* New location of record on storage, after written
*/
private HoodieRecordLocation newLocation;
public HoodieRecord(HoodieKey key, T data) {
this.key = key;
this.data = data;
this.currentLocation = null;
this.newLocation = null;
public HoodieRecord(HoodieKey key, T data) {
this.key = key;
this.data = data;
this.currentLocation = null;
this.newLocation = null;
}
public HoodieKey getKey() {
return key;
}
public T getData() {
if (data == null) {
throw new IllegalStateException("Payload already deflated for record.");
}
return data;
}
public HoodieKey getKey() {
return key;
/**
* Release the actual payload, to ease memory pressure. To be called after the record has been
* written to storage. Once deflated, cannot be inflated.
*/
public void deflate() {
this.data = null;
}
/**
* Sets the current currentLocation of the record. This should happen exactly-once
*/
public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
assert currentLocation == null;
this.currentLocation = location;
return this;
}
public HoodieRecordLocation getCurrentLocation() {
return currentLocation;
}
/**
* Sets the new currentLocation of the record, after being written. This again should happen
* exactly-once.
*/
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
assert newLocation == null;
this.newLocation = location;
return this;
}
public Optional<HoodieRecordLocation> getNewLocation() {
return Optional.of(this.newLocation);
}
public boolean isCurrentLocationKnown() {
return this.currentLocation != null;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
public T getData() {
if (data == null) {
throw new IllegalStateException("Payload already deflated for record.");
}
return data;
if (o == null || getClass() != o.getClass()) {
return false;
}
HoodieRecord that = (HoodieRecord) o;
return Objects.equal(key, that.key) &&
Objects.equal(data, that.data) &&
Objects.equal(currentLocation, that.currentLocation) &&
Objects.equal(newLocation, that.newLocation);
}
/**
* Release the actual payload, to ease memory pressure. To be called after the record
* has been written to storage. Once deflated, cannot be inflated.
*/
public void deflate() {
this.data = null;
}
@Override
public int hashCode() {
return Objects.hashCode(key, data, currentLocation, newLocation);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieRecord{");
sb.append("key=").append(key);
sb.append(", currentLocation='").append(currentLocation).append('\'');
sb.append(", newLocation='").append(newLocation).append('\'');
sb.append('}');
return sb.toString();
}
/**
* Sets the current currentLocation of the record. This should happen exactly-once
*/
public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
assert currentLocation == null;
this.currentLocation = location;
return this;
}
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
return commitTime + "_" + partitionId + "_" + recordIndex;
}
public HoodieRecordLocation getCurrentLocation() {
return currentLocation;
}
public String getPartitionPath() {
assert key != null;
return key.getPartitionPath();
}
/**
* Sets the new currentLocation of the record, after being written. This again should happen
* exactly-once.
*/
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
assert newLocation == null;
this.newLocation = location;
return this;
}
public Optional<HoodieRecordLocation> getNewLocation() {
return Optional.of(this.newLocation);
}
public boolean isCurrentLocationKnown() {
return this.currentLocation != null;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieRecord that = (HoodieRecord) o;
return Objects.equal(key, that.key) &&
Objects.equal(data, that.data) &&
Objects.equal(currentLocation, that.currentLocation) &&
Objects.equal(newLocation, that.newLocation);
}
@Override
public int hashCode() {
return Objects.hashCode(key, data, currentLocation, newLocation);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieRecord{");
sb.append("key=").append(key);
sb.append(", currentLocation='").append(currentLocation).append('\'');
sb.append(", newLocation='").append(newLocation).append('\'');
sb.append('}');
return sb.toString();
}
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
return commitTime + "_" + partitionId + "_" + recordIndex;
}
public String getPartitionPath() {
assert key != null;
return key.getPartitionPath();
}
public String getRecordKey() {
assert key != null;
return key.getRecordKey();
}
public String getRecordKey() {
assert key != null;
return key.getRecordKey();
}
}

View File

@@ -17,7 +17,6 @@
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
@@ -26,44 +25,46 @@ import java.io.Serializable;
*/
public class HoodieRecordLocation implements Serializable {
private final String commitTime;
private final String fileId;
private final String commitTime;
private final String fileId;
public HoodieRecordLocation(String commitTime, String fileId) {
this.commitTime = commitTime;
this.fileId = fileId;
}
public HoodieRecordLocation(String commitTime, String fileId) {
this.commitTime = commitTime;
this.fileId = fileId;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
return Objects.equal(commitTime, otherLoc.commitTime) &&
Objects.equal(fileId, otherLoc.fileId);
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
return Objects.equal(commitTime, otherLoc.commitTime) &&
Objects.equal(fileId, otherLoc.fileId);
}
@Override
public int hashCode() {
return Objects.hashCode(commitTime, fileId);
}
@Override
public int hashCode() {
return Objects.hashCode(commitTime, fileId);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
sb.append("commitTime=").append(commitTime).append(", ");
sb.append("fileId=").append(fileId);
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
sb.append("commitTime=").append(commitTime).append(", ");
sb.append("fileId=").append(fileId);
sb.append('}');
return sb.toString();
}
public String getCommitTime() {
return commitTime;
}
public String getCommitTime() {
return commitTime;
}
public String getFileId() {
return fileId;
}
public String getFileId() {
return fileId;
}
}

View File

@@ -16,54 +16,55 @@
package com.uber.hoodie.common.model;
import java.io.IOException;
import java.io.Serializable;
import java.util.Map;
import java.util.Optional;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
import java.io.Serializable;
import java.util.Optional;
/**
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code>
* This abstracts out callbacks which depend on record specific logic
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts
* out callbacks which depend on record specific logic
*/
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
/**
* When more than one HoodieRecord have the same HoodieKey, this function combines them
* before attempting to insert/upsert (if combining turned on in HoodieClientConfig)
*/
T preCombine(T another);
/**
*
* This methods lets you write custom merging/combining logic to produce new values
* as a function of current value on storage and whats contained in this object.
*
* eg:
* 1) You are updating counters, you may want to add counts to currentValue and write back updated counts
* 2) You may be reading DB redo logs, and merge them with current image for a database row on storage
*
* @param currentValue Current value in storage, to merge/combine this payload with
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this record.
*/
Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
/**
* When more than one HoodieRecord have the same HoodieKey, this function combines them before
* attempting to insert/upsert (if combining turned on in HoodieClientConfig)
*/
T preCombine(T another);
/**
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
* storage to be combined against. (i.e insert)
* Return EMPTY to skip writing this record.
*/
Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException;
/**
* This methods lets you write custom merging/combining logic to produce new values as a function
* of current value on storage and whats contained in this object.
*
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back
* updated counts 2) You may be reading DB redo logs, and merge them with current image for a
* database row on storage
*
* @param currentValue Current value in storage, to merge/combine this payload with
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this
* record.
*/
Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
throws IOException;
/**
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is passed
* to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute
* some aggregate metrics using the metadata in the context of a write success or failure.
*/
default Optional<Map<String, String>> getMetadata() {
return Optional.empty();
}
/**
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
* storage to be combined against. (i.e insert) Return EMPTY to skip writing this record.
*/
Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException;
/**
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is
* passed to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to
* compute some aggregate metrics using the metadata in the context of a write success or
* failure.
*/
default Optional<Map<String, String>> getMetadata() {
return Optional.empty();
}
}

View File

@@ -31,5 +31,5 @@ package com.uber.hoodie.common.model;
* SIMPLE_LSM - A simple 2 level LSM tree.
*/
public enum HoodieTableType {
COPY_ON_WRITE, MERGE_ON_READ
COPY_ON_WRITE, MERGE_ON_READ
}

View File

@@ -17,8 +17,6 @@
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
/**
@@ -27,143 +25,153 @@ import java.io.Serializable;
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieWriteStat implements Serializable {
public static final String NULL_COMMIT = "null";
public static final String NULL_COMMIT = "null";
/**
* Id of the file being written
*/
private String fileId;
/**
* Id of the file being written
*/
private String fileId;
/**
* Relative path to the file from the base path
*/
private String path;
/**
* Relative path to the file from the base path
*/
private String path;
/**
* The previous version of the file. (null if this is the first version. i.e insert)
*/
private String prevCommit;
/**
* The previous version of the file. (null if this is the first version. i.e insert)
*/
private String prevCommit;
/**
* Total number of records written for this file.
* - for updates, its the entire number of records in the file
* - for inserts, its the actual number of records inserted.
*/
private long numWrites;
/**
* Total number of records written for this file. - for updates, its the entire number of records
* in the file - for inserts, its the actual number of records inserted.
*/
private long numWrites;
/**
* Total number of records deleted.
*/
private long numDeletes;
/**
* Total number of records deleted.
*/
private long numDeletes;
/**
* Total number of records actually changed. (0 for inserts)
*/
private long numUpdateWrites;
/**
* Total number of records actually changed. (0 for inserts)
*/
private long numUpdateWrites;
/**
* Total size of file written
*/
private long totalWriteBytes;
/**
* Total size of file written
*/
private long totalWriteBytes;
/**
* Total number of records, that were n't able to be written due to errors.
*/
private long totalWriteErrors;
/**
* Total number of records, that were n't able to be written due to errors.
*/
private long totalWriteErrors;
public HoodieWriteStat() {
// called by jackson json lib
public HoodieWriteStat() {
// called by jackson json lib
}
public void setFileId(String fileId) {
this.fileId = fileId;
}
public void setPath(String path) {
this.path = path;
}
public void setPrevCommit(String prevCommit) {
this.prevCommit = prevCommit;
}
public void setNumWrites(long numWrites) {
this.numWrites = numWrites;
}
public void setNumDeletes(long numDeletes) {
this.numDeletes = numDeletes;
}
public void setNumUpdateWrites(long numUpdateWrites) {
this.numUpdateWrites = numUpdateWrites;
}
public long getTotalWriteBytes() {
return totalWriteBytes;
}
public void setTotalWriteBytes(long totalWriteBytes) {
this.totalWriteBytes = totalWriteBytes;
}
public long getTotalWriteErrors() {
return totalWriteErrors;
}
public void setTotalWriteErrors(long totalWriteErrors) {
this.totalWriteErrors = totalWriteErrors;
}
public String getPrevCommit() {
return prevCommit;
}
public long getNumWrites() {
return numWrites;
}
public long getNumDeletes() {
return numDeletes;
}
public long getNumUpdateWrites() {
return numUpdateWrites;
}
public String getFileId() {
return fileId;
}
public String getPath() {
return path;
}
@Override
public String toString() {
return new StringBuilder()
.append("HoodieWriteStat {")
.append("path=" + path)
.append(", prevCommit='" + prevCommit + '\'')
.append(", numWrites=" + numWrites)
.append(", numDeletes=" + numDeletes)
.append(", numUpdateWrites=" + numUpdateWrites)
.append(", numWriteBytes=" + totalWriteBytes)
.append('}')
.toString();
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
public void setFileId(String fileId) {
this.fileId = fileId;
HoodieWriteStat that = (HoodieWriteStat) o;
if (!path.equals(that.path)) {
return false;
}
return prevCommit.equals(that.prevCommit);
public void setPath(String path) { this.path = path; }
}
public void setPrevCommit(String prevCommit) {
this.prevCommit = prevCommit;
}
public void setNumWrites(long numWrites) {
this.numWrites = numWrites;
}
public void setNumDeletes(long numDeletes) {
this.numDeletes = numDeletes;
}
public void setNumUpdateWrites(long numUpdateWrites) {
this.numUpdateWrites = numUpdateWrites;
}
public long getTotalWriteBytes() {
return totalWriteBytes;
}
public void setTotalWriteBytes(long totalWriteBytes) {
this.totalWriteBytes = totalWriteBytes;
}
public long getTotalWriteErrors() { return totalWriteErrors; }
public void setTotalWriteErrors(long totalWriteErrors) { this.totalWriteErrors = totalWriteErrors; }
public String getPrevCommit() {
return prevCommit;
}
public long getNumWrites() {
return numWrites;
}
public long getNumDeletes() {
return numDeletes;
}
public long getNumUpdateWrites() {
return numUpdateWrites;
}
public String getFileId() {
return fileId;
}
public String getPath() { return path; }
@Override
public String toString() {
return new StringBuilder()
.append("HoodieWriteStat {")
.append("path=" + path)
.append(", prevCommit='" + prevCommit + '\'')
.append(", numWrites=" + numWrites)
.append(", numDeletes=" + numDeletes)
.append(", numUpdateWrites=" + numUpdateWrites)
.append(", numWriteBytes=" + totalWriteBytes)
.append('}')
.toString();
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieWriteStat that = (HoodieWriteStat) o;
if (!path.equals(that.path))
return false;
return prevCommit.equals(that.prevCommit);
}
@Override
public int hashCode() {
int result = path.hashCode();
result = 31 * result + prevCommit.hashCode();
return result;
}
@Override
public int hashCode() {
int result = path.hashCode();
result = 31 * result + prevCommit.hashCode();
return result;
}
}

View File

@@ -19,8 +19,11 @@ package com.uber.hoodie.common.table;
import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieFileFormat;
import com.uber.hoodie.common.model.HoodieTableType;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Date;
import java.util.Properties;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -28,144 +31,134 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.Serializable;
import java.util.Date;
import java.util.Properties;
/**
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc
* Configurations are loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path
* and never changes during the lifetime of a hoodie dataset.
* Configurations are loaded from hoodie.properties, these properties are usually set during
* initializing a path as hoodie base path and never changes during the lifetime of a hoodie
* dataset.
*
* @see HoodieTableMetaClient
* @since 0.3.0
*/
public class HoodieTableConfig implements Serializable {
private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class);
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
"hoodie.table.ro.file.format";
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
"hoodie.table.rt.file.format";
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class);
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
private Properties props;
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
"hoodie.table.ro.file.format";
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
"hoodie.table.rt.file.format";
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
public HoodieTableConfig(FileSystem fs, String metaPath) {
Properties props = new Properties();
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
log.info("Loading dataset properties from " + propertyPath);
try {
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
props.load(inputStream);
}
} catch (IOException e) {
throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e);
}
this.props = props;
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
private Properties props;
public HoodieTableConfig(FileSystem fs, String metaPath) {
Properties props = new Properties();
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
log.info("Loading dataset properties from " + propertyPath);
try {
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
props.load(inputStream);
}
} catch (IOException e) {
throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e);
}
this.props = props;
}
/**
* For serailizing and de-serializing
* @deprecated
*/
public HoodieTableConfig() {
/**
* For serailizing and de-serializing
*
* @deprecated
*/
public HoodieTableConfig() {
}
/**
* Initialize the hoodie meta directory and any necessary files inside the meta (including the
* hoodie.properties)
*/
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
Properties properties) throws IOException {
if (!fs.exists(metadataFolder)) {
fs.mkdirs(metadataFolder);
}
/**
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
*
* @param metadataFolder
* @param properties
* @throws IOException
*/
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
Properties properties) throws IOException {
if (!fs.exists(metadataFolder)) {
fs.mkdirs(metadataFolder);
}
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
FSDataOutputStream outputStream = fs.create(propertyPath);
try {
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
throw new IllegalArgumentException(
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
}
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
}
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
properties
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
} finally {
outputStream.close();
}
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
FSDataOutputStream outputStream = fs.create(propertyPath);
try {
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
throw new IllegalArgumentException(
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
}
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
}
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ
.name()
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
properties
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
} finally {
outputStream.close();
}
}
/**
* Read the table type from the table properties and if not found, return the default
*
* @return
*/
public HoodieTableType getTableType() {
if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
return HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
}
return DEFAULT_TABLE_TYPE;
/**
* Read the table type from the table properties and if not found, return the default
*/
public HoodieTableType getTableType() {
if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
return HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
}
return DEFAULT_TABLE_TYPE;
}
/**
* Read the payload class for HoodieRecords from the table properties
*
* @return
*/
public String getPayloadClass() {
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
/**
* Read the payload class for HoodieRecords from the table properties
*/
public String getPayloadClass() {
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
}
/**
* Read the table name
*
* @return
*/
public String getTableName() {
return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
}
/**
* Read the table name
*/
public String getTableName() {
return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
}
/**
* Get the Read Optimized Storage Format
*
* @return HoodieFileFormat for the Read Optimized Storage format
*/
public HoodieFileFormat getROFileFormat() {
if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
}
return DEFAULT_RO_FILE_FORMAT;
/**
* Get the Read Optimized Storage Format
*
* @return HoodieFileFormat for the Read Optimized Storage format
*/
public HoodieFileFormat getROFileFormat() {
if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
}
return DEFAULT_RO_FILE_FORMAT;
}
/**
* Get the Read Optimized Storage Format
*
* @return HoodieFileFormat for the Read Optimized Storage format
*/
public HoodieFileFormat getRTFileFormat() {
if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) {
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME));
}
return DEFAULT_RT_FILE_FORMAT;
/**
* Get the Read Optimized Storage Format
*
* @return HoodieFileFormat for the Read Optimized Storage format
*/
public HoodieFileFormat getRTFileFormat() {
if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) {
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME));
}
return DEFAULT_RT_FILE_FORMAT;
}
}

View File

@@ -21,6 +21,11 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.DatasetNotFoundException;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Objects;
import java.util.Properties;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -28,225 +33,205 @@ import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.Externalizable;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Objects;
import java.util.Properties;
/**
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table
* It returns meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to start getting the meta-data.
* <p>
* All the timelines are computed lazily, once computed the timeline is cached and never refreshed.
* Use the <code>HoodieTimeline.reload()</code> to refresh timelines.
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns
* meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to
* start getting the meta-data. <p> All the timelines are computed lazily, once computed the
* timeline is cached and never refreshed. Use the <code>HoodieTimeline.reload()</code> to refresh
* timelines.
*
* @see HoodieTimeline
* @since 0.3.0
*/
public class HoodieTableMetaClient implements Serializable {
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
public static String METAFOLDER_NAME = ".hoodie";
private String basePath;
private transient FileSystem fs;
private String metaPath;
private HoodieTableType tableType;
private HoodieTableConfig tableConfig;
private HoodieActiveTimeline activeTimeline;
private HoodieArchivedTimeline archivedTimeline;
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
public static String METAFOLDER_NAME = ".hoodie";
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
// Do not load any timeline by default
this(fs, basePath, false);
private String basePath;
private transient FileSystem fs;
private String metaPath;
private HoodieTableType tableType;
private HoodieTableConfig tableConfig;
private HoodieActiveTimeline activeTimeline;
private HoodieArchivedTimeline archivedTimeline;
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
// Do not load any timeline by default
this(fs, basePath, false);
}
public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad)
throws DatasetNotFoundException {
log.info("Loading HoodieTableMetaClient from " + basePath);
this.basePath = basePath;
this.fs = fs;
Path basePathDir = new Path(this.basePath);
this.metaPath = basePath + File.separator + METAFOLDER_NAME;
Path metaPathDir = new Path(this.metaPath);
DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
this.tableConfig = new HoodieTableConfig(fs, metaPath);
this.tableType = tableConfig.getTableType();
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
if (loadActiveTimelineOnLoad) {
log.info("Loading Active commit timeline for " + basePath);
getActiveTimeline();
}
}
public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad)
throws DatasetNotFoundException {
log.info("Loading HoodieTableMetaClient from " + basePath);
this.basePath = basePath;
this.fs = fs;
Path basePathDir = new Path(this.basePath);
this.metaPath = basePath + File.separator + METAFOLDER_NAME;
Path metaPathDir = new Path(this.metaPath);
DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
this.tableConfig = new HoodieTableConfig(fs, metaPath);
this.tableType = tableConfig.getTableType();
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
if (loadActiveTimelineOnLoad) {
log.info("Loading Active commit timeline for " + basePath);
getActiveTimeline();
}
}
/**
* For serailizing and de-serializing
*
* @deprecated
*/
public HoodieTableMetaClient() {
}
/**
* For serailizing and de-serializing
*
* @deprecated
*/
public HoodieTableMetaClient() {
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
out.defaultWriteObject();
}
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
out.defaultWriteObject();
}
/**
* @return Base path
*/
public String getBasePath() {
return basePath;
}
/**
* @return Base path
*/
public String getBasePath() {
return basePath;
}
/**
* @return Hoodie Table Type
*/
public HoodieTableType getTableType() {
return tableType;
}
/**
* @return Hoodie Table Type
*/
public HoodieTableType getTableType() {
return tableType;
}
/**
* @return Meta path
*/
public String getMetaPath() {
return metaPath;
}
/**
* @return Meta path
*/
public String getMetaPath() {
return metaPath;
}
/**
* @return Table Config
*/
public HoodieTableConfig getTableConfig() {
return tableConfig;
}
/**
* @return Table Config
*/
public HoodieTableConfig getTableConfig() {
return tableConfig;
}
/**
* Get the FS implementation for this table
*/
public FileSystem getFs() {
return fs;
}
/**
* Get the FS implementation for this table
* @return
*/
public FileSystem getFs() {
return fs;
/**
* Get the active instants as a timeline
*
* @return Active instants timeline
*/
public synchronized HoodieActiveTimeline getActiveTimeline() {
if (activeTimeline == null) {
activeTimeline = new HoodieActiveTimeline(fs, metaPath);
}
return activeTimeline;
}
/**
* Get the active instants as a timeline
*
* @return Active instants timeline
* @throws IOException
*/
public synchronized HoodieActiveTimeline getActiveTimeline() {
if (activeTimeline == null) {
activeTimeline = new HoodieActiveTimeline(fs, metaPath);
}
return activeTimeline;
/**
* Get the archived commits as a timeline. This is costly operation, as all data from the archived
* files are read. This should not be used, unless for historical debugging purposes
*
* @return Active commit timeline
*/
public synchronized HoodieArchivedTimeline getArchivedTimeline() {
if (archivedTimeline == null) {
archivedTimeline = new HoodieArchivedTimeline(fs, metaPath);
}
return archivedTimeline;
}
/**
* Get the archived commits as a timeline. This is costly operation, as all data from the
* archived files are read. This should not be used, unless for historical debugging purposes
*
* @return Active commit timeline
* @throws IOException
*/
public synchronized HoodieArchivedTimeline getArchivedTimeline() {
if (archivedTimeline == null) {
archivedTimeline = new HoodieArchivedTimeline(fs, metaPath);
}
return archivedTimeline;
/**
* Helper method to initialize a given path, as a given storage type and table name
*/
public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath,
HoodieTableType tableType, String tableName, String payloadClassName) throws IOException {
Properties properties = new Properties();
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
if (tableType == HoodieTableType.MERGE_ON_READ) {
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName);
}
return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties);
}
/**
* Helper method to initialize a given path, as a given storage type and table name
*
* @param fs
* @param basePath
* @param tableType
* @param tableName
* @return
* @throws IOException
*/
public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath, HoodieTableType tableType, String tableName, String payloadClassName) throws IOException {
Properties properties = new Properties();
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
if(tableType == HoodieTableType.MERGE_ON_READ) {
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName);
}
return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties);
/**
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as
* Properties
*
* @return Instance of HoodieTableMetaClient
*/
public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs,
String basePath, Properties props) throws IOException {
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
Path basePathDir = new Path(basePath);
if (!fs.exists(basePathDir)) {
fs.mkdirs(basePathDir);
}
Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
if (!fs.exists(metaPathDir)) {
fs.mkdirs(metaPathDir);
}
HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath);
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
+ " from " + basePath);
return metaClient;
}
/**
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties
*
* @param fs
* @param basePath
* @param props
* @return Instance of HoodieTableMetaClient
* @throws IOException
*/
public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs,
String basePath, Properties props) throws IOException {
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
Path basePathDir = new Path(basePath);
if (!fs.exists(basePathDir)) {
fs.mkdirs(basePathDir);
}
Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
if (!fs.exists(metaPathDir)) {
fs.mkdirs(metaPathDir);
}
HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath);
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
+ " from " + basePath);
return metaClient;
}
// HELPER METHODS TO CREATE META FILE NAMES
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
throws IOException {
return fs.listStatus(metaPath, nameFilter);
}
// HELPER METHODS TO CREATE META FILE NAMES
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
throws IOException {
return fs.listStatus(metaPath, nameFilter);
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
HoodieTableMetaClient that = (HoodieTableMetaClient) o;
return Objects.equals(basePath, that.basePath) && tableType == that.tableType;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieTableMetaClient that = (HoodieTableMetaClient) o;
return Objects.equals(basePath, that.basePath) && tableType == that.tableType;
}
@Override
public int hashCode() {
return Objects.hash(basePath, tableType);
}
@Override
public int hashCode() {
return Objects.hash(basePath, tableType);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{");
sb.append("basePath='").append(basePath).append('\'');
sb.append(", metaPath='").append(metaPath).append('\'');
sb.append(", tableType=").append(tableType);
sb.append('}');
return sb.toString();
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{");
sb.append("basePath='").append(basePath).append('\'');
sb.append(", metaPath='").append(metaPath).append('\'');
sb.append(", tableType=").append(tableType);
sb.append('}');
return sb.toString();
}
}

View File

@@ -18,19 +18,16 @@ package com.uber.hoodie.common.table;
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
import java.util.Optional;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
/**
* HoodieTimeline is a view of meta-data instants in the hoodie dataset.
* Instants are specific points in time represented as HoodieInstant.
* <p>
* Timelines are immutable once created and operations create new instance of
* timelines which filter on the instants and this can be chained.
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific
* points in time represented as HoodieInstant. <p> Timelines are immutable once created and
* operations create new instance of timelines which filter on the instants and this can be
* chained.
*
* @see com.uber.hoodie.common.table.HoodieTableMetaClient
* @see HoodieDefaultTimeline
@@ -38,205 +35,195 @@ import java.util.stream.Stream;
* @since 0.3.0
*/
public interface HoodieTimeline extends Serializable {
String COMMIT_ACTION = "commit";
String DELTA_COMMIT_ACTION = "deltacommit";
String CLEAN_ACTION = "clean";
String ROLLBACK_ACTION = "rollback";
String SAVEPOINT_ACTION = "savepoint";
String COMPACTION_ACTION = "compaction";
String INFLIGHT_EXTENSION = ".inflight";
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
//this is to preserve backwards compatibility on commit in-flight filenames
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
String COMMIT_ACTION = "commit";
String DELTA_COMMIT_ACTION = "deltacommit";
String CLEAN_ACTION = "clean";
String ROLLBACK_ACTION = "rollback";
String SAVEPOINT_ACTION = "savepoint";
String COMPACTION_ACTION = "compaction";
String INFLIGHT_EXTENSION = ".inflight";
/**
* Filter this timeline to just include the in-flights
*
* @return New instance of HoodieTimeline with just in-flights
*/
HoodieTimeline filterInflights();
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
//this is to preserve backwards compatibility on commit in-flight filenames
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
/**
* Filter this timeline to just include the completed instants
*
* @return New instance of HoodieTimeline with just completed instants
*/
HoodieTimeline filterCompletedInstants();
/**
* Filter this timeline to just include the in-flights
*
* @return New instance of HoodieTimeline with just in-flights
*/
HoodieTimeline filterInflights();
/**
* Filter this timeline to just include the completed instants
*
* @return New instance of HoodieTimeline with just completed instants
*/
HoodieTimeline filterCompletedInstants();
/**
* Create a new Timeline with instants after startTs and before or on endTs
*
* @param startTs
* @param endTs
*/
HoodieTimeline findInstantsInRange(String startTs, String endTs);
/**
* Create a new Timeline with instants after startTs and before or on endTs
*/
HoodieTimeline findInstantsInRange(String startTs, String endTs);
/**
* Create a new Timeline with all the instants after startTs
*
* @param commitTime
* @param numCommits
*/
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
/**
* Create a new Timeline with all the instants after startTs
*/
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
/**
* If the timeline has any instants
*
* @return true if timeline is empty
*/
boolean empty();
/**
* If the timeline has any instants
*
* @return true if timeline is empty
*/
boolean empty();
/**
* @return total number of completed instants
*/
int countInstants();
/**
* @return total number of completed instants
*/
int countInstants();
/**
* @return first completed instant if available
*/
Optional<HoodieInstant> firstInstant();
/**
* @return first completed instant if available
*/
Optional<HoodieInstant> firstInstant();
/**
* @param n
* @return nth completed instant from the first completed instant
*/
Optional<HoodieInstant> nthInstant(int n);
/**
* @return nth completed instant from the first completed instant
*/
Optional<HoodieInstant> nthInstant(int n);
/**
* @return last completed instant if available
*/
Optional<HoodieInstant> lastInstant();
/**
* @return last completed instant if available
*/
Optional<HoodieInstant> lastInstant();
/**
* @param n
* @return nth completed instant going back from the last completed instant
*/
Optional<HoodieInstant> nthFromLastInstant(int n);
/**
* @return nth completed instant going back from the last completed instant
*/
Optional<HoodieInstant> nthFromLastInstant(int n);
/**
* @return true if the passed instant is present as a completed instant on the timeline
*/
boolean containsInstant(HoodieInstant instant);
/**
* @return true if the passed instant is present as a completed instant on the timeline
*/
boolean containsInstant(HoodieInstant instant);
/**
* @return true if the passed instant is present as a completed instant on the timeline or
* if the instant is before the first completed instant in the timeline
*/
boolean containsOrBeforeTimelineStarts(String ts);
/**
* @return true if the passed instant is present as a completed instant on the timeline or if the
* instant is before the first completed instant in the timeline
*/
boolean containsOrBeforeTimelineStarts(String ts);
/**
* @return Get the stream of completed instants
*/
Stream<HoodieInstant> getInstants();
/**
* @return Get the stream of completed instants
*/
Stream<HoodieInstant> getInstants();
/**
* @return true if the passed in instant is before the first completed instant in the timeline
*/
boolean isBeforeTimelineStarts(String ts);
/**
* @return true if the passed in instant is before the first completed instant in the timeline
*/
boolean isBeforeTimelineStarts(String ts);
/**
* Read the completed instant details
*
* @param instant
* @return
*/
Optional<byte[]> getInstantDetails(HoodieInstant instant);
/**
* Read the completed instant details
*/
Optional<byte[]> getInstantDetails(HoodieInstant instant);
/**
* Helper methods to compare instants
**/
BiPredicate<String, String> GREATER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
BiPredicate<String, String> LESSER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
/**
* Helper methods to compare instants
**/
BiPredicate<String, String> GREATER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
BiPredicate<String, String> LESSER_OR_EQUAL =
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
static boolean compareTimestamps(String commit1, String commit2,
BiPredicate<String, String> predicateToApply) {
return predicateToApply.test(commit1, commit2);
}
static boolean compareTimestamps(String commit1, String commit2,
BiPredicate<String, String> predicateToApply) {
return predicateToApply.test(commit1, commit2);
}
static HoodieInstant getCompletedInstant(final HoodieInstant instant) {
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
}
static HoodieInstant getCompletedInstant(final HoodieInstant instant) {
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
}
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
}
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
}
static String makeCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
}
static String makeCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
}
static String makeInflightCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
}
static String makeInflightCommitFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
}
static String makeCleanerFileName(String instant) {
return instant + HoodieTimeline.CLEAN_EXTENSION;
}
static String makeCleanerFileName(String instant) {
return instant + HoodieTimeline.CLEAN_EXTENSION;
}
static String makeInflightCleanerFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
}
static String makeInflightCleanerFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
}
static String makeRollbackFileName(String instant) {
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
}
static String makeRollbackFileName(String instant) {
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
}
static String makeInflightRollbackFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
}
static String makeInflightRollbackFileName(String instant) {
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
}
static String makeInflightSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
}
static String makeInflightSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
}
static String makeSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
}
static String makeSavePointFileName(String commitTime) {
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
}
static String makeInflightCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION;
}
static String makeInflightCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION;
}
static String makeCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.COMPACTION_EXTENSION;
}
static String makeCompactionFileName(String commitTime) {
return commitTime + HoodieTimeline.COMPACTION_EXTENSION;
}
static String makeInflightDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
}
static String makeInflightDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
}
static String makeDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
}
static String makeDeltaFileName(String commitTime) {
return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
}
static String getCommitFromCommitFile(String commitFileName) {
return commitFileName.split("\\.")[0];
}
static String getCommitFromCommitFile(String commitFileName) {
return commitFileName.split("\\.")[0];
}
static String makeFileNameAsComplete(String fileName) {
return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
}
static String makeFileNameAsComplete(String fileName) {
return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
}
static String makeFileNameAsInflight(String fileName) {
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
}
static String makeFileNameAsInflight(String fileName) {
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
}
}

View File

@@ -19,13 +19,7 @@ package com.uber.hoodie.common.table;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
import com.uber.hoodie.common.model.HoodieLogFile;
import org.apache.hadoop.fs.FileStatus;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.stream.Stream;
/**
@@ -35,70 +29,70 @@ import java.util.stream.Stream;
*/
public interface TableFileSystemView {
/**
* ReadOptimizedView - methods to provide a view of columnar data files only.
*/
interface ReadOptimizedView {
/**
* Stream all the latest data files in the given partition
*/
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
/**
* Stream all the latest data files, in the file system view
*/
Stream<HoodieDataFile> getLatestDataFiles();
/**
* Stream all the latest version data files in the given partition with precondition that
* commitTime(file) before maxCommitTime
*/
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest data files pass
*/
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
/**
* Stream all the data file versions grouped by FileId for a given partition
*/
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
}
/**
* ReadOptimizedView - methods to provide a view of columnar data files only.
*/
interface ReadOptimizedView {
/**
* RealtimeView - methods to access a combination of columnar data files + log files with real time data.
* Stream all the latest data files in the given partition
*/
interface RealtimeView {
/**
* Stream all the latest file slices in the given partition
*/
Stream<FileSlice> getLatestFileSlices(String partitionPath);
/**
* Stream all the latest file slices in the given partition with precondition that
* commitTime(file) before maxCommitTime
*/
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest file slices, in the given range
*/
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
/**
* Stream all the file slices for a given partition, latest or not.
*/
Stream<FileSlice> getAllFileSlices(String partitionPath);
}
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
/**
* Stream all the file groups for a given partition
*
* @param partitionPath
* @return
* Stream all the latest data files, in the file system view
*/
Stream<HoodieFileGroup> getAllFileGroups(String partitionPath);
Stream<HoodieDataFile> getLatestDataFiles();
/**
* Stream all the latest version data files in the given partition with precondition that
* commitTime(file) before maxCommitTime
*/
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest data files pass
*/
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
/**
* Stream all the data file versions grouped by FileId for a given partition
*/
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
}
/**
* RealtimeView - methods to access a combination of columnar data files + log files with real
* time data.
*/
interface RealtimeView {
/**
* Stream all the latest file slices in the given partition
*/
Stream<FileSlice> getLatestFileSlices(String partitionPath);
/**
* Stream all the latest file slices in the given partition with precondition that
* commitTime(file) before maxCommitTime
*/
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime);
/**
* Stream all the latest file slices, in the given range
*/
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
/**
* Stream all the file slices for a given partition, latest or not.
*/
Stream<FileSlice> getAllFileSlices(String partitionPath);
}
/**
* Stream all the file groups for a given partition
*/
Stream<HoodieFileGroup> getAllFileGroups(String partitionPath);
}

View File

@@ -16,6 +16,9 @@
package com.uber.hoodie.common.table.log;
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME;
import com.google.common.collect.Maps;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieLogFile;
@@ -29,14 +32,6 @@ import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
import com.uber.hoodie.common.util.ReflectionUtils;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Arrays;
@@ -48,17 +43,22 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged
* list of records which will be used as a lookup table when merging the base columnar file
* with the redo log file.
*
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of
* records which will be used as a lookup table when merging the base columnar file with the redo
* log file.
*/
public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
public class HoodieCompactedLogRecordScanner implements
Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
private final static Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class);
// Final list of compacted/merged records to iterate
@@ -80,10 +80,10 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
Deque<HoodieLogBlock> lastBlocks = new ArrayDeque<>();
public HoodieCompactedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
Schema readerSchema, String latestInstantTime) {
Schema readerSchema, String latestInstantTime) {
this.readerSchema = readerSchema;
this.latestInstantTime = latestInstantTime;
this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath);
this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath);
// load class from the payload fully qualified class name
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
@@ -91,18 +91,18 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
Map<String, HoodieRecord<? extends HoodieRecordPayload>> records = Maps.newHashMap();
// iterate over the paths
Iterator<String> logFilePathsItr = logFilePaths.iterator();
while(logFilePathsItr.hasNext()) {
while (logFilePathsItr.hasNext()) {
HoodieLogFile logFile = new HoodieLogFile(new Path(logFilePathsItr.next()));
log.info("Scanning log file " + logFile.getPath());
totalLogFiles.incrementAndGet();
try {
// Use the HoodieLogFormatReader to iterate through the blocks in the log file
HoodieLogFormatReader reader = new HoodieLogFormatReader(fs, logFile, readerSchema, true);
while(reader.hasNext()) {
while (reader.hasNext()) {
HoodieLogBlock r = reader.next();
String blockInstantTime = r.getLogMetadata().get(INSTANT_TIME);
if(!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime,
HoodieTimeline.LESSER_OR_EQUAL)) {
if (!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime,
HoodieTimeline.LESSER_OR_EQUAL)) {
//hit a block with instant time greater than should be processed, stop processing further
break;
}
@@ -117,7 +117,7 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
case DELETE_BLOCK:
log.info("Reading a delete block from file " + logFile.getPath());
String lastBlockInstantTime = lastBlocks.peek().getLogMetadata().get(INSTANT_TIME);
if(!lastBlockInstantTime.equals(blockInstantTime)) {
if (!lastBlockInstantTime.equals(blockInstantTime)) {
// Block with the keys listed as to be deleted, data and delete blocks written in different batches
// so it is safe to merge
// This is a delete block, so lets merge any records from previous data block
@@ -130,7 +130,8 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
log.info("Reading a command block from file " + logFile.getPath());
// This is a command block - take appropriate action based on the command
HoodieCommandBlock commandBlock = (HoodieCommandBlock) r;
String targetInstantForCommandBlock = r.getLogMetadata().get(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME);
String targetInstantForCommandBlock = r.getLogMetadata()
.get(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME);
switch (commandBlock.getType()) { // there can be different types of command blocks
case ROLLBACK_PREVIOUS_BLOCK:
// Rollback the last read log block
@@ -139,15 +140,16 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
// the rollback operation itself
HoodieLogBlock lastBlock = lastBlocks.peek();
if (lastBlock != null && lastBlock.getBlockType() != CORRUPT_BLOCK &&
targetInstantForCommandBlock.contentEquals(lastBlock.getLogMetadata().get(INSTANT_TIME))) {
targetInstantForCommandBlock
.contentEquals(lastBlock.getLogMetadata().get(INSTANT_TIME))) {
log.info("Rolling back the last log block read in " + logFile.getPath());
lastBlocks.pop();
} else if(lastBlock != null && lastBlock.getBlockType() == CORRUPT_BLOCK) {
} else if (lastBlock != null && lastBlock.getBlockType() == CORRUPT_BLOCK) {
// handle corrupt blocks separately since they may not have metadata
log.info("Rolling back the last corrupted log block read in " + logFile.getPath());
log.info(
"Rolling back the last corrupted log block read in " + logFile.getPath());
lastBlocks.pop();
}
else {
} else {
log.warn("Invalid or extra rollback command block in " + logFile.getPath());
}
break;
@@ -165,7 +167,7 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
throw new HoodieIOException("IOException when reading log file " + logFile);
}
// merge the last read block when all the blocks are done reading
if(!lastBlocks.isEmpty()) {
if (!lastBlocks.isEmpty()) {
log.info("Merging the final data blocks in " + logFile.getPath());
merge(records, lastBlocks);
}
@@ -175,32 +177,33 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
}
/**
* Iterate over the GenericRecord in the block, read the hoodie key and partition path
* and merge with the application specific payload if the same key was found before
* Sufficient to just merge the log records since the base data is merged on previous compaction
*
* @param dataBlock
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and merge
* with the application specific payload if the same key was found before Sufficient to just merge
* the log records since the base data is merged on previous compaction
*/
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> loadRecordsFromBlock(HoodieAvroDataBlock dataBlock) {
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock = Maps.newHashMap();
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> loadRecordsFromBlock(
HoodieAvroDataBlock dataBlock) {
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock = Maps
.newHashMap();
List<IndexedRecord> recs = dataBlock.getRecords();
totalLogRecords.addAndGet(recs.size());
recs.forEach(rec -> {
String key = ((GenericRecord) rec).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)
.toString();
.toString();
String partitionPath =
((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
.toString();
((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
.toString();
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
new HoodieKey(key, partitionPath),
ReflectionUtils.loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class));
new HoodieKey(key, partitionPath),
ReflectionUtils
.loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class));
if (recordsFromLastBlock.containsKey(key)) {
// Merge and store the merged record
HoodieRecordPayload combinedValue = recordsFromLastBlock.get(key).getData()
.preCombine(hoodieRecord.getData());
.preCombine(hoodieRecord.getData());
recordsFromLastBlock
.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
combinedValue));
.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
combinedValue));
} else {
// Put the record as is
recordsFromLastBlock.put(key, hoodieRecord);
@@ -211,12 +214,9 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
/**
* Merge the last seen log blocks with the accumulated records
*
* @param records
* @param lastBlocks
*/
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
Deque<HoodieLogBlock> lastBlocks) {
Deque<HoodieLogBlock> lastBlocks) {
while (!lastBlocks.isEmpty()) {
HoodieLogBlock lastBlock = lastBlocks.pop();
switch (lastBlock.getBlockType()) {
@@ -234,19 +234,16 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
/**
* Merge the records read from a single data block with the accumulated records
*
* @param records
* @param recordsFromLastBlock
*/
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock) {
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock) {
recordsFromLastBlock.forEach((key, hoodieRecord) -> {
if (records.containsKey(key)) {
// Merge and store the merged record
HoodieRecordPayload combinedValue = records.get(key).getData()
.preCombine(hoodieRecord.getData());
.preCombine(hoodieRecord.getData());
records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
combinedValue));
combinedValue));
} else {
// Put the record as is
records.put(key, hoodieRecord);

View File

@@ -29,31 +29,36 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* File Format for Hoodie Log Files.
* The File Format consists of blocks each seperated with a MAGIC sync marker.
* A Block can either be a Data block, Command block or Delete Block.
* Data Block - Contains log records serialized as Avro Binary Format
* Command Block - Specific commands like RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block
* Delete Block - List of keys to delete - tombstone for keys
* File Format for Hoodie Log Files. The File Format consists of blocks each seperated with a MAGIC
* sync marker. A Block can either be a Data block, Command block or Delete Block. Data Block -
* Contains log records serialized as Avro Binary Format Command Block - Specific commands like
* RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of keys
* to delete - tombstone for keys
*/
public interface HoodieLogFormat {
/**
* Magic 4 bytes we put at the start of every block in the log file. Sync marker.
* We could make this file specific (generate a random 4 byte magic and stick it in the file header), but this I think is suffice for now - PR
* Magic 4 bytes we put at the start of every block in the log file. Sync marker. We could make
* this file specific (generate a random 4 byte magic and stick it in the file header), but this I
* think is suffice for now - PR
*/
byte [] MAGIC = new byte [] {'H', 'U', 'D', 'I'};
byte[] MAGIC = new byte[]{'H', 'U', 'D', 'I'};
/**
* Writer interface to allow appending block to this file format
*/
interface Writer extends Closeable {
/** @return the path to this {@link HoodieLogFormat} */
/**
* @return the path to this {@link HoodieLogFormat}
*/
HoodieLogFile getLogFile();
/**
* Append Block returns a new Writer if the log is rolled
*/
Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException;
long getCurrentSize() throws IOException;
}
@@ -61,7 +66,10 @@ public interface HoodieLogFormat {
* Reader interface which is an Iterator of HoodieLogBlock
*/
interface Reader extends Closeable, Iterator<HoodieLogBlock> {
/** @return the path to this {@link HoodieLogFormat} */
/**
* @return the path to this {@link HoodieLogFormat}
*/
HoodieLogFile getLogFile();
}
@@ -70,6 +78,7 @@ public interface HoodieLogFormat {
* Builder class to construct the default log format writer
*/
class WriterBuilder {
private final static Logger log = LogManager.getLogger(WriterBuilder.class);
// Default max log file size 512 MB
public static final long DEFAULT_SIZE_THRESHOLD = 512 * 1024 * 1024L;
@@ -187,7 +196,8 @@ public interface HoodieLogFormat {
return new WriterBuilder();
}
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata)
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
boolean readMetadata)
throws IOException {
return new HoodieLogFormatReader(fs, logFile, readerSchema, readMetadata);
}

View File

@@ -17,7 +17,6 @@
package com.uber.hoodie.common.table.log;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
import com.uber.hoodie.common.table.log.block.HoodieCommandBlock;
@@ -38,11 +37,12 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Scans a log file and provides block level iterator on the log file
* Loads the entire block contents in memory
* Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found)
* Scans a log file and provides block level iterator on the log file Loads the entire block
* contents in memory Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one
* is found)
*/
public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
private static final int DEFAULT_BUFFER_SIZE = 4096;
private final static Logger log = LogManager.getLogger(HoodieLogFormatReader.class);
@@ -53,14 +53,16 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
private HoodieLogBlock nextBlock = null;
private boolean readMetadata = true;
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, boolean readMetadata) throws IOException {
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
boolean readMetadata) throws IOException {
this.inputStream = fs.open(logFile.getPath(), bufferSize);
this.logFile = logFile;
this.readerSchema = readerSchema;
this.readMetadata = readMetadata;
}
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata) throws IOException {
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
boolean readMetadata) throws IOException {
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, readMetadata);
}
@@ -83,7 +85,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
// Skip blocksize in the stream and we should either find a sync marker (start of the next block) or EOF
// If we did not find either of it, then this block is a corrupted block.
boolean isCorrupted = isBlockCorrupt(blocksize);
if(isCorrupted) {
if (isCorrupted) {
return createCorruptBlock();
}
@@ -140,7 +142,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
}
private long scanForNextAvailableBlockOffset() throws IOException {
while(true) {
while (true) {
long currentPos = inputStream.getPos();
try {
boolean isEOF = readMagic();
@@ -191,7 +193,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
@Override
public HoodieLogBlock next() {
if(nextBlock == null) {
if (nextBlock == null) {
// may be hasNext is not called
hasNext();
}

View File

@@ -16,8 +16,6 @@
package com.uber.hoodie.common.table.log;
import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
import com.uber.hoodie.common.table.log.HoodieLogFormat.WriterBuilder;
@@ -35,8 +33,8 @@ import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* HoodieLogFormatWriter can be used to append blocks to a log file
* Use HoodieLogFormat.WriterBuilder to construct
* HoodieLogFormatWriter can be used to append blocks to a log file Use
* HoodieLogFormat.WriterBuilder to construct
*/
public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
@@ -58,7 +56,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
* @param sizeThreshold
*/
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
Short replication, Long sizeThreshold)
Short replication, Long sizeThreshold)
throws IOException, InterruptedException {
this.fs = fs;
this.logFile = logFile;
@@ -157,8 +155,9 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
}
public long getCurrentSize() throws IOException {
if(output == null) {
throw new IllegalStateException("Cannot get current size as the underlying stream has been closed already");
if (output == null) {
throw new IllegalStateException(
"Cannot get current size as the underlying stream has been closed already");
}
return output.getPos();
}

View File

@@ -18,6 +18,14 @@ package com.uber.hoodie.common.table.log.block;
import com.uber.hoodie.common.util.HoodieAvroUtils;
import com.uber.hoodie.exception.HoodieIOException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
@@ -27,30 +35,18 @@ import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* DataBlock contains a list of records serialized using Avro.
* The Datablock contains
* 1. Compressed Writer Schema length
* 2. Compressed Writer Schema content
* 3. Total number of records in the block
* 4. Size of a record
* 5. Actual avro serialized content of the record
* DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Compressed
* Writer Schema length 2. Compressed Writer Schema content 3. Total number of records in the block
* 4. Size of a record 5. Actual avro serialized content of the record
*/
public class HoodieAvroDataBlock extends HoodieLogBlock {
private List<IndexedRecord> records;
private Schema schema;
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema, Map<LogMetadataType, String> metadata) {
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema,
Map<LogMetadataType, String> metadata) {
super(metadata);
this.records = records;
this.schema = schema;
@@ -76,7 +72,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
DataOutputStream output = new DataOutputStream(baos);
// 1. Write out metadata
if(super.getLogMetadata() != null) {
if (super.getLogMetadata() != null) {
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
}
@@ -117,12 +113,13 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
return HoodieLogBlockType.AVRO_DATA_BLOCK;
}
public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata) throws IOException {
public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata)
throws IOException {
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
Map<LogMetadataType, String> metadata = null;
// 1. Read the metadata written out, if applicable
if(readMetadata) {
if (readMetadata) {
metadata = HoodieLogBlock.getLogMetadata(dis);
}
// 1. Read the schema written out
@@ -131,7 +128,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
dis.readFully(compressedSchema, 0, schemaLength);
Schema writerSchema = new Schema.Parser().parse(HoodieAvroUtils.decompress(compressedSchema));
if(readerSchema == null) {
if (readerSchema == null) {
readerSchema = writerSchema;
}
@@ -141,7 +138,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
List<IndexedRecord> records = new ArrayList<>(totalRecords);
// 3. Read the content
for(int i=0;i<totalRecords;i++) {
for (int i = 0; i < totalRecords; i++) {
// TODO - avoid bytes copy
int recordLength = dis.readInt();
byte[] recordData = new byte[recordLength];

View File

@@ -32,7 +32,8 @@ public class HoodieCommandBlock extends HoodieLogBlock {
public enum HoodieCommandBlockTypeEnum {ROLLBACK_PREVIOUS_BLOCK}
public HoodieCommandBlock(HoodieCommandBlockTypeEnum type, Map<LogMetadataType, String> metadata) {
public HoodieCommandBlock(HoodieCommandBlockTypeEnum type,
Map<LogMetadataType, String> metadata) {
super(metadata);
this.type = type;
}
@@ -46,7 +47,7 @@ public class HoodieCommandBlock extends HoodieLogBlock {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream output = new DataOutputStream(baos);
if(super.getLogMetadata() != null) {
if (super.getLogMetadata() != null) {
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
}
output.writeInt(type.ordinal());
@@ -66,7 +67,7 @@ public class HoodieCommandBlock extends HoodieLogBlock {
public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException {
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
Map<LogMetadataType, String> metadata = null;
if(readMetadata) {
if (readMetadata) {
metadata = HoodieLogBlock.getLogMetadata(dis);
}
int ordinal = dis.readInt();

View File

@@ -44,7 +44,7 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
public byte[] getBytes() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream output = new DataOutputStream(baos);
if(super.getLogMetadata() != null) {
if (super.getLogMetadata() != null) {
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
}
output.write(corruptedBytes);
@@ -60,20 +60,21 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
return corruptedBytes;
}
public static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata) throws IOException {
public static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata)
throws IOException {
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
Map<LogMetadataType, String> metadata = null;
int bytesRemaining = blockSize;
if(readMetadata) {
if (readMetadata) {
try { //attempt to read metadata
metadata = HoodieLogBlock.getLogMetadata(dis);
bytesRemaining = blockSize - HoodieLogBlock.getLogMetadataBytes(metadata).length;
} catch(IOException e) {
} catch (IOException e) {
// unable to read metadata, possibly corrupted
metadata = null;
}
}
byte [] corruptedBytes = new byte[bytesRemaining];
byte[] corruptedBytes = new byte[bytesRemaining];
dis.readFully(corruptedBytes);
return new HoodieCorruptBlock(corruptedBytes, metadata);
}

View File

@@ -16,8 +16,6 @@
package com.uber.hoodie.common.table.log.block;
import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
@@ -25,6 +23,7 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
/**
* Delete block contains a list of keys to be deleted from scanning the blocks so far
@@ -46,10 +45,10 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
public byte[] getBytes() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream output = new DataOutputStream(baos);
if(super.getLogMetadata() != null) {
if (super.getLogMetadata() != null) {
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
}
byte [] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8"));
byte[] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8"));
output.writeInt(bytesToWrite.length);
output.write(bytesToWrite);
return baos.toByteArray();
@@ -67,11 +66,11 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException {
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
Map<LogMetadataType, String> metadata = null;
if(readMetadata) {
if (readMetadata) {
metadata = HoodieLogBlock.getLogMetadata(dis);
}
int dataLength = dis.readInt();
byte [] data = new byte[dataLength];
byte[] data = new byte[dataLength];
dis.readFully(data);
return new HoodieDeleteBlock(new String(data).split(","), metadata);
}

View File

@@ -18,7 +18,6 @@ package com.uber.hoodie.common.table.log.block;
import com.google.common.collect.Maps;
import com.uber.hoodie.exception.HoodieException;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -34,6 +33,7 @@ public abstract class HoodieLogBlock {
public byte[] getBytes() throws IOException {
throw new HoodieException("No implementation was provided");
}
public HoodieLogBlockType getBlockType() {
throw new HoodieException("No implementation was provided");
}
@@ -42,8 +42,8 @@ public abstract class HoodieLogBlock {
private Map<LogMetadataType, String> logMetadata;
/**
* Type of the log block
* WARNING: This enum is serialized as the ordinal. Only add new enums at the end.
* Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at
* the end.
*/
public enum HoodieLogBlockType {
COMMAND_BLOCK,
@@ -53,8 +53,8 @@ public abstract class HoodieLogBlock {
}
/**
* Metadata abstraction for a HoodieLogBlock
* WARNING : This enum is serialized as the ordinal. Only add new enums at the end.
* Metadata abstraction for a HoodieLogBlock WARNING : This enum is serialized as the ordinal.
* Only add new enums at the end.
*/
public enum LogMetadataType {
INSTANT_TIME,
@@ -70,21 +70,17 @@ public abstract class HoodieLogBlock {
}
/**
* Convert log metadata to bytes
* 1. Write size of metadata
* 2. Write enum ordinal
* 3. Write actual bytes
* @param metadata
* @return
* @throws IOException
* Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual
* bytes
*/
public static byte [] getLogMetadataBytes(Map<LogMetadataType, String> metadata) throws IOException {
public static byte[] getLogMetadataBytes(Map<LogMetadataType, String> metadata)
throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream output = new DataOutputStream(baos);
output.writeInt(metadata.size());
for(Map.Entry<LogMetadataType, String> entry : metadata.entrySet()) {
for (Map.Entry<LogMetadataType, String> entry : metadata.entrySet()) {
output.writeInt(entry.getKey().ordinal());
byte [] bytes = entry.getValue().getBytes();
byte[] bytes = entry.getValue().getBytes();
output.writeInt(bytes.length);
output.write(bytes);
}
@@ -92,13 +88,10 @@ public abstract class HoodieLogBlock {
}
/**
* Convert bytes to LogMetadata, follow the same order as
* {@link HoodieLogBlock#getLogMetadataBytes}
* @param dis
* @return
* @throws IOException
* Convert bytes to LogMetadata, follow the same order as {@link HoodieLogBlock#getLogMetadataBytes}
*/
public static Map<LogMetadataType, String> getLogMetadata(DataInputStream dis) throws IOException {
public static Map<LogMetadataType, String> getLogMetadata(DataInputStream dis)
throws IOException {
Map<LogMetadataType, String> metadata = Maps.newHashMap();
// 1. Read the metadata written out
@@ -113,7 +106,7 @@ public abstract class HoodieLogBlock {
metadataCount--;
}
return metadata;
} catch(EOFException eof) {
} catch (EOFException eof) {
throw new IOException("Could not read metadata fields ", eof);
}
}

View File

@@ -22,7 +22,18 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;
import java.io.IOException;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -31,297 +42,273 @@ import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
* is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline
* that allows for creation of instants on the timeline.
* <p></p>
* The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload()
* so that they can chain multiple mutations to the timeline and then call reload() once.
* <p></p>
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours
* (configurable) is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special
* timeline that allows for creation of instants on the timeline. <p></p> The timeline is not
* automatically reloaded on any mutation operation, clients have to manually call reload() so that
* they can chain multiple mutations to the timeline and then call reload() once. <p></p> This class
* can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
*/
public class HoodieActiveTimeline extends HoodieDefaultTimeline {
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
private String metaPath;
private transient FileSystem fs;
private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
private String metaPath;
private transient FileSystem fs;
/**
* Returns next commit time in the {@link #COMMIT_FORMATTER} format.
* @return
*/
public static String createNewCommitTime() {
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
/**
* Returns next commit time in the {@link #COMMIT_FORMATTER} format.
*/
public static String createNewCommitTime() {
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
}
protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
// Filter all the filter in the metapath and include only the extensions passed and
// convert them into HoodieInstant
try {
this.instants =
Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
// Include only the meta files with extensions that needs to be included
String extension = FSUtils.getFileExtension(path.getName());
return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
})).sorted(Comparator.comparing(
// Sort the meta-data by the instant time (first part of the file name)
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
// create HoodieInstantMarkers from FileStatus, which extracts properties
.map(HoodieInstant::new).collect(Collectors.toList());
log.info("Loaded instants " + instants);
} catch (IOException e) {
throw new HoodieIOException("Failed to scan metadata", e);
}
this.fs = fs;
this.metaPath = metaPath;
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
}
protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
// Filter all the filter in the metapath and include only the extensions passed and
// convert them into HoodieInstant
try {
this.instants =
Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
// Include only the meta files with extensions that needs to be included
String extension = FSUtils.getFileExtension(path.getName());
return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
})).sorted(Comparator.comparing(
// Sort the meta-data by the instant time (first part of the file name)
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
// create HoodieInstantMarkers from FileStatus, which extracts properties
.map(HoodieInstant::new).collect(Collectors.toList());
log.info("Loaded instants " + instants);
} catch (IOException e) {
throw new HoodieIOException("Failed to scan metadata", e);
}
this.fs = fs;
this.metaPath = metaPath;
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
this(fs, metaPath,
new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
}
/**
* For serialization and de-serialization only.
*
* @deprecated
*/
public HoodieActiveTimeline() {
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
/**
* Get all instants (commits, delta commits, compactions) that produce new data, in the active
* timeline *
*/
public HoodieTimeline getCommitsAndCompactionsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
}
/**
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result
* in actions, in the active timeline *
*/
public HoodieTimeline getAllCommitsTimeline() {
return getTimelineOfActions(
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION,
SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
/**
* Get only pure commits (inflight and completed) in the active timeline
*/
public HoodieTimeline getCommitTimeline() {
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION));
}
/**
* Get only the delta commits (inflight and completed) in the active timeline
*/
public HoodieTimeline getDeltaCommitTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the commits (inflight and completed) in the compaction timeline
*/
public HoodieTimeline getCompactionTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple
* actions
*
* @param actions actions allowed in the timeline
*/
public HoodieTimeline getTimelineOfActions(Set<String> actions) {
return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the cleaner action (inflight and completed) in the active timeline
*/
public HoodieTimeline getCleanerTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the rollback action (inflight and completed) in the active timeline
*/
public HoodieTimeline getRollbackTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the save point action (inflight and completed) in the active timeline
*/
public HoodieTimeline getSavePointTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
protected Stream<HoodieInstant> filterInstantsByAction(String action) {
return instants.stream().filter(s -> s.getAction().equals(action));
}
public void createInflight(HoodieInstant instant) {
log.info("Creating a new in-flight instant " + instant);
// Create the in-flight file
createFileInMetaPath(instant.getFileName(), Optional.empty());
}
public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
log.info("Marking instant complete " + instant);
Preconditions.checkArgument(instant.isInflight(),
"Could not mark an already completed instant as complete again " + instant);
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
log.info("Completed " + instant);
}
public void revertToInflight(HoodieInstant instant) {
log.info("Reverting instant to inflight " + instant);
moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
log.info("Reverted " + instant + " to inflight");
}
public void deleteInflight(HoodieInstant instant) {
log.info("Deleting in-flight " + instant);
Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
try {
boolean result = fs.delete(inFlightCommitFilePath, false);
if (result) {
log.info("Removed in-flight " + instant);
} else {
throw new HoodieIOException("Could not delete in-flight instant " + instant);
}
} catch (IOException e) {
throw new HoodieIOException(
"Could not remove inflight commit " + inFlightCommitFilePath, e);
}
}
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
this(fs, metaPath,
new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
Path detailPath = new Path(metaPath, instant.getFileName());
return readDataFromPath(detailPath);
}
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
Optional<byte[]> data) {
Path commitFilePath = new Path(metaPath, completed.getFileName());
try {
// open a new file and write the commit metadata in
Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
createFileInMetaPath(inflight.getFileName(), data);
boolean success = fs.rename(inflightCommitFile, commitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete " + inflight, e);
}
}
/**
* For serialization and de-serialization only.
* @deprecated
*/
public HoodieActiveTimeline() {
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
/**
* Get all instants (commits, delta commits, compactions) that produce new data, in the active timeline
**
* @return
*/
public HoodieTimeline getCommitsAndCompactionsTimeline() {
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
}
/**
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result in actions, in the active timeline
**
* @return
*/
public HoodieTimeline getAllCommitsTimeline() {
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION));
}
/**
* Get only pure commits (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getCommitTimeline() {
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION));
}
/**
* Get only the delta commits (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getDeltaCommitTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the commits (inflight and completed) in the compaction timeline
*
* @return
*/
public HoodieTimeline getCompactionTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple actions
*
* @param actions actions allowed in the timeline
* @return
*/
public HoodieTimeline getTimelineOfActions(Set<String> actions) {
return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the cleaner action (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getCleanerTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the rollback action (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getRollbackTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
/**
* Get only the save point action (inflight and completed) in the active timeline
*
* @return
*/
public HoodieTimeline getSavePointTimeline() {
return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
}
protected Stream<HoodieInstant> filterInstantsByAction(String action) {
return instants.stream().filter(s -> s.getAction().equals(action));
}
public void createInflight(HoodieInstant instant) {
log.info("Creating a new in-flight instant " + instant);
// Create the in-flight file
createFileInMetaPath(instant.getFileName(), Optional.empty());
}
public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
log.info("Marking instant complete " + instant);
Preconditions.checkArgument(instant.isInflight(),
"Could not mark an already completed instant as complete again " + instant);
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
log.info("Completed " + instant);
}
public void revertToInflight(HoodieInstant instant) {
log.info("Reverting instant to inflight " + instant);
moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
log.info("Reverted " + instant + " to inflight");
}
public void deleteInflight(HoodieInstant instant) {
log.info("Deleting in-flight " + instant);
Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
try {
boolean result = fs.delete(inFlightCommitFilePath, false);
if (result) {
log.info("Removed in-flight " + instant);
} else {
throw new HoodieIOException("Could not delete in-flight instant " + instant);
}
} catch (IOException e) {
throw new HoodieIOException(
"Could not remove inflight commit " + inFlightCommitFilePath, e);
}
}
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
Path detailPath = new Path(metaPath, instant.getFileName());
return readDataFromPath(detailPath);
}
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
Optional<byte[]> data) {
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
try {
if (!fs.exists(inFlightCommitFilePath)) {
Path commitFilePath = new Path(metaPath, completed.getFileName());
try {
// open a new file and write the commit metadata in
Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
createFileInMetaPath(inflight.getFileName(), data);
boolean success = fs.rename(inflightCommitFile, commitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete " + inflight, e);
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
}
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete revert " + completed, e);
}
}
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
try {
if (!fs.exists(inFlightCommitFilePath)) {
Path commitFilePath = new Path(metaPath, completed.getFileName());
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
if (!success) {
throw new HoodieIOException(
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
}
}
} catch (IOException e) {
throw new HoodieIOException("Could not complete revert " + completed, e);
public void saveToInflight(HoodieInstant instant, Optional<byte[]> content) {
createFileInMetaPath(instant.getFileName(), content);
}
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
Path fullPath = new Path(metaPath, filename);
try {
if (!content.isPresent()) {
if (fs.createNewFile(fullPath)) {
log.info("Created a new file in meta path: " + fullPath);
return;
}
} else {
FSDataOutputStream fsout = fs.create(fullPath, true);
fsout.write(content.get());
fsout.close();
return;
}
throw new HoodieIOException("Failed to create file " + fullPath);
} catch (IOException e) {
throw new HoodieIOException("Failed to create file " + fullPath, e);
}
}
public void saveToInflight(HoodieInstant instant, Optional<byte[]> content) {
createFileInMetaPath(instant.getFileName(), content);
protected Optional<byte[]> readDataFromPath(Path detailPath) {
try (FSDataInputStream is = fs.open(detailPath)) {
return Optional.of(IOUtils.toByteArray(is));
} catch (IOException e) {
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
}
}
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
Path fullPath = new Path(metaPath, filename);
try {
if (!content.isPresent()) {
if (fs.createNewFile(fullPath)) {
log.info("Created a new file in meta path: " + fullPath);
return;
}
} else {
FSDataOutputStream fsout = fs.create(fullPath, true);
fsout.write(content.get());
fsout.close();
return;
}
throw new HoodieIOException("Failed to create file " + fullPath);
} catch (IOException e) {
throw new HoodieIOException("Failed to create file " + fullPath, e);
}
}
protected Optional<byte[]> readDataFromPath(Path detailPath) {
try (FSDataInputStream is = fs.open(detailPath)) {
return Optional.of(IOUtils.toByteArray(is));
} catch (IOException e) {
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
}
}
public HoodieActiveTimeline reload() {
return new HoodieActiveTimeline(fs, metaPath);
}
public HoodieActiveTimeline reload() {
return new HoodieActiveTimeline(fs, metaPath);
}
}

View File

@@ -19,13 +19,6 @@ package com.uber.hoodie.common.table.timeline;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
@@ -34,79 +27,85 @@ import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
* is in the ActiveTimeline and the rest are in ArchivedTimeline.
* <p></p>
* Instants are read from the archive file during initialization and never refreshed. To refresh, clients
* need to call reload()
* <p></p>
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours
* (configurable) is in the ActiveTimeline and the rest are in ArchivedTimeline. <p></p> Instants
* are read from the archive file during initialization and never refreshed. To refresh, clients
* need to call reload() <p></p> This class can be serialized and de-serialized and on
* de-serialization the FileSystem is re-initialized.
*/
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
private transient FileSystem fs;
private String metaPath;
private Map<String, byte[]> readCommits = new HashMap<>();
private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class);
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
private transient FileSystem fs;
private String metaPath;
private Map<String, byte[]> readCommits = new HashMap<>();
public HoodieArchivedTimeline(FileSystem fs, String metaPath) {
// Read back the commits to make sure
Path archiveLogPath = getArchiveLogPath(metaPath);
try (SequenceFile.Reader reader =
new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) {
Text key = new Text();
Text val = new Text();
while (reader.next(key, val)) {
// TODO - limit the number of commits loaded in memory. this could get very large.
// This is okay because only tooling will load the archived commit timeline today
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
}
this.instants = readCommits.keySet().stream().map(
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
Collectors.toList());
} catch (IOException e) {
throw new HoodieIOException(
"Could not load archived commit timeline from path " + archiveLogPath, e);
}
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
this.fs = fs;
this.metaPath = metaPath;
private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class);
public HoodieArchivedTimeline(FileSystem fs, String metaPath) {
// Read back the commits to make sure
Path archiveLogPath = getArchiveLogPath(metaPath);
try (SequenceFile.Reader reader =
new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) {
Text key = new Text();
Text val = new Text();
while (reader.next(key, val)) {
// TODO - limit the number of commits loaded in memory. this could get very large.
// This is okay because only tooling will load the archived commit timeline today
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
}
this.instants = readCommits.keySet().stream().map(
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
Collectors.toList());
} catch (IOException e) {
throw new HoodieIOException(
"Could not load archived commit timeline from path " + archiveLogPath, e);
}
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
this.fs = fs;
this.metaPath = metaPath;
}
/**
* For serialization and de-serialization only.
* @deprecated
*/
public HoodieArchivedTimeline() {
}
/**
* For serialization and de-serialization only.
*
* @deprecated
*/
public HoodieArchivedTimeline() {
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
/**
* This method is only used when this object is deserialized in a spark executor.
*
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
public static Path getArchiveLogPath(String metaPath) {
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
}
public static Path getArchiveLogPath(String metaPath) {
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
}
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
return Optional.ofNullable(readCommits.get(instant.getTimestamp()));
}
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
return Optional.ofNullable(readCommits.get(instant.getTimestamp()));
}
public HoodieArchivedTimeline reload() {
return new HoodieArchivedTimeline(fs, metaPath);
}
public HoodieArchivedTimeline reload() {
return new HoodieArchivedTimeline(fs, metaPath);
}
}

View File

@@ -17,135 +17,136 @@
package com.uber.hoodie.common.table.timeline;
import com.uber.hoodie.common.table.HoodieTimeline;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline.
* It provides methods to inspect a List[HoodieInstant]. Function to get the details of the instant
* is passed in as a lamdba.
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to
* inspect a List[HoodieInstant]. Function to get the details of the instant is passed in as a
* lamdba.
*
* @see HoodieTimeline
*/
public class HoodieDefaultTimeline implements HoodieTimeline {
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
protected Function<HoodieInstant, Optional<byte[]>> details;
protected List<HoodieInstant> instants;
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
Function<HoodieInstant, Optional<byte[]>> details) {
this.instants = instants.collect(Collectors.toList());
this.details = details;
}
protected Function<HoodieInstant, Optional<byte[]>> details;
protected List<HoodieInstant> instants;
/**
* For serailizing and de-serializing
*
* @deprecated
*/
public HoodieDefaultTimeline() {
}
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
Function<HoodieInstant, Optional<byte[]>> details) {
this.instants = instants.collect(Collectors.toList());
this.details = details;
}
public HoodieTimeline filterInflights() {
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
details);
}
/**
* For serailizing and de-serializing
*
* @deprecated
*/
public HoodieDefaultTimeline() {
}
public HoodieTimeline filterCompletedInstants() {
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
}
public HoodieTimeline filterInflights() {
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
details);
}
@Override
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
return new HoodieDefaultTimeline(instants.stream().filter(
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) &&
HoodieTimeline.compareTimestamps(
public HoodieTimeline filterCompletedInstants() {
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
}
@Override
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
return new HoodieDefaultTimeline(instants.stream().filter(
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) &&
HoodieTimeline.compareTimestamps(
s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details);
}
}
@Override
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
return new HoodieDefaultTimeline(
instants.stream().filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
.limit(numCommits), details);
}
@Override
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
return new HoodieDefaultTimeline(
instants.stream()
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
.limit(numCommits), details);
}
@Override
public boolean empty() {
return !instants.stream().findFirst().isPresent();
}
@Override
public boolean empty() {
return !instants.stream().findFirst().isPresent();
}
@Override
public int countInstants() {
return new Long(instants.stream().count()).intValue();
}
@Override
public int countInstants() {
return new Long(instants.stream().count()).intValue();
}
@Override
public Optional<HoodieInstant> firstInstant() {
return instants.stream().findFirst();
}
@Override
public Optional<HoodieInstant> firstInstant() {
return instants.stream().findFirst();
}
@Override
public Optional<HoodieInstant> nthInstant(int n) {
if (empty() || n >= countInstants()) {
return Optional.empty();
}
return Optional.of(instants.get(n));
@Override
public Optional<HoodieInstant> nthInstant(int n) {
if (empty() || n >= countInstants()) {
return Optional.empty();
}
return Optional.of(instants.get(n));
}
@Override
public Optional<HoodieInstant> lastInstant() {
return empty() ? Optional.empty() : nthInstant(countInstants() - 1);
}
@Override
public Optional<HoodieInstant> lastInstant() {
return empty() ? Optional.empty() : nthInstant(countInstants() - 1);
}
@Override
public Optional<HoodieInstant> nthFromLastInstant(int n) {
if (countInstants() < n + 1) {
return Optional.empty();
}
return nthInstant(countInstants() - 1 - n);
@Override
public Optional<HoodieInstant> nthFromLastInstant(int n) {
if (countInstants() < n + 1) {
return Optional.empty();
}
return nthInstant(countInstants() - 1 - n);
}
@Override
public boolean containsInstant(HoodieInstant instant) {
return instants.stream().anyMatch(s -> s.equals(instant));
}
@Override
public boolean containsInstant(HoodieInstant instant) {
return instants.stream().anyMatch(s -> s.equals(instant));
}
@Override
public boolean containsOrBeforeTimelineStarts(String instant) {
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|| isBeforeTimelineStarts(instant);
}
@Override
public boolean containsOrBeforeTimelineStarts(String instant) {
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|| isBeforeTimelineStarts(instant);
}
@Override
public Stream<HoodieInstant> getInstants() {
return instants.stream();
}
@Override
public Stream<HoodieInstant> getInstants() {
return instants.stream();
}
@Override
public boolean isBeforeTimelineStarts(String instant) {
Optional<HoodieInstant> firstCommit = firstInstant();
return firstCommit.isPresent() &&
HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER);
}
@Override
public boolean isBeforeTimelineStarts(String instant) {
Optional<HoodieInstant> firstCommit = firstInstant();
return firstCommit.isPresent() &&
HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER);
}
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
return details.apply(instant);
}
@Override
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
return details.apply(instant);
}
@Override
public String toString() {
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
.collect(Collectors.joining(","));
}
@Override
public String toString() {
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
.collect(Collectors.joining(","));
}
}

View File

@@ -16,118 +16,117 @@
package com.uber.hoodie.common.table.timeline;
import com.google.common.io.Files;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileStatus;
import java.io.Serializable;
import java.util.Objects;
import org.apache.hadoop.fs.FileStatus;
/**
* A Hoodie Instant represents a action done on a hoodie dataset.
* All actions start with a inflight instant and then create a completed instant after done.
* A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight
* instant and then create a completed instant after done.
*
* @see HoodieTimeline
*/
public class HoodieInstant implements Serializable {
private boolean isInflight = false;
private String action;
private String timestamp;
/**
* Load the instant from the meta FileStatus
* @param fileStatus
*/
public HoodieInstant(FileStatus fileStatus) {
// First read the instant timestamp. [==>20170101193025<==].commit
String fileName = fileStatus.getPath().getName();
String fileExtension = FSUtils.getFileExtension(fileName);
timestamp = fileName.replace(fileExtension, "");
private boolean isInflight = false;
private String action;
private String timestamp;
// Next read the action for this marker
action = fileExtension.replaceFirst(".", "");
if(action.equals("inflight")) {
// This is to support backwards compatibility on how in-flight commit files were written
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
action = "commit";
isInflight = true;
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
isInflight = true;
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
}
/**
* Load the instant from the meta FileStatus
*/
public HoodieInstant(FileStatus fileStatus) {
// First read the instant timestamp. [==>20170101193025<==].commit
String fileName = fileStatus.getPath().getName();
String fileExtension = FSUtils.getFileExtension(fileName);
timestamp = fileName.replace(fileExtension, "");
// Next read the action for this marker
action = fileExtension.replaceFirst(".", "");
if (action.equals("inflight")) {
// This is to support backwards compatibility on how in-flight commit files were written
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
action = "commit";
isInflight = true;
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
isInflight = true;
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
}
}
public HoodieInstant(boolean isInflight, String action, String timestamp) {
this.isInflight = isInflight;
this.action = action;
this.timestamp = timestamp;
}
public HoodieInstant(boolean isInflight, String action, String timestamp) {
this.isInflight = isInflight;
this.action = action;
this.timestamp = timestamp;
}
public boolean isInflight() {
return isInflight;
}
public boolean isInflight() {
return isInflight;
}
public String getAction() {
return action;
}
public String getAction() {
return action;
}
public String getTimestamp() {
return timestamp;
}
public String getTimestamp() {
return timestamp;
}
/**
* Get the filename for this instant
* @return
*/
public String getFileName() {
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCommitFileName(timestamp) :
HoodieTimeline.makeCommitFileName(timestamp);
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
HoodieTimeline.makeCleanerFileName(timestamp);
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightRollbackFileName(timestamp) :
HoodieTimeline.makeRollbackFileName(timestamp);
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
HoodieTimeline.makeSavePointFileName(timestamp);
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCompactionFileName(timestamp) :
HoodieTimeline.makeCompactionFileName(timestamp);
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightDeltaFileName(timestamp) :
HoodieTimeline.makeDeltaFileName(timestamp);
}
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
/**
* Get the filename for this instant
*/
public String getFileName() {
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCommitFileName(timestamp) :
HoodieTimeline.makeCommitFileName(timestamp);
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
HoodieTimeline.makeCleanerFileName(timestamp);
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightRollbackFileName(timestamp) :
HoodieTimeline.makeRollbackFileName(timestamp);
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
HoodieTimeline.makeSavePointFileName(timestamp);
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightCompactionFileName(timestamp) :
HoodieTimeline.makeCompactionFileName(timestamp);
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
return isInflight ?
HoodieTimeline.makeInflightDeltaFileName(timestamp) :
HoodieTimeline.makeDeltaFileName(timestamp);
}
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieInstant that = (HoodieInstant) o;
return isInflight == that.isInflight &&
Objects.equals(action, that.action) &&
Objects.equals(timestamp, that.timestamp);
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
HoodieInstant that = (HoodieInstant) o;
return isInflight == that.isInflight &&
Objects.equals(action, that.action) &&
Objects.equals(timestamp, that.timestamp);
}
@Override
public int hashCode() {
return Objects.hash(isInflight, action, timestamp);
}
@Override
public int hashCode() {
return Objects.hash(isInflight, action, timestamp);
}
@Override
public String toString() {
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
}
@Override
public String toString() {
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
}
}

View File

@@ -19,18 +19,12 @@ package com.uber.hoodie.common.table.view;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.TableFileSystemView;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.table.TableFileSystemView;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
@@ -44,6 +38,10 @@ import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
* Common abstract implementation for multiple TableFileSystemView Implementations. 2 possible
@@ -54,8 +52,9 @@ import java.util.stream.Stream;
* @see TableFileSystemView
* @since 0.3.0
*/
public class HoodieTableFileSystemView implements TableFileSystemView, TableFileSystemView.ReadOptimizedView,
TableFileSystemView.RealtimeView, Serializable {
public class HoodieTableFileSystemView implements TableFileSystemView,
TableFileSystemView.ReadOptimizedView,
TableFileSystemView.RealtimeView, Serializable {
protected HoodieTableMetaClient metaClient;
protected transient FileSystem fs;
@@ -69,12 +68,9 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
/**
* Create a file system view, as of the given timeline
*
* @param metaClient
* @param visibleActiveTimeline
*/
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline) {
HoodieTimeline visibleActiveTimeline) {
this.metaClient = metaClient;
this.fs = metaClient.getFs();
this.visibleActiveTimeline = visibleActiveTimeline;
@@ -85,14 +81,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
/**
* Create a file system view, as of the given timeline, with the provided file statuses.
*
* @param metaClient
* @param visibleActiveTimeline
* @param fileStatuses
*/
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient,
HoodieTimeline visibleActiveTimeline,
FileStatus[] fileStatuses) {
HoodieTimeline visibleActiveTimeline,
FileStatus[] fileStatuses) {
this(metaClient, visibleActiveTimeline);
addFilesToView(fileStatuses);
}
@@ -104,44 +96,44 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
* @deprecated
*/
private void readObject(java.io.ObjectInputStream in)
throws IOException, ClassNotFoundException {
throws IOException, ClassNotFoundException {
in.defaultReadObject();
this.fs = FSUtils.getFs();
}
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
throws IOException {
out.defaultWriteObject();
}
/**
* Adds the provided statuses into the file system view, and also caches it inside this object.
*
* @param statuses
* @return
*/
private List<HoodieFileGroup> addFilesToView(FileStatus[] statuses) {
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = convertFileStatusesToDataFiles(statuses)
.collect(Collectors.groupingBy((dataFile) -> {
String partitionPathStr = FSUtils.getRelativePartitionPath(
new Path(metaClient.getBasePath()),
dataFile.getFileStatus().getPath().getParent());
return Pair.of(partitionPathStr , dataFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = convertFileStatusesToLogFiles(statuses)
.collect(Collectors.groupingBy((logFile) -> {
String partitionPathStr = FSUtils.getRelativePartitionPath(
new Path(metaClient.getBasePath()),
logFile.getPath().getParent());
return Pair.of(partitionPathStr , logFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = convertFileStatusesToDataFiles(
statuses)
.collect(Collectors.groupingBy((dataFile) -> {
String partitionPathStr = FSUtils.getRelativePartitionPath(
new Path(metaClient.getBasePath()),
dataFile.getFileStatus().getPath().getParent());
return Pair.of(partitionPathStr, dataFile.getFileId());
}));
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = convertFileStatusesToLogFiles(
statuses)
.collect(Collectors.groupingBy((logFile) -> {
String partitionPathStr = FSUtils.getRelativePartitionPath(
new Path(metaClient.getBasePath()),
logFile.getPath().getParent());
return Pair.of(partitionPathStr, logFile.getFileId());
}));
Set<Pair<String, String>> fileIdSet = new HashSet<>(dataFiles.keySet());
fileIdSet.addAll(logFiles.keySet());
List<HoodieFileGroup> fileGroups = new ArrayList<>();
fileIdSet.forEach(pair -> {
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(), visibleActiveTimeline);
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(),
visibleActiveTimeline);
if (dataFiles.containsKey(pair)) {
dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile));
}
@@ -165,90 +157,93 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
private Stream<HoodieDataFile> convertFileStatusesToDataFiles(FileStatus[] statuses) {
Predicate<FileStatus> roFilePredicate = fileStatus ->
fileStatus.getPath().getName().contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new);
}
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
Predicate<FileStatus> rtFilePredicate = fileStatus ->
fileStatus.getPath().getName().contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
fileStatus.getPath().getName()
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
}
@Override
public Stream<HoodieDataFile> getLatestDataFiles(final String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestDataFile())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
String maxCommitTime) {
String maxCommitTime) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getAllDataFiles())
.flatMap(dataFileList -> dataFileList);
.map(fileGroup -> fileGroup.getAllDataFiles())
.flatMap(dataFileList -> dataFileList);
}
@Override
public Stream<FileSlice> getLatestFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSlice())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestFileSlice())
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
String maxCommitTime) {
return getAllFileGroups(partitionPath)
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
return fileGroupMap.values().stream()
.map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
.map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
.filter(dataFileOpt -> dataFileOpt.isPresent())
.map(Optional::get);
}
@Override
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
return getAllFileGroups(partitionPath)
.map(group -> group.getAllFileSlices())
.flatMap(sliceList -> sliceList);
.map(group -> group.getAllFileSlices())
.flatMap(sliceList -> sliceList);
}
/**
* Given a partition path, obtain all filegroups within that. All methods, that work at the partition level
* go through this.
* Given a partition path, obtain all filegroups within that. All methods, that work at the
* partition level go through this.
*/
@Override
public Stream<HoodieFileGroup> getAllFileGroups(String partitionPathStr) {
@@ -266,7 +261,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
return fileGroups.stream();
} catch (IOException e) {
throw new HoodieIOException(
"Failed to list data files in partition " + partitionPathStr, e);
"Failed to list data files in partition " + partitionPathStr, e);
}
}
}

View File

@@ -19,7 +19,6 @@ package com.uber.hoodie.common.util;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata;
import com.uber.hoodie.avro.model.HoodieRollbackMetadata;
@@ -32,7 +31,11 @@ import com.uber.hoodie.common.model.HoodieAvroPayload;
import com.uber.hoodie.common.model.HoodieKey;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.exception.HoodieIOException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
@@ -50,146 +53,140 @@ import org.apache.avro.specific.SpecificRecordBase;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
public class AvroUtils {
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFiles(FileSystem fs,
List<String> deltaFilePaths, Schema expectedSchema) {
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
deltaFilePaths.forEach(s -> {
List<HoodieRecord<HoodieAvroPayload>> records = loadFromFile(fs, s, expectedSchema);
loadedRecords.addAll(records);
});
return loadedRecords;
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFiles(FileSystem fs,
List<String> deltaFilePaths, Schema expectedSchema) {
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
deltaFilePaths.forEach(s -> {
List<HoodieRecord<HoodieAvroPayload>> records = loadFromFile(fs, s, expectedSchema);
loadedRecords.addAll(records);
});
return loadedRecords;
}
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs,
String deltaFilePath, Schema expectedSchema) {
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
Path path = new Path(deltaFilePath);
try {
SeekableInput input = new FsInput(path, fs.getConf());
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
// Set the expected schema to be the current schema to account for schema evolution
reader.setExpected(expectedSchema);
FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
for (GenericRecord deltaRecord : fileReader) {
String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String partitionPath =
deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
new HoodieAvroPayload(Optional.of(deltaRecord))));
}
fileReader.close(); // also closes underlying FsInput
} catch (IOException e) {
throw new HoodieIOException("Could not read avro records from path " + deltaFilePath,
e);
}
return loadedRecords;
}
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs,
String deltaFilePath, Schema expectedSchema) {
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
Path path = new Path(deltaFilePath);
try {
SeekableInput input = new FsInput(path, fs.getConf());
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
// Set the expected schema to be the current schema to account for schema evolution
reader.setExpected(expectedSchema);
FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
for (GenericRecord deltaRecord : fileReader) {
String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
String partitionPath =
deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
new HoodieAvroPayload(Optional.of(deltaRecord))));
}
fileReader.close(); // also closes underlying FsInput
} catch (IOException e) {
throw new HoodieIOException("Could not read avro records from path " + deltaFilePath,
e);
}
return loadedRecords;
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieCleanPartitionMetadata>builder();
int totalDeleted = 0;
String earliestCommitToRetain = null;
for (HoodieCleanStat stat : cleanStats) {
HoodieCleanPartitionMetadata metadata =
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
stat.getDeletePathPatterns());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
if (earliestCommitToRetain == null) {
// This will be the same for all partitions
earliestCommitToRetain = stat.getEarliestCommitToRetain();
}
}
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
}
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieCleanPartitionMetadata>builder();
int totalDeleted = 0;
String earliestCommitToRetain = null;
for (HoodieCleanStat stat : cleanStats) {
HoodieCleanPartitionMetadata metadata =
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
stat.getDeletePathPatterns());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
if (earliestCommitToRetain == null) {
// This will be the same for all partitions
earliestCommitToRetain = stat.getEarliestCommitToRetain();
}
}
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieRollbackPartitionMetadata>builder();
int totalDeleted = 0;
for (HoodieRollbackStat stat : stats) {
HoodieRollbackPartitionMetadata metadata =
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
}
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, commits, partitionMetadataBuilder.build());
}
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieRollbackPartitionMetadata>builder();
int totalDeleted = 0;
for (HoodieRollbackStat stat : stats) {
HoodieRollbackPartitionMetadata metadata =
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
totalDeleted += stat.getSuccessDeleteFiles().size();
}
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
totalDeleted, commits, partitionMetadataBuilder.build());
}
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
Map<String, List<String>> latestFiles) {
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieSavepointPartitionMetadata>builder();
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
HoodieSavepointPartitionMetadata metadata =
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
partitionMetadataBuilder.put(stat.getKey(), metadata);
}
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
partitionMetadataBuilder.build());
/**
 * Builds a {@link HoodieSavepointMetadata} from the latest files per partition, stamped with
 * the current wall-clock time.
 *
 * @param user user who triggered the savepoint
 * @param comment free-form comment recorded with the savepoint
 * @param latestFiles partition path mapped to the file names to preserve
 */
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
Map<String, List<String>> latestFiles) {
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
ImmutableMap.<String, HoodieSavepointPartitionMetadata>builder();
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
HoodieSavepointPartitionMetadata metadata =
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
partitionMetadataBuilder.put(stat.getKey(), metadata);
}
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
partitionMetadataBuilder.build());
}
/**
 * Serializes clean metadata into Avro data-file bytes. Delegates to serializeAvroMetadata.
 */
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
}
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
}
/**
 * Serializes savepoint metadata into Avro data-file bytes. Delegates to serializeAvroMetadata.
 */
public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
}
public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
throws IOException {
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
}
/**
 * Serializes rollback metadata into Avro data-file bytes. Delegates to serializeAvroMetadata.
 */
public static Optional<byte[]> serializeRollbackMetadata(
HoodieRollbackMetadata rollbackMetadata) throws IOException {
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
}
public static Optional<byte[]> serializeRollbackMetadata(
HoodieRollbackMetadata rollbackMetadata) throws IOException {
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
}
/**
 * Serializes an Avro {@link SpecificRecordBase} into Avro container-file bytes.
 *
 * @param metadata the record to serialize
 * @param clazz concrete record class used to build the datum writer
 * @return the serialized bytes, wrapped in an Optional
 * @throws IOException if writing the Avro container fails
 */
public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
    Class<T> clazz) throws IOException {
  DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  // Fix: the original only flushed the DataFileWriter and never closed it (resource leak).
  // try-with-resources closes it, which also flushes the final block into baos.
  try (DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter)) {
    fileWriter.create(metadata.getSchema(), baos);
    fileWriter.append(metadata);
  }
  return Optional.of(baos.toByteArray());
}
public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
Class<T> clazz) throws IOException {
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
fileWriter.create(metadata.getSchema(), baos);
fileWriter.append(metadata);
fileWriter.flush();
return Optional.of(baos.toByteArray());
}
/**
 * Deserializes clean metadata from Avro data-file bytes. Delegates to deserializeAvroMetadata.
 */
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
throws IOException {
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
}
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
throws IOException {
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
}
/**
 * Deserializes savepoint metadata from Avro data-file bytes. Delegates to
 * deserializeAvroMetadata.
 */
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
throws IOException {
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
}
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
throws IOException {
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
}
/**
 * Deserializes the first record of an Avro container file held in a byte array.
 *
 * @param bytes Avro container-file bytes (as produced by serializeAvroMetadata)
 * @param clazz concrete record class used to build the datum reader
 * @return the first record in the container
 * @throws IOException if the container cannot be opened or read
 * @throws IllegalArgumentException if the container holds no records
 */
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
    Class<T> clazz) throws IOException {
  DatumReader<T> reader = new SpecificDatumReader<>(clazz);
  // Fix: the original never closed the FileReader (resource leak).
  try (FileReader<T> fileReader =
      DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader)) {
    Preconditions
        .checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
    return fileReader.next();
  }
}
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
Class<T> clazz) throws IOException {
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
FileReader<T> fileReader =
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
Preconditions
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
return fileReader.next();
}
}

View File

@@ -23,16 +23,6 @@ import com.uber.hoodie.common.model.HoodiePartitionMetadata;
import com.uber.hoodie.common.table.timeline.HoodieInstant;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.InvalidHoodiePathException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@@ -43,319 +33,339 @@ import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
/**
* Utility functions related to accessing the file storage
*/
public class FSUtils {
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)");
private static final String LOG_FILE_PREFIX = ".";
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
private static final long MIN_ROLLBACK_TO_KEEP = 10;
private static FileSystem fs;
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)");
private static final String LOG_FILE_PREFIX = ".";
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
private static final long MIN_CLEAN_TO_KEEP = 10;
private static final long MIN_ROLLBACK_TO_KEEP = 10;
private static FileSystem fs;
/**
* Only to be used for testing.
*/
@VisibleForTesting
public static void setFs(FileSystem fs) {
FSUtils.fs = fs;
/**
 * Only to be used for testing: overrides the cached FileSystem that getFs() returns.
 */
@VisibleForTesting
public static void setFs(FileSystem fs) {
FSUtils.fs = fs;
}
public static FileSystem getFs() {
if (fs != null) {
return fs;
}
public static FileSystem getFs() {
if (fs != null) {
return fs;
}
Configuration conf = new Configuration();
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
FileSystem fs;
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
e);
}
LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
return fs;
Configuration conf = new Configuration();
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
FileSystem fs;
try {
fs = FileSystem.get(conf);
} catch (IOException e) {
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
e);
}
LOG.info(
String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime);
return fs;
}
/**
 * Builds a data file name of the form {@code <fileId>_<taskPartitionId>_<commitTime>.parquet}.
 */
public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
  StringBuilder fileName = new StringBuilder(fileId);
  fileName.append('_').append(taskPartitionId).append('_').append(commitTime).append(".parquet");
  return fileName.toString();
}
/**
 * Glob mask matching any fileId for the given task partition and commit time.
 */
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
  return "*_" + taskPartitionId + "_" + commitTime + ".parquet";
}
/**
 * Glob mask matching any task partition for the given fileId and commit time.
 */
public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
  return fileId + "_*_" + commitTime + ".parquet";
}
/**
 * Glob mask matching any fileId and task partition for the given commit time.
 */
public static String maskWithOnlyCommitTime(String commitTime) {
  return "*_*_" + commitTime + ".parquet";
}
/**
 * Extracts the commit time from a commit file name (everything before the first '.').
 */
public static String getCommitFromCommitFile(String commitFileName) {
  int firstDot = commitFileName.indexOf('.');
  return (firstDot < 0) ? commitFileName : commitFileName.substring(0, firstDot);
}
/**
 * Extracts the commit time from a data file name shaped like
 * {@code <fileId>_<taskPartitionId>_<commitTime>.parquet} (third '_'-token, extension removed).
 */
public static String getCommitTime(String fullFileName) {
  String commitWithExtension = fullFileName.split("_")[2];
  int firstDot = commitWithExtension.indexOf('.');
  return (firstDot < 0) ? commitWithExtension : commitWithExtension.substring(0, firstDot);
}
/**
 * Returns the length in bytes of the file at the given path.
 */
public static long getFileSize(FileSystem fs, Path path) throws IOException {
return fs.getFileStatus(path).getLen();
}
/**
 * Extracts the fileId from a data file name (everything before the first '_').
 */
public static String getFileId(String fullFileName) {
  int firstUnderscore = fullFileName.indexOf('_');
  return (firstUnderscore < 0) ? fullFileName : fullFileName.substring(0, firstUnderscore);
}
/**
 * Gets all partition paths assuming date partitioning (year, month, day) three levels down.
 *
 * @return relative partition paths of the form "year/month/day"
 */
public static List<String> getAllFoldersThreeLevelsDown(FileSystem fs, String basePath)
throws IOException {
List<String> datePartitions = new ArrayList<>();
// Glob exactly three directory levels below the base path.
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
for (FileStatus status : folders) {
Path path = status.getPath();
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
path.getParent().getName(), path.getName()));
}
return datePartitions;
}
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
return String.format("*_%s_%s.parquet", taskPartitionId, commitTime);
/**
 * Returns the partition path relative to the base path, e.g. base {@code /a/b} and partition
 * {@code /a/b/2017/01/01} yields {@code 2017/01/01}.
 *
 * NOTE(review): this searches for the *last* occurrence of the base directory's name inside the
 * partition path, so a partition directory that repeats the base name may be truncated at the
 * wrong spot — confirm callers never hit this.
 */
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
String partitionFullPath = partitionPath.toString();
int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName());
return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}
/**
 * Obtain all the partition paths, that are present in this table, denoted by presence of {@link
 * com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
 *
 * @return relative partition paths (relative to the base path)
 */
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
throws IOException {
List<String> partitions = new ArrayList<>();
Path basePath = new Path(basePathStr);
// Recursive listing; every directory containing the partition meta file is a partition.
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
while (allFiles.hasNext()) {
Path filePath = allFiles.next().getPath();
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
}
}
return partitions;
}
public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
return String.format("%s_*_%s.parquet", fileId, commitTime);
/**
 * Lists all partition paths: via the fixed year/month/day layout when date partitioning is
 * assumed, otherwise by scanning for partition meta files.
 */
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
    boolean assumeDatePartitioning)
    throws IOException {
  return assumeDatePartitioning
      ? getAllFoldersThreeLevelsDown(fs, basePathStr)
      : getAllFoldersWithPartitionMetaFile(fs, basePathStr);
}
public static String maskWithOnlyCommitTime(String commitTime) {
return String.format("*_*_%s.parquet", commitTime);
/**
 * Returns the extension of the given file name, starting at (and including) the first '.'
 * of the base name; empty string when there is no '.'.
 *
 * @param fullName file name or path; must be non-null
 * @throws NullPointerException if fullName is null
 */
public static String getFileExtension(String fullName) {
  // Plain null check instead of guava Preconditions: same NullPointerException contract,
  // no third-party dependency needed for this block.
  if (fullName == null) {
    throw new NullPointerException("fullName must not be null");
  }
  String fileName = (new File(fullName)).getName();
  int dotIndex = fileName.indexOf('.');
  return dotIndex == -1 ? "" : fileName.substring(dotIndex);
}
/**
 * Strips the file extension from a meta file name, leaving the instant time.
 */
public static String getInstantTime(String name) {
  String extension = getFileExtension(name);
  return name.replace(extension, "");
}
/**
 * Get the file extension from the log file (capture group 3 of the log file pattern).
 *
 * @throws InvalidHoodiePathException if the name does not match the log file pattern
 */
public static String getFileExtensionFromLog(Path logPath) {
  Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
  if (matcher.find()) {
    return matcher.group(3);
  }
  throw new InvalidHoodiePathException(logPath, "LogFile");
}
public static String getCommitFromCommitFile(String commitFileName) {
return commitFileName.split("\\.")[0];
/**
 * Get the first part of the file name in the log file. That will be the fileId. Log file do not
 * have commitTime in the file name.
 */
public static String getFileIdFromLogPath(Path path) {
  Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
  if (matcher.find()) {
    return matcher.group(1);
  }
  throw new InvalidHoodiePathException(path, "LogFile");
}
public static String getCommitTime(String fullFileName) {
return fullFileName.split("_")[2].split("\\.")[0];
/**
 * Get the base commit time embedded in the log file name (capture group 2). The original
 * javadoc was copy-pasted from getFileIdFromLogPath and wrongly claimed to return the fileId.
 */
public static String getBaseCommitTimeFromLogPath(Path path) {
  Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
  if (matcher.find()) {
    return matcher.group(2);
  }
  throw new InvalidHoodiePathException(path, "LogFile");
}
public static long getFileSize(FileSystem fs, Path path) throws IOException {
return fs.getFileStatus(path).getLen();
/**
 * Get the last part of the file name in the log file and convert to int (the log version).
 */
public static int getFileVersionFromLog(Path logPath) {
  Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
  if (matcher.find()) {
    return Integer.parseInt(matcher.group(4));
  }
  throw new InvalidHoodiePathException(logPath, "LogFile");
}
public static String getFileId(String fullFileName) {
return fullFileName.split("_")[0];
/**
 * Builds a log file name: {@code .<fileId>_<baseCommitTime><extension>.<version>}.
 */
public static String makeLogFileName(String fileId, String logFileExtension,
    String baseCommitTime, int version) {
  StringBuilder name = new StringBuilder(LOG_FILE_PREFIX);
  name.append(fileId).append('_').append(baseCommitTime).append(logFileExtension)
      .append('.').append(version);
  return name.toString();
}
/**
 * Glob mask matching any log version for the given fileId, commit time and extension.
 */
public static String maskWithoutLogVersion(String commitTime, String fileId,
    String logFileExtension) {
  return LOG_FILE_PREFIX + fileId + "_" + commitTime + logFileExtension + "*";
}
/**
 * Get the latest log file written from the list of log files passed in (highest log version;
 * ties resolve to the earliest in encounter order, as the sort is stable).
 */
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
  Comparator<HoodieLogFile> byVersionDescending =
      Comparator.comparing(HoodieLogFile::getLogVersion).reversed();
  return logFiles.sorted(byVersionDescending).findFirst();
}
/**
 * Get all the log files for the passed in FileId in the partition path
 */
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime)
throws IOException {
// List file names starting with ".<fileId>" that contain the log extension, then keep only
// those whose base commit time matches.
return Arrays.stream(fs.listStatus(partitionPath,
path -> path.getName().startsWith("." + fileId) && path.getName()
.contains(logFileExtension)))
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
}
/**
 * Get the latest log version for the fileId in the partition path, empty when no matching
 * log file exists.
 */
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
    final String fileId, final String logFileExtension, final String baseCommitTime)
    throws IOException {
  Stream<HoodieLogFile> matchingLogFiles =
      getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
  return getLatestLogFile(matchingLogFiles).map(HoodieLogFile::getLogVersion);
}
/**
 * Returns the current (latest) log version for the fileId, defaulting to 1 when none exists.
 */
public static int getCurrentLogVersion(FileSystem fs, Path partitionPath,
    final String fileId, final String logFileExtension, final String baseCommitTime)
    throws IOException {
  return getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime)
      .orElse(1);
}
/**
* Gets all partition paths assuming date partitioning (year, month, day) three levels down.
*/
public static List<String> getAllFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException {
List<String> datePartitions = new ArrayList<>();
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
for (FileStatus status : folders) {
Path path = status.getPath();
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
path.getParent().getName(), path.getName()));
}
return datePartitions;
/**
 * computes the next log version for the specified fileId in the partition path: latest + 1,
 * or 1 when no log file exists yet.
 */
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
    final String logFileExtension, final String baseCommitTime) throws IOException {
  return getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime)
      .map(version -> version + 1)
      .orElse(1);
}
/**
 * Returns the configured I/O buffer size, defaulting to 4096 bytes.
 */
public static int getDefaultBufferSize(final FileSystem fs) {
return fs.getConf().getInt("io.file.buffer.size", 4096);
}
/**
 * Returns the file system's default replication factor for the given path.
 */
public static Short getDefaultReplication(FileSystem fs, Path path) {
return fs.getDefaultReplication(path);
}
/**
 * Returns the file system's default block size for the given path.
 */
public static Long getDefaultBlockSize(FileSystem fs, Path path) {
return fs.getDefaultBlockSize(path);
}
/**
* When a file was opened and the task died without closing the stream, another task executor
* cannot open because the existing lease will be active. We will try to recover the lease, from
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But if
* the client dies, this should be instant.
*/
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
throws IOException, InterruptedException {
LOG.info("Recover lease on dfs file " + p);
// initiate the recovery
boolean recovered = false;
for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) {
LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p);
recovered = dfs.recoverLease(p);
if (recovered) {
break;
}
// Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover under default settings
Thread.sleep(1000);
}
return recovered;
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
String partitionFullPath = partitionPath.toString();
int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName());
return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
}
/**
 * Deletes all clean meta files beyond the newest MIN_CLEAN_TO_KEEP.
 *
 * @param instants clean instants, expected newest-first so the skipped prefix is retained
 */
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
    Stream<HoodieInstant> instants) {
  //TODO - this should be archived when archival is made general for all meta-data
  // skip MIN_CLEAN_TO_KEEP and delete rest
  // Fix: the original used map() with no terminal operation; the lazy stream never ran,
  // so nothing was ever deleted. forEach is a terminal operation and executes the deletes.
  instants.skip(MIN_CLEAN_TO_KEEP).forEach(s -> {
    try {
      fs.delete(new Path(metaPath, s.getFileName()), false);
    } catch (IOException e) {
      throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
          e);
    }
  });
}
/**
 * Deletes all rollback meta files beyond the newest MIN_ROLLBACK_TO_KEEP.
 *
 * @param instants rollback instants, expected newest-first so the skipped prefix is retained
 */
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
    Stream<HoodieInstant> instants) {
  //TODO - this should be archived when archival is made general for all meta-data
  // skip MIN_ROLLBACK_TO_KEEP and delete rest
  // Fix: the original used map() with no terminal operation; the lazy stream never ran,
  // so nothing was ever deleted. forEach is a terminal operation and executes the deletes.
  instants.skip(MIN_ROLLBACK_TO_KEEP).forEach(s -> {
    try {
      fs.delete(new Path(metaPath, s.getFileName()), false);
    } catch (IOException e) {
      throw new HoodieIOException(
          "Could not delete rollback meta files " + s.getFileName(), e);
    }
  });
}
/**
 * Creates the given directory (including missing parents) if it does not already exist.
 */
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
if (!fs.exists(partitionPath)) {
fs.mkdirs(partitionPath);
}
}
/**
* Obtain all the partition paths, that are present in this table, denoted by presence of {@link
* com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
*/
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
throws IOException {
List<String> partitions = new ArrayList<>();
Path basePath = new Path(basePathStr);
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
while (allFiles.hasNext()) {
Path filePath = allFiles.next().getPath();
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
}
}
return partitions;
}
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr, boolean assumeDatePartitioning)
throws IOException {
if (assumeDatePartitioning) {
return getAllFoldersThreeLevelsDown(fs, basePathStr);
} else {
return getAllFoldersWithPartitionMetaFile(fs, basePathStr);
}
}
public static String getFileExtension(String fullName) {
Preconditions.checkNotNull(fullName);
String fileName = (new File(fullName)).getName();
int dotIndex = fileName.indexOf('.');
return dotIndex == -1 ? "" : fileName.substring(dotIndex);
}
public static String getInstantTime(String name) {
return name.replace(getFileExtension(name), "");
}
/**
* Get the file extension from the log file
*/
public static String getFileExtensionFromLog(Path logPath) {
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
if (!matcher.find()) {
throw new InvalidHoodiePathException(logPath, "LogFile");
}
return matcher.group(3);
}
/**
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
* have commitTime in the file name.
*/
public static String getFileIdFromLogPath(Path path) {
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
if (!matcher.find()) {
throw new InvalidHoodiePathException(path, "LogFile");
}
return matcher.group(1);
}
/**
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
* have commitTime in the file name.
*/
public static String getBaseCommitTimeFromLogPath(Path path) {
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
if (!matcher.find()) {
throw new InvalidHoodiePathException(path, "LogFile");
}
return matcher.group(2);
}
/**
* Get the last part of the file name in the log file and convert to int.
*/
public static int getFileVersionFromLog(Path logPath) {
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
if (!matcher.find()) {
throw new InvalidHoodiePathException(logPath, "LogFile");
}
return Integer.parseInt(matcher.group(4));
}
public static String makeLogFileName(String fileId, String logFileExtension,
String baseCommitTime, int version) {
return LOG_FILE_PREFIX + String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
}
public static String maskWithoutLogVersion(String commitTime, String fileId, String logFileExtension) {
return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension);
}
/**
* Get the latest log file written from the list of log files passed in
*/
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
return logFiles.sorted(Comparator
.comparing(s -> s.getLogVersion(),
Comparator.reverseOrder())).findFirst();
}
/**
* Get all the log files for the passed in FileId in the partition path
*/
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
return Arrays.stream(fs.listStatus(partitionPath,
path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension)))
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
}
/**
* Get the latest log version for the fileId in the partition path
*/
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
Optional<HoodieLogFile> latestLogFile =
getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
if (latestLogFile.isPresent()) {
return Optional.of(latestLogFile.get().getLogVersion());
}
return Optional.empty();
}
public static int getCurrentLogVersion(FileSystem fs, Path partitionPath,
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
Optional<Integer> currentVersion =
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
// handle potential overflow
return (currentVersion.isPresent()) ? currentVersion.get() : 1;
}
/**
* computes the next log version for the specified fileId in the partition path
*/
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
final String logFileExtension, final String baseCommitTime) throws IOException {
Optional<Integer> currentVersion =
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
// handle potential overflow
return (currentVersion.isPresent()) ? currentVersion.get() + 1 : 1;
}
public static int getDefaultBufferSize(final FileSystem fs) {
return fs.getConf().getInt("io.file.buffer.size", 4096);
}
public static Short getDefaultReplication(FileSystem fs, Path path) {
return fs.getDefaultReplication(path);
}
public static Long getDefaultBlockSize(FileSystem fs, Path path) {
return fs.getDefaultBlockSize(path);
}
/**
* When a file was opened and the task died without closing the stream, another task executor
* cannot open because the existing lease will be active. We will try to recover the lease, from
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But
* if the client dies, this should be instant.
*/
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
throws IOException, InterruptedException {
LOG.info("Recover lease on dfs file " + p);
// initiate the recovery
boolean recovered = false;
for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) {
LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p);
recovered = dfs.recoverLease(p);
if (recovered)
break;
// Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover under default settings
Thread.sleep(1000);
}
return recovered;
}
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
// skip MIN_CLEAN_TO_KEEP and delete rest
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
e);
}
});
}
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
Stream<HoodieInstant> instants) {
//TODO - this should be archived when archival is made general for all meta-data
// skip MIN_ROLLBACK_TO_KEEP and delete rest
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
try {
return fs.delete(new Path(metaPath, s.getFileName()), false);
} catch (IOException e) {
throw new HoodieIOException(
"Could not delete rollback meta files " + s.getFileName(), e);
}
});
}
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
if(!fs.exists(partitionPath)) {
fs.mkdirs(partitionPath);
}
}
/**
 * Converts a byte count to whole megabytes (integer division, truncates toward zero).
 */
public static Long getSizeInMB(long sizeInBytes) {
  final long bytesPerMb = 1024L * 1024L;
  return sizeInBytes / bytesPerMb;
}
public static Long getSizeInMB(long sizeInBytes) {
return sizeInBytes / (1024 * 1024);
}
}

View File

@@ -17,156 +17,167 @@
package com.uber.hoodie.common.util;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.SchemaCompatabilityException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.*;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Helper class to do common stuff across Avro.
*/
public class HoodieAvroUtils {
// All metadata fields are optional strings.
private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING)));
// All metadata fields are optional strings.
private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
Schema.create(Schema.Type.NULL),
Schema.create(Schema.Type.STRING)));
private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
/**
* Convert a given avro record to bytes
*/
public static byte[] avroToBytes(GenericRecord record) throws IOException {
GenericDatumWriter<GenericRecord> writer =
new GenericDatumWriter<>(record.getSchema());
ByteArrayOutputStream out = new ByteArrayOutputStream();
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
writer.write(record, encoder);
encoder.flush();
out.close();
return out.toByteArray();
/**
 * Serializes the given Avro record into a byte array using binary encoding.
 *
 * @throws IOException if encoding fails
 */
public static byte[] avroToBytes(GenericRecord record) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(record.getSchema());
  datumWriter.write(record, encoder);
  encoder.flush();
  out.close();
  return out.toByteArray();
}
/**
 * Deserializes binary-encoded bytes back into an Avro record using the given schema.
 *
 * @throws IOException if decoding fails
 */
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, null);
  return datumReader.read(null, binaryDecoder);
}
/**
* Adds the Hoodie metadata fields to the given schema
*/
public static Schema addMetadataFields(Schema schema) {
List<Schema.Field> parentFields = new ArrayList<>();
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
parentFields.add(commitTimeField);
parentFields.add(commitSeqnoField);
parentFields.add(recordKeyField);
parentFields.add(partitionPathField);
parentFields.add(fileNameField);
for (Schema.Field field : schema.getFields()) {
parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null));
}
/**
* Convert serialized bytes back into avro record
*/
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
return reader.read(null, decoder);
Schema mergedSchema = Schema
.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
mergedSchema.setFields(parentFields);
return mergedSchema;
}
/**
 * Builds the single-field schema (record key only) cached in RECORD_KEY_SCHEMA.
 */
private static Schema initRecordKeySchema() {
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
METADATA_FIELD_SCHEMA, "", null);
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
recordKeySchema.setFields(Arrays.asList(recordKeyField));
return recordKeySchema;
}
  /**
   * Returns the cached single-field schema used to read just the record key column.
   */
  public static Schema getRecordKeySchema() {
    return RECORD_KEY_SCHEMA;
  }
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey,
String partitionPath, String fileName) {
record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
return record;
}
/**
* Adds the Hoodie commit metadata into the provided Generic Record.
*/
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime,
String commitSeqno) {
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
return record;
}
/**
* Given a avro record with a given schema, rewrites it into the new schema
*/
public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) {
GenericRecord newRecord = new GenericData.Record(newSchema);
for (Schema.Field f : record.getSchema().getFields()) {
newRecord.put(f.name(), record.get(f.name()));
}
/**
* Adds the Hoodie metadata fields to the given schema
*/
public static Schema addMetadataFields(Schema schema) {
List<Schema.Field> parentFields = new ArrayList<>();
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
parentFields.add(commitTimeField);
parentFields.add(commitSeqnoField);
parentFields.add(recordKeyField);
parentFields.add(partitionPathField);
parentFields.add(fileNameField);
for (Schema.Field field : schema.getFields()) {
parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null));
}
Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
mergedSchema.setFields(parentFields);
return mergedSchema;
if (!new GenericData().validate(newSchema, newRecord)) {
throw new SchemaCompatabilityException(
"Unable to validate the rewritten record " + record + " against schema "
+ newSchema);
}
return newRecord;
}
private static Schema initRecordKeySchema() {
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
recordKeySchema.setFields(Arrays.asList(recordKeyField));
return recordKeySchema;
public static byte[] compress(String text) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
OutputStream out = new DeflaterOutputStream(baos);
out.write(text.getBytes("UTF-8"));
out.close();
} catch (IOException e) {
throw new HoodieIOException("IOException while compressing text " + text, e);
}
return baos.toByteArray();
}
  /**
   * Returns the cached single-field schema used to read just the record key column.
   */
  public static Schema getRecordKeySchema() {
    return RECORD_KEY_SCHEMA;
  }
  /**
   * Stamps the Hoodie key metadata (file name, partition path, record key) onto the record,
   * mutating it in place and returning the same instance.
   */
  public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath, String fileName) {
    record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
    record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
    record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
    return record;
  }
  /**
   * Adds the Hoodie commit metadata into the provided Generic Record.
   *
   * @param record      record to mutate in place
   * @param commitTime  instant time of the commit
   * @param commitSeqno sequence number within the commit
   * @return the same record instance, for chaining
   */
  public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
    record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
    record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
    return record;
  }
/**
* Given a avro record with a given schema, rewrites it into the new schema
*/
public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) {
GenericRecord newRecord = new GenericData.Record(newSchema);
for (Schema.Field f : record.getSchema().getFields()) {
newRecord.put(f.name(), record.get(f.name()));
}
if (!new GenericData().validate(newSchema, newRecord)) {
throw new SchemaCompatabilityException(
"Unable to validate the rewritten record " + record + " against schema "
+ newSchema);
}
return newRecord;
}
  /**
   * Deflate-compresses the UTF-8 bytes of the given text.
   *
   * @param text text to compress
   * @return the compressed bytes
   * @throws HoodieIOException if compression fails
   */
  public static byte[] compress(String text) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try {
      OutputStream out = new DeflaterOutputStream(baos);
      out.write(text.getBytes("UTF-8"));
      // close() finishes the deflate stream, flushing the remaining compressed bytes to baos.
      // NOTE(review): if write() throws, the stream is never closed and the Deflater's native
      // resources are released only at GC time — consider try-with-resources.
      out.close();
    } catch (IOException e) {
      throw new HoodieIOException("IOException while compressing text " + text, e);
    }
    return baos.toByteArray();
  }
public static String decompress(byte[] bytes) {
InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes));
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
byte[] buffer = new byte[8192];
int len;
while((len = in.read(buffer))>0)
baos.write(buffer, 0, len);
return new String(baos.toByteArray(), "UTF-8");
} catch (IOException e) {
throw new HoodieIOException("IOException while decompressing text", e);
}
public static String decompress(byte[] bytes) {
InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes));
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
byte[] buffer = new byte[8192];
int len;
while ((len = in.read(buffer)) > 0) {
baos.write(buffer, 0, len);
}
return new String(baos.toByteArray(), "UTF-8");
} catch (IOException e) {
throw new HoodieIOException("IOException while decompressing text", e);
}
}
}

View File

@@ -17,10 +17,13 @@
package com.uber.hoodie.common.util;
public class NumericUtils {

  /**
   * Formats a byte count as a human-readable string using binary (1024-based) units,
   * e.g. 1536 becomes "1.5 KB".
   *
   * @param bytes number of bytes (non-negative)
   * @return formatted size with one decimal place and a unit suffix
   */
  public static String humanReadableByteCount(double bytes) {
    if (bytes < 1024) {
      return String.format("%.1f B", bytes);
    }
    // exp is the unit tier: 1=K, 2=M, 3=G, 4=T, 5=P, 6=E.
    int exp = (int) (Math.log(bytes) / Math.log(1024));
    String pre = "KMGTPE".charAt(exp - 1) + "";
    return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre);
  }
}

View File

@@ -16,14 +16,20 @@
package com.uber.hoodie.common.util;
import static com.uber.hoodie.common.util.FSUtils.getFs;
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import com.uber.hoodie.common.BloomFilter;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.exception.HoodieException;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.MetadataNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
@@ -36,163 +42,144 @@ import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import java.io.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.uber.hoodie.common.util.FSUtils.getFs;
/**
* Utility functions involving with parquet.
*/
public class ParquetUtils {
/**
* Read the rowKey list from the given parquet file.
*
* @param filePath The parquet file path.
*/
public static Set<String> readRowKeysFromParquet(Path filePath) {
Configuration conf = new Configuration();
conf.addResource(getFs().getConf());
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
AvroReadSupport.setAvroReadSchema(conf, readSchema);
AvroReadSupport.setRequestedProjection(conf, readSchema);
ParquetReader reader = null;
Set<String> rowKeys = new HashSet<>();
/**
* Read the rowKey list from the given parquet file.
*
* @param filePath The parquet file path.
*/
public static Set<String> readRowKeysFromParquet(Path filePath) {
Configuration conf = new Configuration();
conf.addResource(getFs().getConf());
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
AvroReadSupport.setAvroReadSchema(conf, readSchema);
AvroReadSupport.setRequestedProjection(conf, readSchema);
ParquetReader reader = null;
Set<String> rowKeys = new HashSet<>();
try {
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
Object obj = reader.read();
while (obj != null) {
if (obj instanceof GenericRecord) {
rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
}
obj = reader.read();
}
} catch (IOException e) {
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
} finally {
if (reader != null) {
try {
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
Object obj = reader.read();
while (obj != null) {
if (obj instanceof GenericRecord) {
rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
}
obj = reader.read();
}
reader.close();
} catch (IOException e) {
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
// ignore
}
}
// ignore
}
return rowKeys;
}
}
return rowKeys;
}
/**
*
* Read the metadata from a parquet file
*
* @param parquetFilePath
* @return
*/
public static ParquetMetadata readMetadata(Path parquetFilePath) {
return readMetadata(new Configuration(), parquetFilePath);
/**
* Read the metadata from a parquet file
*/
public static ParquetMetadata readMetadata(Path parquetFilePath) {
return readMetadata(new Configuration(), parquetFilePath);
}
public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) {
ParquetMetadata footer;
try {
// TODO(vc): Should we use the parallel reading version here?
footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath);
} catch (IOException e) {
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
e);
}
return footer;
}
public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) {
ParquetMetadata footer;
/**
* Get the schema of the given parquet file.
*/
public static MessageType readSchema(Path parquetFilePath) {
return readMetadata(parquetFilePath).getFileMetaData().getSchema();
}
private static List<String> readParquetFooter(Path parquetFilePath, String... footerNames) {
List<String> footerVals = new ArrayList<>();
ParquetMetadata footer = readMetadata(parquetFilePath);
Map<String, String> metadata = footer.getFileMetaData().getKeyValueMetaData();
for (String footerName : footerNames) {
if (metadata.containsKey(footerName)) {
footerVals.add(metadata.get(footerName));
} else {
throw new MetadataNotFoundException("Could not find index in Parquet footer. " +
"Looked for key " + footerName + " in " + parquetFilePath);
}
}
return footerVals;
}
public static Schema readAvroSchema(Path parquetFilePath) {
return new AvroSchemaConverter().convert(readSchema(parquetFilePath));
}
/**
* Read out the bloom filter from the parquet file meta data.
*/
public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) {
String footerVal = readParquetFooter(parquetFilePath,
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
return new BloomFilter(footerVal);
}
public static String[] readMinMaxRecordKeys(Path parquetFilePath) {
List<String> minMaxKeys = readParquetFooter(parquetFilePath,
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
if (minMaxKeys.size() != 2) {
throw new HoodieException(String.format(
"Could not read min/max record key out of footer correctly from %s. read) : %s",
parquetFilePath, minMaxKeys));
}
return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)};
}
/**
* NOTE: This literally reads the entire file contents, thus should be used with caution.
*/
public static List<GenericRecord> readAvroRecords(Path filePath) {
ParquetReader reader = null;
List<GenericRecord> records = new ArrayList<>();
try {
reader = AvroParquetReader.builder(filePath).build();
Object obj = reader.read();
while (obj != null) {
if (obj instanceof GenericRecord) {
records.add(((GenericRecord) obj));
}
obj = reader.read();
}
} catch (IOException e) {
throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
} finally {
if (reader != null) {
try {
// TODO(vc): Should we use the parallel reading version here?
footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath);
reader.close();
} catch (IOException e) {
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
e);
// ignore
}
return footer;
}
/**
* Get the schema of the given parquet file.
*
* @param parquetFilePath
* @return
*/
public static MessageType readSchema(Path parquetFilePath) {
return readMetadata(parquetFilePath).getFileMetaData().getSchema();
}
private static List<String> readParquetFooter(Path parquetFilePath, String... footerNames) {
List<String> footerVals = new ArrayList<>();
ParquetMetadata footer = readMetadata(parquetFilePath);
Map<String, String> metadata = footer.getFileMetaData().getKeyValueMetaData();
for (String footerName : footerNames) {
if (metadata.containsKey(footerName)) {
footerVals.add(metadata.get(footerName));
} else {
throw new MetadataNotFoundException("Could not find index in Parquet footer. " +
"Looked for key " + footerName + " in " + parquetFilePath);
}
}
return footerVals;
}
public static Schema readAvroSchema(Path parquetFilePath) {
return new AvroSchemaConverter().convert(readSchema(parquetFilePath));
}
/**
* Read out the bloom filter from the parquet file meta data.
*/
public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) {
String footerVal = readParquetFooter(parquetFilePath,
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
return new BloomFilter(footerVal);
}
public static String[] readMinMaxRecordKeys(Path parquetFilePath) {
List<String> minMaxKeys = readParquetFooter(parquetFilePath, HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
if (minMaxKeys.size() != 2) {
throw new HoodieException(String.format(
"Could not read min/max record key out of footer correctly from %s. read) : %s",
parquetFilePath, minMaxKeys));
}
return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)};
}
/**
*
* NOTE: This literally reads the entire file contents, thus should be used with caution.
*
* @param filePath
* @return
*/
public static List<GenericRecord> readAvroRecords(Path filePath) {
ParquetReader reader = null;
List<GenericRecord> records = new ArrayList<>();
try {
reader = AvroParquetReader.builder(filePath).build();
Object obj = reader.read();
while (obj != null) {
if (obj instanceof GenericRecord) {
records.add(((GenericRecord) obj));
}
obj = reader.read();
}
} catch (IOException e) {
throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
// ignore
}
}
}
return records;
}
}
return records;
}
}

View File

@@ -17,22 +17,18 @@
package com.uber.hoodie.common.util;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import com.uber.hoodie.exception.HoodieException;
import org.apache.avro.generic.GenericRecord;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
public class ReflectionUtils {
private static Map<String, Class<?>> clazzCache = new HashMap<>();
public static <T> T loadClass(String fqcn) {
try {
if(clazzCache.get(fqcn) == null) {
if (clazzCache.get(fqcn) == null) {
Class<?> clazz = Class.<HoodieRecordPayload>forName(fqcn);
clazzCache.put(fqcn, clazz);
}
@@ -48,21 +44,17 @@ public class ReflectionUtils {
/**
* Instantiate a given class with a generic record payload
*
* @param recordPayloadClass
* @param payloadArgs
* @param <T>
* @return
*/
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass,
Object [] payloadArgs,
Class<?> ... constructorArgTypes) {
Object[] payloadArgs,
Class<?>... constructorArgTypes) {
try {
if(clazzCache.get(recordPayloadClass) == null) {
if (clazzCache.get(recordPayloadClass) == null) {
Class<?> clazz = Class.<HoodieRecordPayload>forName(recordPayloadClass);
clazzCache.put(recordPayloadClass, clazz);
}
return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes).newInstance(payloadArgs);
return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes)
.newInstance(payloadArgs);
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
throw new HoodieException("Unable to instantiate payload class ", e);
} catch (ClassNotFoundException e) {

View File

@@ -16,43 +16,40 @@
package com.uber.hoodie.exception;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
* <p>
* Exception thrown to indicate that a hoodie dataset was not found on the path provided
* <p>
* <p> Exception thrown to indicate that a hoodie dataset was not found on the path provided <p>
*/
public class DatasetNotFoundException extends HoodieException {
public DatasetNotFoundException(String basePath) {
super(getErrorMessage(basePath));
}
private static String getErrorMessage(String basePath) {
return "Hoodie dataset not found in path " + basePath;
}
public DatasetNotFoundException(String basePath) {
super(getErrorMessage(basePath));
}
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
throws DatasetNotFoundException {
// Check if the base path is found
try {
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
throw new DatasetNotFoundException(basePathDir.toString());
}
// Check if the meta path is found
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
throw new DatasetNotFoundException(metaPathDir.toString());
}
} catch (IllegalArgumentException e) {
// if the base path is file:///, then we have a IllegalArgumentException
throw new DatasetNotFoundException(metaPathDir.toString());
}
catch (IOException e) {
throw new HoodieIOException(
"Could not check if dataset " + basePathDir + " is valid dataset", e);
}
private static String getErrorMessage(String basePath) {
return "Hoodie dataset not found in path " + basePath;
}
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
throws DatasetNotFoundException {
// Check if the base path is found
try {
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
throw new DatasetNotFoundException(basePathDir.toString());
}
// Check if the meta path is found
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
throw new DatasetNotFoundException(metaPathDir.toString());
}
} catch (IllegalArgumentException e) {
// if the base path is file:///, then we have a IllegalArgumentException
throw new DatasetNotFoundException(metaPathDir.toString());
} catch (IOException e) {
throw new HoodieIOException(
"Could not check if dataset " + basePathDir + " is valid dataset", e);
}
}
}

View File

@@ -19,39 +19,34 @@ package com.uber.hoodie.exception;
import java.io.Serializable;
/**
 * <p> Exception thrown for Hoodie failures. The root of the exception hierarchy. </p>
 *
 * <p> Hoodie Write/Read clients will throw this exception if any of its operations fail. This is a
 * runtime (unchecked) exception. </p>
 */
public class HoodieException extends RuntimeException implements Serializable {

  public HoodieException() {
    super();
  }

  public HoodieException(String message) {
    super(message);
  }

  public HoodieException(String message, Throwable t) {
    super(message, t);
  }

  public HoodieException(Throwable t) {
    super(t);
  }

  /**
   * Formats a message after converting every argument to its string form, so that
   * non-string arguments can safely fill {@code %s} placeholders.
   */
  protected static String format(String message, Object... args) {
    String[] argStrings = new String[args.length];
    for (int i = 0; i < args.length; i += 1) {
      argStrings[i] = String.valueOf(args[i]);
    }
    return String.format(String.valueOf(message), (Object[]) argStrings);
  }
}

View File

@@ -19,23 +19,22 @@ package com.uber.hoodie.exception;
import java.io.IOException;
/**
* <p>
* Exception thrown for dataset IO-related failures.
* </p>
* <p> Exception thrown for dataset IO-related failures. </p>
*/
public class HoodieIOException extends HoodieException {
private IOException ioException;
public HoodieIOException(String msg, IOException t) {
super(msg, t);
this.ioException = t;
}
private IOException ioException;
public HoodieIOException(String msg) {
super(msg);
}
public HoodieIOException(String msg, IOException t) {
super(msg, t);
this.ioException = t;
}
public IOException getIOException() {
return ioException;
}
public HoodieIOException(String msg) {
super(msg);
}
public IOException getIOException() {
return ioException;
}
}

View File

@@ -17,16 +17,15 @@
package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown for HoodieIndex related errors.
* </p>
* <p> Exception thrown for HoodieIndex related errors. </p>
*/
public class HoodieIndexException extends HoodieException {
public HoodieIndexException(String msg) {
super(msg);
}
public HoodieIndexException(String msg, Throwable e) {
super(msg, e);
}
public HoodieIndexException(String msg) {
super(msg);
}
public HoodieIndexException(String msg, Throwable e) {
super(msg, e);
}
}

View File

@@ -17,7 +17,8 @@
package com.uber.hoodie.exception;
public class HoodieNotSupportedException extends HoodieException {
public HoodieNotSupportedException(String errorMsg) {
super(errorMsg);
}
public HoodieNotSupportedException(String errorMsg) {
super(errorMsg);
}
}

View File

@@ -19,17 +19,15 @@ package com.uber.hoodie.exception;
import com.uber.hoodie.common.model.HoodieRecord;
/**
 * <p> Exception throws when indexing fails to locate the hoodie record. HoodieRecord current
 * location and partition path does not match. This is an unrecoverable error </p>
 */
public class HoodieRecordMissingException extends HoodieException {

  public HoodieRecordMissingException(HoodieRecord record) {
    super(
        "Record " + record.getRecordKey() + " with partition path " + record.getPartitionPath()
            + " in current location " + record.getCurrentLocation()
            + " is not found in the partition");
  }
}

View File

@@ -17,16 +17,15 @@
package com.uber.hoodie.exception;
/**
* <p>
* Exception thrown to indicate that a hoodie dataset is invalid
* <p>
* <p> Exception thrown to indicate that a hoodie dataset is invalid <p>
*/
public class InvalidDatasetException extends HoodieException {
public InvalidDatasetException(String basePath) {
super(getErrorMessage(basePath));
}
private static String getErrorMessage(String basePath) {
return "Invalid Hoodie Dataset. " + basePath;
}
public InvalidDatasetException(String basePath) {
super(getErrorMessage(basePath));
}
private static String getErrorMessage(String basePath) {
return "Invalid Hoodie Dataset. " + basePath;
}
}

View File

@@ -19,7 +19,8 @@ package com.uber.hoodie.exception;
import org.apache.hadoop.fs.Path;
/**
 * Exception thrown when a path does not conform to the expected Hoodie layout.
 */
public class InvalidHoodiePathException extends HoodieException {

  public InvalidHoodiePathException(Path path, String type) {
    super("Invalid path " + path + " of type " + type);
  }
}

View File

@@ -22,11 +22,12 @@ package com.uber.hoodie.exception;
* Thrown when expected metadata is not found
*/
public class MetadataNotFoundException extends HoodieException {
public MetadataNotFoundException(String msg) {
super(msg);
}
public MetadataNotFoundException(String msg, Throwable e) {
super(msg, e);
}
public MetadataNotFoundException(String msg) {
super(msg);
}
public MetadataNotFoundException(String msg, Throwable e) {
super(msg, e);
}
}

View File

@@ -17,15 +17,16 @@
package com.uber.hoodie.exception;
public class SchemaCompatabilityException extends HoodieException {
public SchemaCompatabilityException(String message) {
super(message);
}
public SchemaCompatabilityException(String message, Throwable t) {
super(message, t);
}
public SchemaCompatabilityException(String message) {
super(message);
}
public SchemaCompatabilityException(Throwable t) {
super(t);
}
public SchemaCompatabilityException(String message, Throwable t) {
super(message, t);
}
public SchemaCompatabilityException(Throwable t) {
super(t);
}
}