Reformatting code per Google Code Style all over
This commit is contained in:
committed by
vinoth chandar
parent
5a62480a92
commit
e45679f5e2
@@ -17,60 +17,59 @@
|
||||
package com.uber.hoodie.avro;
|
||||
|
||||
import com.uber.hoodie.common.BloomFilter;
|
||||
|
||||
import java.util.HashMap;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.parquet.avro.AvroWriteSupport;
|
||||
import org.apache.parquet.hadoop.api.WriteSupport;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Wrap AvroWriterSupport for plugging in the bloom filter.
|
||||
*/
|
||||
public class HoodieAvroWriteSupport extends AvroWriteSupport {
|
||||
private BloomFilter bloomFilter;
|
||||
private String minRecordKey;
|
||||
private String maxRecordKey;
|
||||
|
||||
private BloomFilter bloomFilter;
|
||||
private String minRecordKey;
|
||||
private String maxRecordKey;
|
||||
|
||||
|
||||
public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
|
||||
"com.uber.hoodie.bloomfilter";
|
||||
public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
|
||||
public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
|
||||
public final static String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
|
||||
"com.uber.hoodie.bloomfilter";
|
||||
public final static String HOODIE_MIN_RECORD_KEY_FOOTER = "hoodie_min_record_key";
|
||||
public final static String HOODIE_MAX_RECORD_KEY_FOOTER = "hoodie_max_record_key";
|
||||
|
||||
|
||||
public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
|
||||
super(schema, avroSchema);
|
||||
this.bloomFilter = bloomFilter;
|
||||
public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
|
||||
super(schema, avroSchema);
|
||||
this.bloomFilter = bloomFilter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public WriteSupport.FinalizedWriteContext finalizeWrite() {
|
||||
HashMap<String, String> extraMetaData = new HashMap<>();
|
||||
if (bloomFilter != null) {
|
||||
extraMetaData
|
||||
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
|
||||
if (minRecordKey != null && maxRecordKey != null) {
|
||||
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
|
||||
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
|
||||
}
|
||||
}
|
||||
return new WriteSupport.FinalizedWriteContext(extraMetaData);
|
||||
}
|
||||
|
||||
public void add(String recordKey) {
|
||||
this.bloomFilter.add(recordKey);
|
||||
if (minRecordKey != null) {
|
||||
minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
|
||||
} else {
|
||||
minRecordKey = recordKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public WriteSupport.FinalizedWriteContext finalizeWrite() {
|
||||
HashMap<String, String> extraMetaData = new HashMap<>();
|
||||
if (bloomFilter != null) {
|
||||
extraMetaData
|
||||
.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
|
||||
if (minRecordKey != null && maxRecordKey != null) {
|
||||
extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
|
||||
extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
|
||||
}
|
||||
}
|
||||
return new WriteSupport.FinalizedWriteContext(extraMetaData);
|
||||
}
|
||||
|
||||
public void add(String recordKey) {
|
||||
this.bloomFilter.add(recordKey);
|
||||
if (minRecordKey != null) {
|
||||
minRecordKey = minRecordKey.compareTo(recordKey) <= 0 ? minRecordKey : recordKey;
|
||||
} else {
|
||||
minRecordKey = recordKey;
|
||||
}
|
||||
|
||||
if (maxRecordKey != null) {
|
||||
maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
|
||||
} else {
|
||||
maxRecordKey = recordKey;
|
||||
}
|
||||
if (maxRecordKey != null) {
|
||||
maxRecordKey = maxRecordKey.compareTo(recordKey) >= 0 ? maxRecordKey : recordKey;
|
||||
} else {
|
||||
maxRecordKey = recordKey;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,151 +17,148 @@
|
||||
package com.uber.hoodie.avro;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.GenericRecordBuilder;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
/**
|
||||
* Marjority of this is copied from
|
||||
* https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java
|
||||
* Marjority of this is copied from https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java
|
||||
* Adjusted for expected behavior of our use cases
|
||||
*/
|
||||
public class MercifulJsonConverter {
|
||||
private final ObjectMapper mapper = new ObjectMapper();
|
||||
private final Schema baseSchema;
|
||||
|
||||
public MercifulJsonConverter(Schema schema) {
|
||||
this.baseSchema = schema;
|
||||
private final ObjectMapper mapper = new ObjectMapper();
|
||||
private final Schema baseSchema;
|
||||
|
||||
public MercifulJsonConverter(Schema schema) {
|
||||
this.baseSchema = schema;
|
||||
}
|
||||
|
||||
|
||||
public GenericRecord convert(String json) throws IOException {
|
||||
try {
|
||||
return convert(mapper.readValue(json, Map.class), baseSchema);
|
||||
} catch (IOException e) {
|
||||
throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private GenericRecord convert(Map<String, Object> raw, Schema schema)
|
||||
throws IOException {
|
||||
GenericRecord result = new GenericData.Record(schema);
|
||||
for (Schema.Field f : schema.getFields()) {
|
||||
String name = f.name();
|
||||
Object rawValue = raw.get(name);
|
||||
if (rawValue != null) {
|
||||
result.put(f.pos(), typeConvert(rawValue, name, f.schema()));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public GenericRecord convert(String json) throws IOException {
|
||||
try {
|
||||
return convert(mapper.readValue(json, Map.class), baseSchema);
|
||||
} catch (IOException e) {
|
||||
throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage());
|
||||
private Object typeConvert(Object value, String name, Schema schema) throws IOException {
|
||||
if (isOptional(schema)) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
} else {
|
||||
schema = getNonNull(schema);
|
||||
}
|
||||
} else if (value == null) {
|
||||
// Always fail on null for non-nullable schemas
|
||||
throw new JsonConversionException(null, name, schema);
|
||||
}
|
||||
|
||||
switch (schema.getType()) {
|
||||
case BOOLEAN:
|
||||
if (value instanceof Boolean) {
|
||||
return (Boolean) value;
|
||||
}
|
||||
}
|
||||
|
||||
private GenericRecord convert(Map<String, Object> raw, Schema schema)
|
||||
throws IOException {
|
||||
GenericRecord result = new GenericData.Record(schema);
|
||||
for (Schema.Field f : schema.getFields()) {
|
||||
String name = f.name();
|
||||
Object rawValue = raw.get(name);
|
||||
if (rawValue != null) {
|
||||
result.put(f.pos(), typeConvert(rawValue, name, f.schema()));
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).doubleValue();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private Object typeConvert(Object value, String name, Schema schema) throws IOException {
|
||||
if (isOptional(schema)) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
} else {
|
||||
schema = getNonNull(schema);
|
||||
}
|
||||
} else if (value == null) {
|
||||
// Always fail on null for non-nullable schemas
|
||||
throw new JsonConversionException(null, name, schema);
|
||||
break;
|
||||
case FLOAT:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).floatValue();
|
||||
}
|
||||
|
||||
switch (schema.getType()) {
|
||||
case BOOLEAN:
|
||||
if (value instanceof Boolean) {
|
||||
return (Boolean) value;
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).doubleValue();
|
||||
}
|
||||
break;
|
||||
case FLOAT:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).floatValue();
|
||||
}
|
||||
break;
|
||||
case INT:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).intValue();
|
||||
}
|
||||
break;
|
||||
case LONG:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).longValue();
|
||||
}
|
||||
break;
|
||||
case STRING:
|
||||
return value.toString();
|
||||
case ENUM:
|
||||
if (schema.getEnumSymbols().contains(value.toString())) {
|
||||
return new GenericData.EnumSymbol(schema, value.toString());
|
||||
}
|
||||
throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()),
|
||||
schema.getFullName(), schema);
|
||||
case RECORD:
|
||||
return convert((Map<String, Object>) value, schema);
|
||||
case ARRAY:
|
||||
Schema elementSchema = schema.getElementType();
|
||||
List listRes = new ArrayList();
|
||||
for (Object v : (List) value) {
|
||||
listRes.add(typeConvert(v, name, elementSchema));
|
||||
}
|
||||
return listRes;
|
||||
case MAP:
|
||||
Schema valueSchema = schema.getValueType();
|
||||
Map<String, Object> mapRes = new HashMap<String, Object>();
|
||||
for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) {
|
||||
mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema));
|
||||
}
|
||||
return mapRes;
|
||||
default:
|
||||
throw new IllegalArgumentException(
|
||||
"JsonConverter cannot handle type: " + schema.getType());
|
||||
break;
|
||||
case INT:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).intValue();
|
||||
}
|
||||
throw new JsonConversionException(value, name, schema);
|
||||
}
|
||||
|
||||
private boolean isOptional(Schema schema) {
|
||||
return schema.getType().equals(Schema.Type.UNION) &&
|
||||
schema.getTypes().size() == 2 &&
|
||||
(schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
|
||||
schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
|
||||
}
|
||||
|
||||
private Schema getNonNull(Schema schema) {
|
||||
List<Schema> types = schema.getTypes();
|
||||
return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
|
||||
}
|
||||
|
||||
public static class JsonConversionException extends RuntimeException {
|
||||
|
||||
private Object value;
|
||||
private String fieldName;
|
||||
private Schema schema;
|
||||
|
||||
public JsonConversionException(Object value, String fieldName, Schema schema) {
|
||||
this.value = value;
|
||||
this.fieldName = fieldName;
|
||||
this.schema = schema;
|
||||
break;
|
||||
case LONG:
|
||||
if (value instanceof Number) {
|
||||
return ((Number) value).longValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Type conversion error for field %s, %s for %s",
|
||||
fieldName, value, schema);
|
||||
break;
|
||||
case STRING:
|
||||
return value.toString();
|
||||
case ENUM:
|
||||
if (schema.getEnumSymbols().contains(value.toString())) {
|
||||
return new GenericData.EnumSymbol(schema, value.toString());
|
||||
}
|
||||
throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()),
|
||||
schema.getFullName(), schema);
|
||||
case RECORD:
|
||||
return convert((Map<String, Object>) value, schema);
|
||||
case ARRAY:
|
||||
Schema elementSchema = schema.getElementType();
|
||||
List listRes = new ArrayList();
|
||||
for (Object v : (List) value) {
|
||||
listRes.add(typeConvert(v, name, elementSchema));
|
||||
}
|
||||
return listRes;
|
||||
case MAP:
|
||||
Schema valueSchema = schema.getValueType();
|
||||
Map<String, Object> mapRes = new HashMap<String, Object>();
|
||||
for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) {
|
||||
mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema));
|
||||
}
|
||||
return mapRes;
|
||||
default:
|
||||
throw new IllegalArgumentException(
|
||||
"JsonConverter cannot handle type: " + schema.getType());
|
||||
}
|
||||
throw new JsonConversionException(value, name, schema);
|
||||
}
|
||||
|
||||
private boolean isOptional(Schema schema) {
|
||||
return schema.getType().equals(Schema.Type.UNION) &&
|
||||
schema.getTypes().size() == 2 &&
|
||||
(schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
|
||||
schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
|
||||
}
|
||||
|
||||
private Schema getNonNull(Schema schema) {
|
||||
List<Schema> types = schema.getTypes();
|
||||
return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
|
||||
}
|
||||
|
||||
public static class JsonConversionException extends RuntimeException {
|
||||
|
||||
private Object value;
|
||||
private String fieldName;
|
||||
private Schema schema;
|
||||
|
||||
public JsonConversionException(Object value, String fieldName, Schema schema) {
|
||||
this.value = value;
|
||||
this.fieldName = fieldName;
|
||||
this.schema = schema;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Type conversion error for field %s, %s for %s",
|
||||
fieldName, value, schema);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,84 +17,86 @@
|
||||
package com.uber.hoodie.common;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieIndexException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import javax.xml.bind.DatatypeConverter;
|
||||
import org.apache.commons.io.output.ByteArrayOutputStream;
|
||||
import org.apache.hadoop.util.bloom.Key;
|
||||
import org.apache.hadoop.util.hash.Hash;
|
||||
|
||||
import javax.xml.bind.DatatypeConverter;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
/**
|
||||
* A Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}.
|
||||
*/
|
||||
public class BloomFilter {
|
||||
/**
|
||||
* Used in computing the optimal Bloom filter size. This approximately equals 0.480453.
|
||||
*/
|
||||
public static final double LOG2_SQUARED = Math.log(2) * Math.log(2);
|
||||
|
||||
private org.apache.hadoop.util.bloom.BloomFilter filter = null;
|
||||
/**
|
||||
* Used in computing the optimal Bloom filter size. This approximately equals 0.480453.
|
||||
*/
|
||||
public static final double LOG2_SQUARED = Math.log(2) * Math.log(2);
|
||||
|
||||
public BloomFilter(int numEntries, double errorRate) {
|
||||
this(numEntries, errorRate, Hash.MURMUR_HASH);
|
||||
private org.apache.hadoop.util.bloom.BloomFilter filter = null;
|
||||
|
||||
public BloomFilter(int numEntries, double errorRate) {
|
||||
this(numEntries, errorRate, Hash.MURMUR_HASH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Bloom filter with the given configurations.
|
||||
*/
|
||||
public BloomFilter(int numEntries, double errorRate, int hashType) {
|
||||
// Bit size
|
||||
int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED));
|
||||
// Number of the hash functions
|
||||
int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries);
|
||||
// The filter
|
||||
this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the bloom filter from serialized string.
|
||||
*/
|
||||
public BloomFilter(String filterStr) {
|
||||
this.filter = new org.apache.hadoop.util.bloom.BloomFilter();
|
||||
byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr);
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
|
||||
try {
|
||||
this.filter.readFields(dis);
|
||||
dis.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIndexException("Could not deserialize BloomFilter instance", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Bloom filter with the given configurations.
|
||||
*/
|
||||
public BloomFilter(int numEntries, double errorRate, int hashType) {
|
||||
// Bit size
|
||||
int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED));
|
||||
// Number of the hash functions
|
||||
int numHashs = (int) Math.ceil(Math.log(2) * bitSize / numEntries);
|
||||
// The filter
|
||||
this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashs, hashType);
|
||||
public void add(String key) {
|
||||
if (key == null) {
|
||||
throw new NullPointerException("Key cannot by null");
|
||||
}
|
||||
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the bloom filter from serialized string.
|
||||
*/
|
||||
public BloomFilter(String filterStr) {
|
||||
this.filter = new org.apache.hadoop.util.bloom.BloomFilter();
|
||||
byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr);
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
|
||||
try {
|
||||
this.filter.readFields(dis);
|
||||
dis.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIndexException("Could not deserialize BloomFilter instance", e);
|
||||
}
|
||||
public boolean mightContain(String key) {
|
||||
if (key == null) {
|
||||
throw new NullPointerException("Key cannot by null");
|
||||
}
|
||||
return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
public void add(String key) {
|
||||
if (key == null) {
|
||||
throw new NullPointerException("Key cannot by null");
|
||||
}
|
||||
filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
public boolean mightContain(String key) {
|
||||
if (key == null) {
|
||||
throw new NullPointerException("Key cannot by null");
|
||||
}
|
||||
return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize the bloom filter as a string.
|
||||
*/
|
||||
public String serializeToString() {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream dos = new DataOutputStream(baos);
|
||||
try {
|
||||
filter.write(dos);
|
||||
byte[] bytes = baos.toByteArray();
|
||||
dos.close();
|
||||
return DatatypeConverter.printBase64Binary(bytes);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIndexException("Could not serialize BloomFilter instance", e);
|
||||
}
|
||||
/**
|
||||
* Serialize the bloom filter as a string.
|
||||
*/
|
||||
public String serializeToString() {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream dos = new DataOutputStream(baos);
|
||||
try {
|
||||
filter.write(dos);
|
||||
byte[] bytes = baos.toByteArray();
|
||||
dos.close();
|
||||
return DatatypeConverter.printBase64Binary(bytes);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIndexException("Could not serialize BloomFilter instance", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ package com.uber.hoodie.common;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieCleaningPolicy;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
@@ -27,100 +26,102 @@ import java.util.Optional;
|
||||
* Collects stats about a single partition clean operation
|
||||
*/
|
||||
public class HoodieCleanStat implements Serializable {
|
||||
// Policy used
|
||||
private final HoodieCleaningPolicy policy;
|
||||
// Partition path cleaned
|
||||
private final String partitionPath;
|
||||
// The patterns that were generated for the delete operation
|
||||
private final List<String> deletePathPatterns;
|
||||
private final List<String> successDeleteFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteFiles;
|
||||
// Earliest commit that was retained in this clean
|
||||
private final String earliestCommitToRetain;
|
||||
|
||||
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
|
||||
List<String> deletePathPatterns, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, String earliestCommitToRetain) {
|
||||
this.policy = policy;
|
||||
this.partitionPath = partitionPath;
|
||||
this.deletePathPatterns = deletePathPatterns;
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
this.earliestCommitToRetain = earliestCommitToRetain;
|
||||
// Policy used
|
||||
private final HoodieCleaningPolicy policy;
|
||||
// Partition path cleaned
|
||||
private final String partitionPath;
|
||||
// The patterns that were generated for the delete operation
|
||||
private final List<String> deletePathPatterns;
|
||||
private final List<String> successDeleteFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteFiles;
|
||||
// Earliest commit that was retained in this clean
|
||||
private final String earliestCommitToRetain;
|
||||
|
||||
public HoodieCleanStat(HoodieCleaningPolicy policy, String partitionPath,
|
||||
List<String> deletePathPatterns, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, String earliestCommitToRetain) {
|
||||
this.policy = policy;
|
||||
this.partitionPath = partitionPath;
|
||||
this.deletePathPatterns = deletePathPatterns;
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
this.earliestCommitToRetain = earliestCommitToRetain;
|
||||
}
|
||||
|
||||
public HoodieCleaningPolicy getPolicy() {
|
||||
return policy;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
public List<String> getDeletePathPatterns() {
|
||||
return deletePathPatterns;
|
||||
}
|
||||
|
||||
public List<String> getSuccessDeleteFiles() {
|
||||
return successDeleteFiles;
|
||||
}
|
||||
|
||||
public List<String> getFailedDeleteFiles() {
|
||||
return failedDeleteFiles;
|
||||
}
|
||||
|
||||
public String getEarliestCommitToRetain() {
|
||||
return earliestCommitToRetain;
|
||||
}
|
||||
|
||||
public static HoodieCleanStat.Builder newBuilder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private HoodieCleaningPolicy policy;
|
||||
private List<String> deletePathPatterns;
|
||||
private List<String> successDeleteFiles;
|
||||
private List<String> failedDeleteFiles;
|
||||
private String partitionPath;
|
||||
private String earliestCommitToRetain;
|
||||
|
||||
public Builder withPolicy(HoodieCleaningPolicy policy) {
|
||||
this.policy = policy;
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleaningPolicy getPolicy() {
|
||||
return policy;
|
||||
public Builder withDeletePathPattern(List<String> deletePathPatterns) {
|
||||
this.deletePathPatterns = deletePathPatterns;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
public Builder withSuccessfulDeletes(List<String> successDeleteFiles) {
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<String> getDeletePathPatterns() {
|
||||
return deletePathPatterns;
|
||||
public Builder withFailedDeletes(List<String> failedDeleteFiles) {
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<String> getSuccessDeleteFiles() {
|
||||
return successDeleteFiles;
|
||||
public Builder withPartitionPath(String partitionPath) {
|
||||
this.partitionPath = partitionPath;
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<String> getFailedDeleteFiles() {
|
||||
return failedDeleteFiles;
|
||||
public Builder withEarliestCommitRetained(Optional<HoodieInstant> earliestCommitToRetain) {
|
||||
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ?
|
||||
earliestCommitToRetain.get().getTimestamp() :
|
||||
"-1";
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getEarliestCommitToRetain() {
|
||||
return earliestCommitToRetain;
|
||||
}
|
||||
|
||||
public static HoodieCleanStat.Builder newBuilder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private HoodieCleaningPolicy policy;
|
||||
private List<String> deletePathPatterns;
|
||||
private List<String> successDeleteFiles;
|
||||
private List<String> failedDeleteFiles;
|
||||
private String partitionPath;
|
||||
private String earliestCommitToRetain;
|
||||
|
||||
public Builder withPolicy(HoodieCleaningPolicy policy) {
|
||||
this.policy = policy;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withDeletePathPattern(List<String> deletePathPatterns) {
|
||||
this.deletePathPatterns = deletePathPatterns;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withSuccessfulDeletes(List<String> successDeleteFiles) {
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withFailedDeletes(List<String> failedDeleteFiles) {
|
||||
this.failedDeleteFiles= failedDeleteFiles;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withPartitionPath(String partitionPath) {
|
||||
this.partitionPath = partitionPath;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withEarliestCommitRetained(Optional<HoodieInstant> earliestCommitToRetain) {
|
||||
this.earliestCommitToRetain = (earliestCommitToRetain.isPresent()) ?
|
||||
earliestCommitToRetain.get().getTimestamp() :
|
||||
"-1";
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieCleanStat build() {
|
||||
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
|
||||
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
|
||||
}
|
||||
public HoodieCleanStat build() {
|
||||
return new HoodieCleanStat(policy, partitionPath, deletePathPatterns,
|
||||
successDeleteFiles, failedDeleteFiles, earliestCommitToRetain);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,13 +19,6 @@ package com.uber.hoodie.common;
|
||||
import com.uber.hoodie.avro.MercifulJsonConverter;
|
||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.codehaus.jackson.JsonNode;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
@@ -34,75 +27,85 @@ import java.util.Optional;
|
||||
import java.util.zip.Deflater;
|
||||
import java.util.zip.DeflaterOutputStream;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.codehaus.jackson.JsonNode;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
public class HoodieJsonPayload implements HoodieRecordPayload<HoodieJsonPayload> {
|
||||
private byte[] jsonDataCompressed;
|
||||
private int dataSize;
|
||||
|
||||
public HoodieJsonPayload(String json) throws IOException {
|
||||
this.jsonDataCompressed = compressData(json);
|
||||
this.dataSize = json.length();
|
||||
private byte[] jsonDataCompressed;
|
||||
private int dataSize;
|
||||
|
||||
public HoodieJsonPayload(String json) throws IOException {
|
||||
this.jsonDataCompressed = compressData(json);
|
||||
this.dataSize = json.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema)
|
||||
throws IOException {
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
|
||||
MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema);
|
||||
return Optional.of(jsonConverter.convert(getJsonData()));
|
||||
}
|
||||
|
||||
private String getJsonData() throws IOException {
|
||||
return unCompressData(jsonDataCompressed);
|
||||
}
|
||||
|
||||
private byte[] compressData(String jsonData) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
|
||||
DeflaterOutputStream dos =
|
||||
new DeflaterOutputStream(baos, deflater, true);
|
||||
try {
|
||||
dos.write(jsonData.getBytes());
|
||||
} finally {
|
||||
dos.flush();
|
||||
dos.close();
|
||||
// Its important to call this.
|
||||
// Deflater takes off-heap native memory and does not release until GC kicks in
|
||||
deflater.end();
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
@Override public HoodieJsonPayload preCombine(HoodieJsonPayload another) {
|
||||
return this;
|
||||
|
||||
private String unCompressData(byte[] data) throws IOException {
|
||||
InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data));
|
||||
try {
|
||||
StringWriter sw = new StringWriter(dataSize);
|
||||
IOUtils.copy(iis, sw);
|
||||
return sw.toString();
|
||||
} finally {
|
||||
iis.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord oldRec, Schema schema) throws IOException {
|
||||
return getInsertValue(schema);
|
||||
private String getFieldFromJsonOrFail(String field) throws IOException {
|
||||
JsonNode node = new ObjectMapper().readTree(getJsonData());
|
||||
if (!node.has(field)) {
|
||||
throw new HoodieException("Field :" + field + " not found in payload => " + node.toString());
|
||||
}
|
||||
return node.get(field).getTextValue();
|
||||
}
|
||||
|
||||
@Override public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
|
||||
MercifulJsonConverter jsonConverter = new MercifulJsonConverter(schema);
|
||||
return Optional.of(jsonConverter.convert(getJsonData()));
|
||||
}
|
||||
public String getRowKey(String keyColumnField) throws IOException {
|
||||
return getFieldFromJsonOrFail(keyColumnField);
|
||||
}
|
||||
|
||||
private String getJsonData() throws IOException {
|
||||
return unCompressData(jsonDataCompressed);
|
||||
}
|
||||
|
||||
private byte[] compressData(String jsonData) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
Deflater deflater = new Deflater(Deflater.BEST_COMPRESSION);
|
||||
DeflaterOutputStream dos =
|
||||
new DeflaterOutputStream(baos, deflater, true);
|
||||
try {
|
||||
dos.write(jsonData.getBytes());
|
||||
} finally {
|
||||
dos.flush();
|
||||
dos.close();
|
||||
// Its important to call this.
|
||||
// Deflater takes off-heap native memory and does not release until GC kicks in
|
||||
deflater.end();
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
|
||||
private String unCompressData(byte[] data) throws IOException {
|
||||
InflaterInputStream iis = new InflaterInputStream(new ByteArrayInputStream(data));
|
||||
try {
|
||||
StringWriter sw = new StringWriter(dataSize);
|
||||
IOUtils.copy(iis, sw);
|
||||
return sw.toString();
|
||||
} finally {
|
||||
iis.close();
|
||||
}
|
||||
}
|
||||
|
||||
private String getFieldFromJsonOrFail(String field) throws IOException {
|
||||
JsonNode node = new ObjectMapper().readTree(getJsonData());
|
||||
if(!node.has(field)) {
|
||||
throw new HoodieException("Field :" + field + " not found in payload => " + node.toString());
|
||||
}
|
||||
return node.get(field).getTextValue();
|
||||
}
|
||||
|
||||
public String getRowKey(String keyColumnField) throws IOException {
|
||||
return getFieldFromJsonOrFail(keyColumnField);
|
||||
}
|
||||
|
||||
public String getPartitionPath(String partitionPathField) throws IOException {
|
||||
return getFieldFromJsonOrFail(partitionPathField);
|
||||
}
|
||||
public String getPartitionPath(String partitionPathField) throws IOException {
|
||||
return getFieldFromJsonOrFail(partitionPathField);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,81 +16,82 @@
|
||||
|
||||
package com.uber.hoodie.common;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
/**
|
||||
* Collects stats about a single partition clean operation
|
||||
*/
|
||||
public class HoodieRollbackStat implements Serializable {
|
||||
// Partition path
|
||||
private final String partitionPath;
|
||||
private final List<String> successDeleteFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteFiles;
|
||||
// Count of HoodieLogFile to commandBlocks written for a particular rollback
|
||||
private final Map<FileStatus, Long> commandBlocksCount;
|
||||
|
||||
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
|
||||
this.partitionPath = partitionPath;
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
this.commandBlocksCount = commandBlocksCount;
|
||||
// Partition path
|
||||
private final String partitionPath;
|
||||
private final List<String> successDeleteFiles;
|
||||
// Files that could not be deleted
|
||||
private final List<String> failedDeleteFiles;
|
||||
// Count of HoodieLogFile to commandBlocks written for a particular rollback
|
||||
private final Map<FileStatus, Long> commandBlocksCount;
|
||||
|
||||
public HoodieRollbackStat(String partitionPath, List<String> successDeleteFiles,
|
||||
List<String> failedDeleteFiles, Map<FileStatus, Long> commandBlocksCount) {
|
||||
this.partitionPath = partitionPath;
|
||||
this.successDeleteFiles = successDeleteFiles;
|
||||
this.failedDeleteFiles = failedDeleteFiles;
|
||||
this.commandBlocksCount = commandBlocksCount;
|
||||
}
|
||||
|
||||
public Map<FileStatus, Long> getCommandBlocksCount() {
|
||||
return commandBlocksCount;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
public List<String> getSuccessDeleteFiles() {
|
||||
return successDeleteFiles;
|
||||
}
|
||||
|
||||
public List<String> getFailedDeleteFiles() {
|
||||
return failedDeleteFiles;
|
||||
}
|
||||
|
||||
public static HoodieRollbackStat.Builder newBuilder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private List<String> successDeleteFiles;
|
||||
private List<String> failedDeleteFiles;
|
||||
private Map<FileStatus, Long> commandBlocksCount;
|
||||
private String partitionPath;
|
||||
|
||||
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
|
||||
//noinspection Convert2MethodRef
|
||||
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
|
||||
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
|
||||
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
|
||||
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Map<FileStatus, Long> getCommandBlocksCount() {
|
||||
return commandBlocksCount;
|
||||
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
|
||||
this.commandBlocksCount = commandBlocksCount;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
public Builder withPartitionPath(String partitionPath) {
|
||||
this.partitionPath = partitionPath;
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<String> getSuccessDeleteFiles() {
|
||||
return successDeleteFiles;
|
||||
}
|
||||
|
||||
public List<String> getFailedDeleteFiles() {
|
||||
return failedDeleteFiles;
|
||||
}
|
||||
|
||||
public static HoodieRollbackStat.Builder newBuilder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private List<String> successDeleteFiles;
|
||||
private List<String> failedDeleteFiles;
|
||||
private Map<FileStatus, Long> commandBlocksCount;
|
||||
private String partitionPath;
|
||||
|
||||
public Builder withDeletedFileResults(Map<FileStatus, Boolean> deletedFiles) {
|
||||
//noinspection Convert2MethodRef
|
||||
successDeleteFiles = deletedFiles.entrySet().stream().filter(s -> s.getValue())
|
||||
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
|
||||
failedDeleteFiles = deletedFiles.entrySet().stream().filter(s -> !s.getValue())
|
||||
.map(s -> s.getKey().getPath().toString()).collect(Collectors.toList());
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withRollbackBlockAppendResults(Map<FileStatus, Long> commandBlocksCount) {
|
||||
this.commandBlocksCount = commandBlocksCount;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withPartitionPath(String partitionPath) {
|
||||
this.partitionPath = partitionPath;
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieRollbackStat build() {
|
||||
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount);
|
||||
}
|
||||
public HoodieRollbackStat build() {
|
||||
return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles,
|
||||
commandBlocksCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,5 +17,5 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
public enum ActionType {
|
||||
commit, savepoint, compaction, clean, rollback;
|
||||
commit, savepoint, compaction, clean, rollback;
|
||||
}
|
||||
|
||||
@@ -17,13 +17,7 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class CompactionWriteStat implements Serializable {
|
||||
@@ -34,7 +28,8 @@ public class CompactionWriteStat implements Serializable {
|
||||
private long totalLogFiles;
|
||||
private long totalRecordsToBeUpdate;
|
||||
|
||||
public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles, long totalLogRecords,
|
||||
public CompactionWriteStat(HoodieWriteStat writeStat, String partitionPath, long totalLogFiles,
|
||||
long totalLogRecords,
|
||||
long totalRecordsToUpdate) {
|
||||
this.writeStat = writeStat;
|
||||
this.partitionPath = partitionPath;
|
||||
@@ -58,6 +53,7 @@ public class CompactionWriteStat implements Serializable {
|
||||
public long getTotalRecordsToBeUpdate() {
|
||||
return totalRecordsToBeUpdate;
|
||||
}
|
||||
|
||||
public HoodieWriteStat getHoodieWriteStat() {
|
||||
return writeStat;
|
||||
}
|
||||
|
||||
@@ -19,79 +19,75 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Within a file group, a slice is a combination of data file written at a commit time
|
||||
* and list of log files, containing changes to the data file from that commit time
|
||||
* Within a file group, a slice is a combination of data file written at a commit time and list of
|
||||
* log files, containing changes to the data file from that commit time
|
||||
*/
|
||||
public class FileSlice implements Serializable {
|
||||
|
||||
/**
|
||||
* id of the slice
|
||||
*/
|
||||
private String fileId;
|
||||
/**
|
||||
* id of the slice
|
||||
*/
|
||||
private String fileId;
|
||||
|
||||
/**
|
||||
* Point in the timeline, at which the slice was created
|
||||
*/
|
||||
private String baseCommitTime;
|
||||
/**
|
||||
* Point in the timeline, at which the slice was created
|
||||
*/
|
||||
private String baseCommitTime;
|
||||
|
||||
/**
|
||||
* data file, with the compacted data, for this slice
|
||||
*
|
||||
*/
|
||||
private HoodieDataFile dataFile;
|
||||
/**
|
||||
* data file, with the compacted data, for this slice
|
||||
*/
|
||||
private HoodieDataFile dataFile;
|
||||
|
||||
/**
|
||||
* List of appendable log files with real time data
|
||||
* - Sorted with greater log version first
|
||||
* - Always empty for copy_on_write storage.
|
||||
*/
|
||||
private final TreeSet<HoodieLogFile> logFiles;
|
||||
/**
|
||||
* List of appendable log files with real time data - Sorted with greater log version first -
|
||||
* Always empty for copy_on_write storage.
|
||||
*/
|
||||
private final TreeSet<HoodieLogFile> logFiles;
|
||||
|
||||
public FileSlice(String baseCommitTime, String fileId) {
|
||||
this.fileId = fileId;
|
||||
this.baseCommitTime = baseCommitTime;
|
||||
this.dataFile = null;
|
||||
this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator());
|
||||
}
|
||||
public FileSlice(String baseCommitTime, String fileId) {
|
||||
this.fileId = fileId;
|
||||
this.baseCommitTime = baseCommitTime;
|
||||
this.dataFile = null;
|
||||
this.logFiles = new TreeSet<>(HoodieLogFile.getLogVersionComparator());
|
||||
}
|
||||
|
||||
public void setDataFile(HoodieDataFile dataFile) {
|
||||
this.dataFile = dataFile;
|
||||
}
|
||||
public void setDataFile(HoodieDataFile dataFile) {
|
||||
this.dataFile = dataFile;
|
||||
}
|
||||
|
||||
public void addLogFile(HoodieLogFile logFile) {
|
||||
this.logFiles.add(logFile);
|
||||
}
|
||||
public void addLogFile(HoodieLogFile logFile) {
|
||||
this.logFiles.add(logFile);
|
||||
}
|
||||
|
||||
public Stream<HoodieLogFile> getLogFiles() {
|
||||
return logFiles.stream();
|
||||
}
|
||||
public Stream<HoodieLogFile> getLogFiles() {
|
||||
return logFiles.stream();
|
||||
}
|
||||
|
||||
public String getBaseCommitTime() {
|
||||
return baseCommitTime;
|
||||
}
|
||||
public String getBaseCommitTime() {
|
||||
return baseCommitTime;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
public Optional<HoodieDataFile> getDataFile() {
|
||||
return Optional.ofNullable(dataFile);
|
||||
}
|
||||
public Optional<HoodieDataFile> getDataFile() {
|
||||
return Optional.ofNullable(dataFile);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("FileSlice {");
|
||||
sb.append("baseCommitTime=").append(baseCommitTime);
|
||||
sb.append(", dataFile='").append(dataFile).append('\'');
|
||||
sb.append(", logFiles='").append(logFiles).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("FileSlice {");
|
||||
sb.append("baseCommitTime=").append(baseCommitTime);
|
||||
sb.append(", dataFile='").append(dataFile).append('\'');
|
||||
sb.append(", logFiles='").append(logFiles).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,18 +23,18 @@ import org.apache.hadoop.fs.Path;
|
||||
|
||||
public class HoodieArchivedLogFile extends HoodieLogFile {
|
||||
|
||||
public static final String ARCHIVE_EXTENSION = ".archive";
|
||||
public static final String ARCHIVE_EXTENSION = ".archive";
|
||||
|
||||
public HoodieArchivedLogFile(FileStatus fileStatus) {
|
||||
super(fileStatus);
|
||||
}
|
||||
public HoodieArchivedLogFile(FileStatus fileStatus) {
|
||||
super(fileStatus);
|
||||
}
|
||||
|
||||
public HoodieArchivedLogFile(Path logPath) {
|
||||
super(logPath);
|
||||
}
|
||||
public HoodieArchivedLogFile(Path logPath) {
|
||||
super(logPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieArchivedLogFile {" + super.getPath() + '}';
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieArchivedLogFile {" + super.getPath() + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,40 +17,37 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* This is a payload to wrap a existing Hoodie Avro Record.
|
||||
* Useful to create a HoodieRecord over existing GenericRecords in a hoodie datasets (useful in compactions)
|
||||
*
|
||||
* This is a payload to wrap a existing Hoodie Avro Record. Useful to create a HoodieRecord over
|
||||
* existing GenericRecords in a hoodie datasets (useful in compactions)
|
||||
*/
|
||||
public class HoodieAvroPayload implements HoodieRecordPayload<HoodieAvroPayload> {
|
||||
private final Optional<GenericRecord> record;
|
||||
|
||||
public HoodieAvroPayload(Optional<GenericRecord> record) {
|
||||
this.record = record;
|
||||
}
|
||||
private final Optional<GenericRecord> record;
|
||||
|
||||
@Override
|
||||
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
|
||||
return this;
|
||||
}
|
||||
public HoodieAvroPayload(Optional<GenericRecord> record) {
|
||||
this.record = record;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException {
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
@Override
|
||||
public HoodieAvroPayload preCombine(HoodieAvroPayload another) {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
|
||||
return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema));
|
||||
}
|
||||
@Override
|
||||
public Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException {
|
||||
return getInsertValue(schema);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException {
|
||||
return record.map(r -> HoodieAvroUtils.rewriteRecord(r, schema));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,6 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
public enum HoodieCleaningPolicy {
|
||||
KEEP_LATEST_FILE_VERSIONS,
|
||||
KEEP_LATEST_COMMITS
|
||||
KEEP_LATEST_FILE_VERSIONS,
|
||||
KEEP_LATEST_COMMITS
|
||||
}
|
||||
|
||||
@@ -17,8 +17,13 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
@@ -27,196 +32,195 @@ import org.codehaus.jackson.annotate.JsonMethod;
|
||||
import org.codehaus.jackson.map.DeserializationConfig.Feature;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* All the metadata that gets stored along with a commit.
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieCommitMetadata implements Serializable {
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
|
||||
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
|
||||
private Map<String, String> extraMetadataMap;
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
|
||||
protected Map<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
|
||||
public HoodieCommitMetadata() {
|
||||
extraMetadataMap = new HashMap<>();
|
||||
partitionToWriteStats = new HashMap<>();
|
||||
private Map<String, String> extraMetadataMap;
|
||||
|
||||
public HoodieCommitMetadata() {
|
||||
extraMetadataMap = new HashMap<>();
|
||||
partitionToWriteStats = new HashMap<>();
|
||||
}
|
||||
|
||||
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
|
||||
if (!partitionToWriteStats.containsKey(partitionPath)) {
|
||||
partitionToWriteStats.put(partitionPath, new ArrayList<>());
|
||||
}
|
||||
partitionToWriteStats.get(partitionPath).add(stat);
|
||||
}
|
||||
|
||||
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
|
||||
if (!partitionToWriteStats.containsKey(partitionPath)) {
|
||||
partitionToWriteStats.put(partitionPath, new ArrayList<>());
|
||||
public void addMetadata(String metaKey, String value) {
|
||||
extraMetadataMap.put(metaKey, value);
|
||||
}
|
||||
|
||||
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
|
||||
return partitionToWriteStats.get(partitionPath);
|
||||
}
|
||||
|
||||
public Map<String, String> getExtraMetadata() {
|
||||
return extraMetadataMap;
|
||||
}
|
||||
|
||||
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
|
||||
return partitionToWriteStats;
|
||||
}
|
||||
|
||||
public String getMetadata(String metaKey) {
|
||||
return extraMetadataMap.get(metaKey);
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndRelativePaths() {
|
||||
HashMap<String, String> filePaths = new HashMap<>();
|
||||
// list all partitions paths
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry : getPartitionToWriteStats().entrySet()) {
|
||||
for (HoodieWriteStat stat : entry.getValue()) {
|
||||
filePaths.put(stat.getFileId(), stat.getPath());
|
||||
}
|
||||
}
|
||||
return filePaths;
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndFullPaths(String basePath) {
|
||||
HashMap<String, String> fullPaths = new HashMap<>();
|
||||
for (Map.Entry<String, String> entry : getFileIdAndRelativePaths().entrySet()) {
|
||||
String fullPath =
|
||||
(entry.getValue() != null) ? (new Path(basePath, entry.getValue())).toString() : null;
|
||||
fullPaths.put(entry.getKey(), fullPath);
|
||||
}
|
||||
return fullPaths;
|
||||
}
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if (partitionToWriteStats.containsKey(null)) {
|
||||
log.info("partition path is null for " + partitionToWriteStats.get(null));
|
||||
partitionToWriteStats.remove(null);
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.defaultPrettyPrintingWriter().writeValueAsString(this);
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
|
||||
if (jsonStr == null || jsonStr.isEmpty()) {
|
||||
// For empty commit file (no data or somethings bad happen).
|
||||
return new HoodieCommitMetadata();
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.readValue(jsonStr, HoodieCommitMetadata.class);
|
||||
}
|
||||
|
||||
// Here the functions are named "fetch" instead of "get", to get avoid of the json conversion.
|
||||
public long fetchTotalPartitionsWritten() {
|
||||
return partitionToWriteStats.size();
|
||||
}
|
||||
|
||||
public long fetchTotalFilesInsert() {
|
||||
long totalFilesInsert = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalFilesInsert++;
|
||||
}
|
||||
partitionToWriteStats.get(partitionPath).add(stat);
|
||||
}
|
||||
}
|
||||
return totalFilesInsert;
|
||||
}
|
||||
|
||||
public void addMetadata(String metaKey, String value) {
|
||||
extraMetadataMap.put(metaKey, value);
|
||||
}
|
||||
|
||||
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
|
||||
return partitionToWriteStats.get(partitionPath);
|
||||
}
|
||||
|
||||
public Map<String, String> getExtraMetadata() { return extraMetadataMap; }
|
||||
|
||||
public Map<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
|
||||
return partitionToWriteStats;
|
||||
}
|
||||
|
||||
public String getMetadata(String metaKey) {
|
||||
return extraMetadataMap.get(metaKey);
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndRelativePaths() {
|
||||
HashMap<String, String> filePaths = new HashMap<>();
|
||||
// list all partitions paths
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry: getPartitionToWriteStats().entrySet()) {
|
||||
for (HoodieWriteStat stat: entry.getValue()) {
|
||||
filePaths.put(stat.getFileId(), stat.getPath());
|
||||
}
|
||||
public long fetchTotalFilesUpdated() {
|
||||
long totalFilesUpdated = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
|
||||
totalFilesUpdated++;
|
||||
}
|
||||
return filePaths;
|
||||
}
|
||||
}
|
||||
return totalFilesUpdated;
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndFullPaths(String basePath) {
|
||||
HashMap<String, String> fullPaths = new HashMap<>();
|
||||
for (Map.Entry<String, String> entry: getFileIdAndRelativePaths().entrySet()) {
|
||||
String fullPath = (entry.getValue() != null) ? (new Path(basePath, entry.getValue())).toString() : null;
|
||||
fullPaths.put(entry.getKey(), fullPath);
|
||||
} return fullPaths;
|
||||
public long fetchTotalUpdateRecordsWritten() {
|
||||
long totalUpdateRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalUpdateRecordsWritten += stat.getNumUpdateWrites();
|
||||
}
|
||||
}
|
||||
return totalUpdateRecordsWritten;
|
||||
}
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if(partitionToWriteStats.containsKey(null)) {
|
||||
log.info("partition path is null for " + partitionToWriteStats.get(null));
|
||||
partitionToWriteStats.remove(null);
|
||||
public long fetchTotalInsertRecordsWritten() {
|
||||
long totalInsertRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalInsertRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.defaultPrettyPrintingWriter().writeValueAsString(this);
|
||||
}
|
||||
}
|
||||
return totalInsertRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalRecordsWritten() {
|
||||
long totalRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
}
|
||||
return totalRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalBytesWritten() {
|
||||
long totalBytesWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalBytesWritten += stat.getTotalWriteBytes();
|
||||
}
|
||||
}
|
||||
return totalBytesWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalWriteErrors() {
|
||||
long totalWriteErrors = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalWriteErrors += stat.getTotalWriteErrors();
|
||||
}
|
||||
}
|
||||
return totalWriteErrors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
|
||||
if (jsonStr == null || jsonStr.isEmpty()) {
|
||||
// For empty commit file (no data or somethings bad happen).
|
||||
return new HoodieCommitMetadata();
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.configure(Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.readValue(jsonStr, HoodieCommitMetadata.class);
|
||||
}
|
||||
HoodieCommitMetadata that = (HoodieCommitMetadata) o;
|
||||
|
||||
// Here the functions are named "fetch" instead of "get", to get avoid of the json conversion.
|
||||
public long fetchTotalPartitionsWritten() {
|
||||
return partitionToWriteStats.size();
|
||||
}
|
||||
return partitionToWriteStats != null ?
|
||||
partitionToWriteStats.equals(that.partitionToWriteStats) :
|
||||
that.partitionToWriteStats == null;
|
||||
|
||||
public long fetchTotalFilesInsert() {
|
||||
long totalFilesInsert = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalFilesInsert ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalFilesInsert;
|
||||
}
|
||||
}
|
||||
|
||||
public long fetchTotalFilesUpdated() {
|
||||
long totalFilesUpdated = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
|
||||
totalFilesUpdated ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalFilesUpdated;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
|
||||
}
|
||||
|
||||
public long fetchTotalUpdateRecordsWritten() {
|
||||
long totalUpdateRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalUpdateRecordsWritten += stat.getNumUpdateWrites();
|
||||
}
|
||||
}
|
||||
return totalUpdateRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalInsertRecordsWritten() {
|
||||
long totalInsertRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalInsertRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalInsertRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalRecordsWritten() {
|
||||
long totalRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
}
|
||||
return totalRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalBytesWritten() {
|
||||
long totalBytesWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalBytesWritten += stat.getTotalWriteBytes();
|
||||
}
|
||||
}
|
||||
return totalBytesWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalWriteErrors() {
|
||||
long totalWriteErrors = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalWriteErrors += stat.getTotalWriteErrors();
|
||||
}
|
||||
}
|
||||
return totalWriteErrors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
HoodieCommitMetadata that = (HoodieCommitMetadata) o;
|
||||
|
||||
return partitionToWriteStats != null ?
|
||||
partitionToWriteStats.equals(that.partitionToWriteStats) :
|
||||
that.partitionToWriteStats == null;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
|
||||
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
|
||||
}
|
||||
public static HoodieCommitMetadata fromBytes(byte[] bytes) throws IOException {
|
||||
return fromJsonString(new String(bytes, Charset.forName("utf-8")));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,15 +16,12 @@
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.BinaryOperator;
|
||||
import java.util.function.Supplier;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.codehaus.jackson.annotate.JsonAutoDetect;
|
||||
@@ -33,9 +30,11 @@ import org.codehaus.jackson.map.DeserializationConfig.Feature;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
/**
|
||||
* Place holder for the compaction specific meta-data, uses all the details used in a normal HoodieCommitMetadata
|
||||
* Place holder for the compaction specific meta-data, uses all the details used in a normal
|
||||
* HoodieCommitMetadata
|
||||
*/
|
||||
public class HoodieCompactionMetadata extends HoodieCommitMetadata {
|
||||
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCompactionMetadata.class);
|
||||
protected HashMap<String, List<CompactionWriteStat>> partitionToCompactionWriteStats;
|
||||
|
||||
@@ -60,7 +59,7 @@ public class HoodieCompactionMetadata extends HoodieCommitMetadata {
|
||||
}
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if(partitionToCompactionWriteStats.containsKey(null)) {
|
||||
if (partitionToCompactionWriteStats.containsKey(null)) {
|
||||
log.info("partition path is null for " + partitionToCompactionWriteStats.get(null));
|
||||
partitionToCompactionWriteStats.remove(null);
|
||||
}
|
||||
|
||||
@@ -17,56 +17,54 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.PathFilter;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
public class HoodieDataFile implements Serializable {
|
||||
private FileStatus fileStatus;
|
||||
|
||||
public HoodieDataFile(FileStatus fileStatus) {
|
||||
this.fileStatus = fileStatus;
|
||||
}
|
||||
private FileStatus fileStatus;
|
||||
|
||||
public String getFileId() {
|
||||
return FSUtils.getFileId(fileStatus.getPath().getName());
|
||||
}
|
||||
public HoodieDataFile(FileStatus fileStatus) {
|
||||
this.fileStatus = fileStatus;
|
||||
}
|
||||
|
||||
public String getCommitTime() {
|
||||
return FSUtils.getCommitTime(fileStatus.getPath().getName());
|
||||
}
|
||||
public String getFileId() {
|
||||
return FSUtils.getFileId(fileStatus.getPath().getName());
|
||||
}
|
||||
|
||||
public String getPath() {
|
||||
return fileStatus.getPath().toString();
|
||||
}
|
||||
public String getCommitTime() {
|
||||
return FSUtils.getCommitTime(fileStatus.getPath().getName());
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileStatus.getPath().getName();
|
||||
}
|
||||
public String getPath() {
|
||||
return fileStatus.getPath().toString();
|
||||
}
|
||||
|
||||
public FileStatus getFileStatus() {
|
||||
return fileStatus;
|
||||
}
|
||||
public String getFileName() {
|
||||
return fileStatus.getPath().getName();
|
||||
}
|
||||
|
||||
public static Comparator<HoodieDataFile> getCommitTimeComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return o2.getCommitTime().compareTo(o1.getCommitTime());
|
||||
};
|
||||
}
|
||||
public FileStatus getFileStatus() {
|
||||
return fileStatus;
|
||||
}
|
||||
|
||||
public long getFileSize() {
|
||||
return fileStatus.getLen();
|
||||
}
|
||||
public static Comparator<HoodieDataFile> getCommitTimeComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return o2.getCommitTime().compareTo(o1.getCommitTime());
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieDataFile {");
|
||||
sb.append("fileStatus=").append(fileStatus);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
public long getFileSize() {
|
||||
return fileStatus.getLen();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieDataFile {");
|
||||
sb.append("fileStatus=").append(fileStatus);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,22 +24,22 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieDeltaWriteStat extends HoodieWriteStat {
|
||||
|
||||
private int logVersion;
|
||||
private long logOffset;
|
||||
private int logVersion;
|
||||
private long logOffset;
|
||||
|
||||
public void setLogVersion(int logVersion) {
|
||||
this.logVersion = logVersion;
|
||||
}
|
||||
public void setLogVersion(int logVersion) {
|
||||
this.logVersion = logVersion;
|
||||
}
|
||||
|
||||
public int getLogVersion() {
|
||||
return logVersion;
|
||||
}
|
||||
public int getLogVersion() {
|
||||
return logVersion;
|
||||
}
|
||||
|
||||
public void setLogOffset(long logOffset) {
|
||||
this.logOffset = logOffset;
|
||||
}
|
||||
public void setLogOffset(long logOffset) {
|
||||
this.logOffset = logOffset;
|
||||
}
|
||||
|
||||
public long getLogOffset() {
|
||||
return logOffset;
|
||||
}
|
||||
public long getLogOffset() {
|
||||
return logOffset;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,15 +17,15 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
public enum HoodieFileFormat {
|
||||
PARQUET(".parquet"), HOODIE_LOG(".log");
|
||||
PARQUET(".parquet"), HOODIE_LOG(".log");
|
||||
|
||||
private final String extension;
|
||||
private final String extension;
|
||||
|
||||
HoodieFileFormat(String extension) {
|
||||
this.extension = extension;
|
||||
}
|
||||
HoodieFileFormat(String extension) {
|
||||
this.extension = extension;
|
||||
}
|
||||
|
||||
public String getFileExtension() {
|
||||
return extension;
|
||||
}
|
||||
public String getFileExtension() {
|
||||
return extension;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,9 +20,6 @@ package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
@@ -35,212 +32,184 @@ import java.util.stream.Stream;
|
||||
*/
|
||||
public class HoodieFileGroup implements Serializable {
|
||||
|
||||
public static Comparator<String> getReverseCommitTimeComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return o2.compareTo(o1);
|
||||
};
|
||||
public static Comparator<String> getReverseCommitTimeComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return o2.compareTo(o1);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Partition containing the file group.
|
||||
*/
|
||||
private final String partitionPath;
|
||||
|
||||
/**
|
||||
* uniquely identifies the file group
|
||||
*/
|
||||
private final String id;
|
||||
|
||||
/**
|
||||
* Slices of files in this group, sorted with greater commit first.
|
||||
*/
|
||||
private final TreeMap<String, FileSlice> fileSlices;
|
||||
|
||||
/**
|
||||
* Timeline, based on which all getter work
|
||||
*/
|
||||
private final HoodieTimeline timeline;
|
||||
|
||||
/**
|
||||
* The last completed instant, that acts as a high watermark for all getters
|
||||
*/
|
||||
private final Optional<HoodieInstant> lastInstant;
|
||||
|
||||
public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) {
|
||||
this.partitionPath = partitionPath;
|
||||
this.id = id;
|
||||
this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator());
|
||||
this.timeline = timeline;
|
||||
this.lastInstant = timeline.lastInstant();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new datafile into the file group
|
||||
*/
|
||||
public void addDataFile(HoodieDataFile dataFile) {
|
||||
if (!fileSlices.containsKey(dataFile.getCommitTime())) {
|
||||
fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id));
|
||||
}
|
||||
fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Partition containing the file group.
|
||||
*/
|
||||
private final String partitionPath;
|
||||
|
||||
/**
|
||||
* uniquely identifies the file group
|
||||
*/
|
||||
private final String id;
|
||||
|
||||
/**
|
||||
* Slices of files in this group, sorted with greater commit first.
|
||||
*/
|
||||
private final TreeMap<String, FileSlice> fileSlices;
|
||||
|
||||
/**
|
||||
* Timeline, based on which all getter work
|
||||
*/
|
||||
private final HoodieTimeline timeline;
|
||||
|
||||
/**
|
||||
* The last completed instant, that acts as a high watermark for all
|
||||
* getters
|
||||
*/
|
||||
private final Optional<HoodieInstant> lastInstant;
|
||||
|
||||
public HoodieFileGroup(String partitionPath, String id, HoodieTimeline timeline) {
|
||||
this.partitionPath = partitionPath;
|
||||
this.id = id;
|
||||
this.fileSlices = new TreeMap<>(HoodieFileGroup.getReverseCommitTimeComparator());
|
||||
this.timeline = timeline;
|
||||
this.lastInstant = timeline.lastInstant();
|
||||
/**
|
||||
* Add a new log file into the group
|
||||
*/
|
||||
public void addLogFile(HoodieLogFile logFile) {
|
||||
if (!fileSlices.containsKey(logFile.getBaseCommitTime())) {
|
||||
fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id));
|
||||
}
|
||||
fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new datafile into the file group
|
||||
*
|
||||
* @param dataFile
|
||||
*/
|
||||
public void addDataFile(HoodieDataFile dataFile) {
|
||||
if (!fileSlices.containsKey(dataFile.getCommitTime())) {
|
||||
fileSlices.put(dataFile.getCommitTime(), new FileSlice(dataFile.getCommitTime(), id));
|
||||
}
|
||||
fileSlices.get(dataFile.getCommitTime()).setDataFile(dataFile);
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* A FileSlice is considered committed, if one of the following is true - There is a committed
|
||||
* data file - There are some log files, that are based off a commit or delta commit
|
||||
*/
|
||||
private boolean isFileSliceCommitted(FileSlice slice) {
|
||||
String maxCommitTime = lastInstant.get().getTimestamp();
|
||||
return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) &&
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a stream of committed file slices, sorted reverse base commit time.
|
||||
*/
|
||||
public Stream<FileSlice> getAllFileSlices() {
|
||||
if (!timeline.empty()) {
|
||||
return fileSlices.entrySet().stream()
|
||||
.map(sliceEntry -> sliceEntry.getValue())
|
||||
.filter(slice -> isFileSliceCommitted(slice));
|
||||
}
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new log file into the group
|
||||
*
|
||||
* @param logFile
|
||||
*/
|
||||
public void addLogFile(HoodieLogFile logFile) {
|
||||
if (!fileSlices.containsKey(logFile.getBaseCommitTime())) {
|
||||
fileSlices.put(logFile.getBaseCommitTime(), new FileSlice(logFile.getBaseCommitTime(), id));
|
||||
}
|
||||
fileSlices.get(logFile.getBaseCommitTime()).addLogFile(logFile);
|
||||
/**
|
||||
* Gets the latest slice - this can contain either
|
||||
*
|
||||
* - just the log files without data file - (or) data file with 0 or more log files
|
||||
*/
|
||||
public Optional<FileSlice> getLatestFileSlice() {
|
||||
// there should always be one
|
||||
return getAllFileSlices().findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
|
||||
*/
|
||||
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
|
||||
return getAllFileSlices()
|
||||
.filter(slice ->
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
|
||||
return getAllFileSlices()
|
||||
.filter(slice -> commitRange.contains(slice.getBaseCommitTime()))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream of committed data files, sorted reverse commit time
|
||||
*/
|
||||
public Stream<HoodieDataFile> getAllDataFiles() {
|
||||
return getAllFileSlices()
|
||||
.filter(slice -> slice.getDataFile().isPresent())
|
||||
.map(slice -> slice.getDataFile().get());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest committed data file
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFile() {
|
||||
return getAllDataFiles().findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest data file, that is <= max commit time
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFileBeforeOrOn(String maxCommitTime) {
|
||||
return getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest data file, that is contained within the provided commit range.
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFileInRange(List<String> commitRange) {
|
||||
return getAllDataFiles()
|
||||
.filter(dataFile -> commitRange.contains(dataFile.getCommitTime()))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the latest log file (based on latest committed data file), currently being appended to
|
||||
*
|
||||
* @return logfile if present, empty if no log file has been opened already.
|
||||
*/
|
||||
public Optional<HoodieLogFile> getLatestLogFile() {
|
||||
Optional<FileSlice> latestSlice = getLatestFileSlice();
|
||||
if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) {
|
||||
return latestSlice.get().getLogFiles().findFirst();
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* A FileSlice is considered committed, if one of the following is true
|
||||
* - There is a committed data file
|
||||
* - There are some log files, that are based off a commit or delta commit
|
||||
*
|
||||
* @param slice
|
||||
* @return
|
||||
*/
|
||||
private boolean isFileSliceCommitted(FileSlice slice) {
|
||||
String maxCommitTime = lastInstant.get().getTimestamp();
|
||||
return timeline.containsOrBeforeTimelineStarts(slice.getBaseCommitTime()) &&
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides a stream of committed file slices, sorted reverse base commit time.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Stream<FileSlice> getAllFileSlices() {
|
||||
if (!timeline.empty()) {
|
||||
return fileSlices.entrySet().stream()
|
||||
.map(sliceEntry -> sliceEntry.getValue())
|
||||
.filter(slice -> isFileSliceCommitted(slice));
|
||||
}
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the latest slice - this can contain either
|
||||
*
|
||||
* - just the log files without data file
|
||||
* - (or) data file with 0 or more log files
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Optional<FileSlice> getLatestFileSlice() {
|
||||
// there should always be one
|
||||
return getAllFileSlices().findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the latest file slice, upto a commitTime i.e <= maxCommitTime
|
||||
*
|
||||
* @param maxCommitTime
|
||||
* @return
|
||||
*/
|
||||
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
|
||||
return getAllFileSlices()
|
||||
.filter(slice ->
|
||||
HoodieTimeline.compareTimestamps(slice.getBaseCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
|
||||
return getAllFileSlices()
|
||||
.filter(slice -> commitRange.contains(slice.getBaseCommitTime()))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream of committed data files, sorted reverse commit time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Stream<HoodieDataFile> getAllDataFiles() {
|
||||
return getAllFileSlices()
|
||||
.filter(slice -> slice.getDataFile().isPresent())
|
||||
.map(slice -> slice.getDataFile().get());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest committed data file
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFile() {
|
||||
return getAllDataFiles().findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest data file, that is <= max commit time
|
||||
*
|
||||
* @param maxCommitTime
|
||||
* @return
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFileBeforeOrOn(String maxCommitTime) {
|
||||
return getAllDataFiles()
|
||||
.filter(dataFile ->
|
||||
HoodieTimeline.compareTimestamps(dataFile.getCommitTime(),
|
||||
maxCommitTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest data file, that is contained within the provided commit range.
|
||||
*
|
||||
* @param commitRange
|
||||
* @return
|
||||
*/
|
||||
public Optional<HoodieDataFile> getLatestDataFileInRange(List<String> commitRange) {
|
||||
return getAllDataFiles()
|
||||
.filter(dataFile -> commitRange.contains(dataFile.getCommitTime()))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain the latest log file (based on latest committed data file),
|
||||
* currently being appended to
|
||||
*
|
||||
* @return logfile if present, empty if no log file has been opened already.
|
||||
*/
|
||||
public Optional<HoodieLogFile> getLatestLogFile() {
|
||||
Optional<FileSlice> latestSlice = getLatestFileSlice();
|
||||
if (latestSlice.isPresent() && latestSlice.get().getLogFiles().count() > 0) {
|
||||
return latestSlice.get().getLogFiles().findFirst();
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieFileGroup {");
|
||||
sb.append("id=").append(id);
|
||||
sb.append(", fileSlices='").append(fileSlices).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieFileGroup {");
|
||||
sb.append("id=").append(id);
|
||||
sb.append(", fileSlices='").append(fileSlices).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,57 +17,58 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* HoodieKey consists of
|
||||
*
|
||||
* - recordKey : a recordKey that acts as primary key for a record
|
||||
* - partitionPath : path to the partition that contains the record
|
||||
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the
|
||||
* partition that contains the record
|
||||
*/
|
||||
public class HoodieKey implements Serializable {
|
||||
|
||||
|
||||
private final String recordKey;
|
||||
private final String recordKey;
|
||||
|
||||
private final String partitionPath;
|
||||
private final String partitionPath;
|
||||
|
||||
public HoodieKey(String recordKey, String partitionPath) {
|
||||
this.recordKey = recordKey;
|
||||
this.partitionPath = partitionPath;
|
||||
public HoodieKey(String recordKey, String partitionPath) {
|
||||
this.recordKey = recordKey;
|
||||
this.partitionPath = partitionPath;
|
||||
}
|
||||
|
||||
public String getRecordKey() {
|
||||
return recordKey;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getRecordKey() {
|
||||
return recordKey;
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieKey otherKey = (HoodieKey) o;
|
||||
return Objects.equal(recordKey, otherKey.recordKey) &&
|
||||
Objects.equal(partitionPath, otherKey.partitionPath);
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(recordKey, partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieKey otherKey = (HoodieKey) o;
|
||||
return Objects.equal(recordKey, otherKey.recordKey) &&
|
||||
Objects.equal(partitionPath, otherKey.partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(recordKey, partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieKey {");
|
||||
sb.append(" recordKey=").append(recordKey);
|
||||
sb.append(" partitionPath=").append(partitionPath);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieKey {");
|
||||
sb.append(" recordKey=").append(recordKey);
|
||||
sb.append(" partitionPath=").append(partitionPath);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,13 +19,13 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Comparator;
|
||||
import java.util.Optional;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Abstracts a single log file. Contains methods to extract metadata like the fileId, version and
|
||||
@@ -34,73 +34,74 @@ import java.util.Optional;
|
||||
* Also contains logic to roll-over the log file
|
||||
*/
|
||||
public class HoodieLogFile implements Serializable {
|
||||
public static final String DELTA_EXTENSION = ".log";
|
||||
|
||||
private final Path path;
|
||||
private Optional<FileStatus> fileStatus;
|
||||
public static final String DELTA_EXTENSION = ".log";
|
||||
|
||||
public HoodieLogFile(FileStatus fileStatus) {
|
||||
this(fileStatus.getPath());
|
||||
this.fileStatus = Optional.of(fileStatus);
|
||||
}
|
||||
private final Path path;
|
||||
private Optional<FileStatus> fileStatus;
|
||||
|
||||
public HoodieLogFile(Path logPath) {
|
||||
this.path = logPath;
|
||||
this.fileStatus = Optional.empty();
|
||||
}
|
||||
public HoodieLogFile(FileStatus fileStatus) {
|
||||
this(fileStatus.getPath());
|
||||
this.fileStatus = Optional.of(fileStatus);
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return FSUtils.getFileIdFromLogPath(path);
|
||||
}
|
||||
public HoodieLogFile(Path logPath) {
|
||||
this.path = logPath;
|
||||
this.fileStatus = Optional.empty();
|
||||
}
|
||||
|
||||
public String getBaseCommitTime() {
|
||||
return FSUtils.getBaseCommitTimeFromLogPath(path);
|
||||
}
|
||||
public String getFileId() {
|
||||
return FSUtils.getFileIdFromLogPath(path);
|
||||
}
|
||||
|
||||
public int getLogVersion() {
|
||||
return FSUtils.getFileVersionFromLog(path);
|
||||
}
|
||||
public String getBaseCommitTime() {
|
||||
return FSUtils.getBaseCommitTimeFromLogPath(path);
|
||||
}
|
||||
|
||||
public String getFileExtension() {
|
||||
return FSUtils.getFileExtensionFromLog(path);
|
||||
}
|
||||
public int getLogVersion() {
|
||||
return FSUtils.getFileVersionFromLog(path);
|
||||
}
|
||||
|
||||
public Path getPath() {
|
||||
return path;
|
||||
}
|
||||
public String getFileExtension() {
|
||||
return FSUtils.getFileExtensionFromLog(path);
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return path.getName();
|
||||
}
|
||||
public Path getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
public Optional<FileStatus> getFileStatus() {
|
||||
return fileStatus;
|
||||
}
|
||||
public String getFileName() {
|
||||
return path.getName();
|
||||
}
|
||||
|
||||
public Optional<Long> getFileSize() {
|
||||
return fileStatus.map(FileStatus::getLen);
|
||||
}
|
||||
public Optional<FileStatus> getFileStatus() {
|
||||
return fileStatus;
|
||||
}
|
||||
|
||||
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
|
||||
String fileId = getFileId();
|
||||
String baseCommitTime = getBaseCommitTime();
|
||||
String extension = "." + FSUtils.getFileExtensionFromLog(path);
|
||||
int newVersion = FSUtils
|
||||
.computeNextLogVersion(fs, path.getParent(), fileId,
|
||||
extension, baseCommitTime);
|
||||
return new HoodieLogFile(new Path(path.getParent(),
|
||||
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
|
||||
}
|
||||
public Optional<Long> getFileSize() {
|
||||
return fileStatus.map(FileStatus::getLen);
|
||||
}
|
||||
|
||||
public static Comparator<HoodieLogFile> getLogVersionComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
|
||||
};
|
||||
}
|
||||
public HoodieLogFile rollOver(FileSystem fs) throws IOException {
|
||||
String fileId = getFileId();
|
||||
String baseCommitTime = getBaseCommitTime();
|
||||
String extension = "." + FSUtils.getFileExtensionFromLog(path);
|
||||
int newVersion = FSUtils
|
||||
.computeNextLogVersion(fs, path.getParent(), fileId,
|
||||
extension, baseCommitTime);
|
||||
return new HoodieLogFile(new Path(path.getParent(),
|
||||
FSUtils.makeLogFileName(fileId, extension, baseCommitTime, newVersion)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieLogFile {" + path + '}';
|
||||
}
|
||||
public static Comparator<HoodieLogFile> getLogVersionComparator() {
|
||||
return (o1, o2) -> {
|
||||
// reverse the order
|
||||
return new Integer(o2.getLogVersion()).compareTo(o1.getLogVersion());
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "HoodieLogFile {" + path + '}';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -25,117 +26,119 @@ import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* The metadata that goes into the meta file in each partition
|
||||
*/
|
||||
public class HoodiePartitionMetadata {
|
||||
|
||||
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
|
||||
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
|
||||
public static final String COMMIT_TIME_KEY = "commitTime";
|
||||
public static final String HOODIE_PARTITION_METAFILE = ".hoodie_partition_metadata";
|
||||
public static final String PARTITION_DEPTH_KEY = "partitionDepth";
|
||||
public static final String COMMIT_TIME_KEY = "commitTime";
|
||||
|
||||
/**
|
||||
* Contents of the metadata
|
||||
*/
|
||||
private final Properties props;
|
||||
/**
|
||||
* Contents of the metadata
|
||||
*/
|
||||
private final Properties props;
|
||||
|
||||
/**
|
||||
* Path to the partition, about which we have the metadata
|
||||
*/
|
||||
private final Path partitionPath;
|
||||
/**
|
||||
* Path to the partition, about which we have the metadata
|
||||
*/
|
||||
private final Path partitionPath;
|
||||
|
||||
private final FileSystem fs;
|
||||
private final FileSystem fs;
|
||||
|
||||
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
|
||||
private static Logger log = LogManager.getLogger(HoodiePartitionMetadata.class);
|
||||
|
||||
|
||||
/**
|
||||
* Construct metadata from existing partition
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
|
||||
this.fs = fs;
|
||||
this.props = new Properties();
|
||||
this.partitionPath = partitionPath;
|
||||
/**
|
||||
* Construct metadata from existing partition
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, Path partitionPath) {
|
||||
this.fs = fs;
|
||||
this.props = new Properties();
|
||||
this.partitionPath = partitionPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct metadata object to be written out.
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath,
|
||||
Path partitionPath) {
|
||||
this(fs, partitionPath);
|
||||
props.setProperty(COMMIT_TIME_KEY, commitTime);
|
||||
props
|
||||
.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||
}
|
||||
|
||||
public int getPartitionDepth() {
|
||||
if (!props.containsKey(PARTITION_DEPTH_KEY)) {
|
||||
throw new HoodieException("Could not find partitionDepth in partition metafile");
|
||||
}
|
||||
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct metadata object to be written out.
|
||||
*/
|
||||
public HoodiePartitionMetadata(FileSystem fs, String commitTime, Path basePath, Path partitionPath) {
|
||||
this(fs, partitionPath);
|
||||
props.setProperty(COMMIT_TIME_KEY, commitTime);
|
||||
props.setProperty(PARTITION_DEPTH_KEY, String.valueOf(partitionPath.depth() - basePath.depth()));
|
||||
}
|
||||
/**
|
||||
* Write the metadata safely into partition atomically.
|
||||
*/
|
||||
public void trySave(int taskPartitionId) {
|
||||
Path tmpMetaPath = new Path(partitionPath,
|
||||
HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
|
||||
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
|
||||
boolean metafileExists = false;
|
||||
|
||||
public int getPartitionDepth() {
|
||||
if (!props.containsKey(PARTITION_DEPTH_KEY)) {
|
||||
throw new HoodieException("Could not find partitionDepth in partition metafile");
|
||||
}
|
||||
return Integer.parseInt(props.getProperty(PARTITION_DEPTH_KEY));
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the metadata safely into partition atomically.
|
||||
*
|
||||
* @param taskPartitionId
|
||||
*/
|
||||
public void trySave(int taskPartitionId) {
|
||||
Path tmpMetaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE + "_" + taskPartitionId);
|
||||
Path metaPath = new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
|
||||
boolean metafileExists = false;
|
||||
try {
|
||||
metafileExists = fs.exists(metaPath);
|
||||
if (!metafileExists) {
|
||||
// write to temporary file
|
||||
FSDataOutputStream os = fs.create(tmpMetaPath, true);
|
||||
props.store(os, "partition metadata");
|
||||
os.hsync();
|
||||
os.hflush();
|
||||
os.close();
|
||||
|
||||
// move to actual path
|
||||
fs.rename(tmpMetaPath, metaPath);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn(
|
||||
"Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), "
|
||||
+
|
||||
partitionPath, ioe);
|
||||
} finally {
|
||||
if (!metafileExists) {
|
||||
try {
|
||||
metafileExists = fs.exists(metaPath);
|
||||
if (!metafileExists) {
|
||||
// write to temporary file
|
||||
FSDataOutputStream os = fs.create(tmpMetaPath, true);
|
||||
props.store(os, "partition metadata");
|
||||
os.hsync();
|
||||
os.hflush();
|
||||
os.close();
|
||||
|
||||
// move to actual path
|
||||
fs.rename(tmpMetaPath, metaPath);
|
||||
}
|
||||
// clean up tmp file, if still lying around
|
||||
if (fs.exists(tmpMetaPath)) {
|
||||
fs.delete(tmpMetaPath, false);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Error trying to save partition metadata (this is okay, as long as atleast 1 of these succced), " +
|
||||
partitionPath, ioe);
|
||||
} finally {
|
||||
if (!metafileExists) {
|
||||
try {
|
||||
// clean up tmp file, if still lying around
|
||||
if (fs.exists(tmpMetaPath)) {
|
||||
fs.delete(tmpMetaPath, false);
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
|
||||
}
|
||||
}
|
||||
log.warn("Error trying to clean up temporary files for " + partitionPath, ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read out the metadata for this partition
|
||||
*/
|
||||
public void readFromFS() {
|
||||
try {
|
||||
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
|
||||
FSDataInputStream is = fs.open(metaFile);
|
||||
props.load(is);
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath, ioe);
|
||||
}
|
||||
/**
|
||||
* Read out the metadata for this partition
|
||||
*/
|
||||
public void readFromFS() {
|
||||
try {
|
||||
Path metaFile = new Path(partitionPath, HOODIE_PARTITION_METAFILE);
|
||||
FSDataInputStream is = fs.open(metaFile);
|
||||
props.load(is);
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error reading Hoodie partition metadata for " + partitionPath,
|
||||
ioe);
|
||||
}
|
||||
}
|
||||
|
||||
// methods related to partition meta data
|
||||
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) {
|
||||
try {
|
||||
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath, ioe);
|
||||
}
|
||||
// methods related to partition meta data
|
||||
public static boolean hasPartitionMetadata(FileSystem fs, Path partitionPath) {
|
||||
try {
|
||||
return fs.exists(new Path(partitionPath, HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE));
|
||||
} catch (IOException ioe) {
|
||||
throw new HoodieException("Error checking Hoodie partition metadata for " + partitionPath,
|
||||
ioe);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
@@ -26,129 +25,131 @@ import java.util.Optional;
|
||||
*/
|
||||
public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable {
|
||||
|
||||
public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
|
||||
public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
|
||||
public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
|
||||
public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
|
||||
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
|
||||
public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
|
||||
public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
|
||||
public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
|
||||
public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
|
||||
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
|
||||
|
||||
/**
|
||||
* Identifies the record across the table
|
||||
*/
|
||||
private HoodieKey key;
|
||||
/**
|
||||
* Identifies the record across the table
|
||||
*/
|
||||
private HoodieKey key;
|
||||
|
||||
/**
|
||||
* Actual payload of the record
|
||||
*/
|
||||
private T data;
|
||||
/**
|
||||
* Actual payload of the record
|
||||
*/
|
||||
private T data;
|
||||
|
||||
/**
|
||||
* Current location of record on storage. Filled in by looking up index
|
||||
*/
|
||||
private HoodieRecordLocation currentLocation;
|
||||
/**
|
||||
* Current location of record on storage. Filled in by looking up index
|
||||
*/
|
||||
private HoodieRecordLocation currentLocation;
|
||||
|
||||
/**
|
||||
* New location of record on storage, after written
|
||||
*/
|
||||
private HoodieRecordLocation newLocation;
|
||||
/**
|
||||
* New location of record on storage, after written
|
||||
*/
|
||||
private HoodieRecordLocation newLocation;
|
||||
|
||||
public HoodieRecord(HoodieKey key, T data) {
|
||||
this.key = key;
|
||||
this.data = data;
|
||||
this.currentLocation = null;
|
||||
this.newLocation = null;
|
||||
public HoodieRecord(HoodieKey key, T data) {
|
||||
this.key = key;
|
||||
this.data = data;
|
||||
this.currentLocation = null;
|
||||
this.newLocation = null;
|
||||
}
|
||||
|
||||
public HoodieKey getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public T getData() {
|
||||
if (data == null) {
|
||||
throw new IllegalStateException("Payload already deflated for record.");
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
public HoodieKey getKey() {
|
||||
return key;
|
||||
/**
|
||||
* Release the actual payload, to ease memory pressure. To be called after the record has been
|
||||
* written to storage. Once deflated, cannot be inflated.
|
||||
*/
|
||||
public void deflate() {
|
||||
this.data = null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the current currentLocation of the record. This should happen exactly-once
|
||||
*/
|
||||
public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
|
||||
assert currentLocation == null;
|
||||
this.currentLocation = location;
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieRecordLocation getCurrentLocation() {
|
||||
return currentLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the new currentLocation of the record, after being written. This again should happen
|
||||
* exactly-once.
|
||||
*/
|
||||
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
|
||||
assert newLocation == null;
|
||||
this.newLocation = location;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Optional<HoodieRecordLocation> getNewLocation() {
|
||||
return Optional.of(this.newLocation);
|
||||
}
|
||||
|
||||
public boolean isCurrentLocationKnown() {
|
||||
return this.currentLocation != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
|
||||
public T getData() {
|
||||
if (data == null) {
|
||||
throw new IllegalStateException("Payload already deflated for record.");
|
||||
}
|
||||
return data;
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieRecord that = (HoodieRecord) o;
|
||||
return Objects.equal(key, that.key) &&
|
||||
Objects.equal(data, that.data) &&
|
||||
Objects.equal(currentLocation, that.currentLocation) &&
|
||||
Objects.equal(newLocation, that.newLocation);
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the actual payload, to ease memory pressure. To be called after the record
|
||||
* has been written to storage. Once deflated, cannot be inflated.
|
||||
*/
|
||||
public void deflate() {
|
||||
this.data = null;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(key, data, currentLocation, newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecord{");
|
||||
sb.append("key=").append(key);
|
||||
sb.append(", currentLocation='").append(currentLocation).append('\'');
|
||||
sb.append(", newLocation='").append(newLocation).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the current currentLocation of the record. This should happen exactly-once
|
||||
*/
|
||||
public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
|
||||
assert currentLocation == null;
|
||||
this.currentLocation = location;
|
||||
return this;
|
||||
}
|
||||
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
|
||||
return commitTime + "_" + partitionId + "_" + recordIndex;
|
||||
}
|
||||
|
||||
public HoodieRecordLocation getCurrentLocation() {
|
||||
return currentLocation;
|
||||
}
|
||||
public String getPartitionPath() {
|
||||
assert key != null;
|
||||
return key.getPartitionPath();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the new currentLocation of the record, after being written. This again should happen
|
||||
* exactly-once.
|
||||
*/
|
||||
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
|
||||
assert newLocation == null;
|
||||
this.newLocation = location;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Optional<HoodieRecordLocation> getNewLocation() {
|
||||
return Optional.of(this.newLocation);
|
||||
}
|
||||
|
||||
public boolean isCurrentLocationKnown() {
|
||||
return this.currentLocation != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieRecord that = (HoodieRecord) o;
|
||||
return Objects.equal(key, that.key) &&
|
||||
Objects.equal(data, that.data) &&
|
||||
Objects.equal(currentLocation, that.currentLocation) &&
|
||||
Objects.equal(newLocation, that.newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(key, data, currentLocation, newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecord{");
|
||||
sb.append("key=").append(key);
|
||||
sb.append(", currentLocation='").append(currentLocation).append('\'');
|
||||
sb.append(", newLocation='").append(newLocation).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
|
||||
return commitTime + "_" + partitionId + "_" + recordIndex;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
assert key != null;
|
||||
return key.getPartitionPath();
|
||||
}
|
||||
|
||||
public String getRecordKey() {
|
||||
assert key != null;
|
||||
return key.getRecordKey();
|
||||
}
|
||||
public String getRecordKey() {
|
||||
assert key != null;
|
||||
return key.getRecordKey();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
@@ -26,44 +25,46 @@ import java.io.Serializable;
|
||||
*/
|
||||
public class HoodieRecordLocation implements Serializable {
|
||||
|
||||
private final String commitTime;
|
||||
private final String fileId;
|
||||
private final String commitTime;
|
||||
private final String fileId;
|
||||
|
||||
public HoodieRecordLocation(String commitTime, String fileId) {
|
||||
this.commitTime = commitTime;
|
||||
this.fileId = fileId;
|
||||
}
|
||||
public HoodieRecordLocation(String commitTime, String fileId) {
|
||||
this.commitTime = commitTime;
|
||||
this.fileId = fileId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
|
||||
return Objects.equal(commitTime, otherLoc.commitTime) &&
|
||||
Objects.equal(fileId, otherLoc.fileId);
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
|
||||
return Objects.equal(commitTime, otherLoc.commitTime) &&
|
||||
Objects.equal(fileId, otherLoc.fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(commitTime, fileId);
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(commitTime, fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
|
||||
sb.append("commitTime=").append(commitTime).append(", ");
|
||||
sb.append("fileId=").append(fileId);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
|
||||
sb.append("commitTime=").append(commitTime).append(", ");
|
||||
sb.append("fileId=").append(fileId);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getCommitTime() {
|
||||
return commitTime;
|
||||
}
|
||||
public String getCommitTime() {
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,54 +16,55 @@
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
|
||||
/**
|
||||
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code>
|
||||
* This abstracts out callbacks which depend on record specific logic
|
||||
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code> This abstracts
|
||||
* out callbacks which depend on record specific logic
|
||||
*/
|
||||
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
|
||||
/**
|
||||
* When more than one HoodieRecord have the same HoodieKey, this function combines them
|
||||
* before attempting to insert/upsert (if combining turned on in HoodieClientConfig)
|
||||
*/
|
||||
T preCombine(T another);
|
||||
|
||||
/**
|
||||
*
|
||||
* This methods lets you write custom merging/combining logic to produce new values
|
||||
* as a function of current value on storage and whats contained in this object.
|
||||
*
|
||||
* eg:
|
||||
* 1) You are updating counters, you may want to add counts to currentValue and write back updated counts
|
||||
* 2) You may be reading DB redo logs, and merge them with current image for a database row on storage
|
||||
*
|
||||
* @param currentValue Current value in storage, to merge/combine this payload with
|
||||
* @param schema Schema used for record
|
||||
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this record.
|
||||
*/
|
||||
Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
|
||||
/**
|
||||
* When more than one HoodieRecord have the same HoodieKey, this function combines them before
|
||||
* attempting to insert/upsert (if combining turned on in HoodieClientConfig)
|
||||
*/
|
||||
T preCombine(T another);
|
||||
|
||||
/**
|
||||
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
|
||||
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
|
||||
* storage to be combined against. (i.e insert)
|
||||
* Return EMPTY to skip writing this record.
|
||||
*/
|
||||
Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException;
|
||||
/**
|
||||
* This methods lets you write custom merging/combining logic to produce new values as a function
|
||||
* of current value on storage and whats contained in this object.
|
||||
*
|
||||
* eg: 1) You are updating counters, you may want to add counts to currentValue and write back
|
||||
* updated counts 2) You may be reading DB redo logs, and merge them with current image for a
|
||||
* database row on storage
|
||||
*
|
||||
* @param currentValue Current value in storage, to merge/combine this payload with
|
||||
* @param schema Schema used for record
|
||||
* @return new combined/merged value to be written back to storage. EMPTY to skip writing this
|
||||
* record.
|
||||
*/
|
||||
Optional<IndexedRecord> combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is passed
|
||||
* to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to compute
|
||||
* some aggregate metrics using the metadata in the context of a write success or failure.
|
||||
*/
|
||||
default Optional<Map<String, String>> getMetadata() {
|
||||
return Optional.empty();
|
||||
}
|
||||
/**
|
||||
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
|
||||
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
|
||||
* storage to be combined against. (i.e insert) Return EMPTY to skip writing this record.
|
||||
*/
|
||||
Optional<IndexedRecord> getInsertValue(Schema schema) throws IOException;
|
||||
|
||||
/**
|
||||
* This method can be used to extract some metadata from HoodieRecordPayload. The metadata is
|
||||
* passed to {@code WriteStatus.markSuccess()} and {@code WriteStatus.markFailure()} in order to
|
||||
* compute some aggregate metrics using the metadata in the context of a write success or
|
||||
* failure.
|
||||
*/
|
||||
default Optional<Map<String, String>> getMetadata() {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -31,5 +31,5 @@ package com.uber.hoodie.common.model;
|
||||
* SIMPLE_LSM - A simple 2 level LSM tree.
|
||||
*/
|
||||
public enum HoodieTableType {
|
||||
COPY_ON_WRITE, MERGE_ON_READ
|
||||
COPY_ON_WRITE, MERGE_ON_READ
|
||||
}
|
||||
|
||||
@@ -17,8 +17,6 @@
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
@@ -27,143 +25,153 @@ import java.io.Serializable;
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieWriteStat implements Serializable {
|
||||
|
||||
public static final String NULL_COMMIT = "null";
|
||||
public static final String NULL_COMMIT = "null";
|
||||
|
||||
/**
|
||||
* Id of the file being written
|
||||
*/
|
||||
private String fileId;
|
||||
/**
|
||||
* Id of the file being written
|
||||
*/
|
||||
private String fileId;
|
||||
|
||||
/**
|
||||
* Relative path to the file from the base path
|
||||
*/
|
||||
private String path;
|
||||
/**
|
||||
* Relative path to the file from the base path
|
||||
*/
|
||||
private String path;
|
||||
|
||||
/**
|
||||
* The previous version of the file. (null if this is the first version. i.e insert)
|
||||
*/
|
||||
private String prevCommit;
|
||||
/**
|
||||
* The previous version of the file. (null if this is the first version. i.e insert)
|
||||
*/
|
||||
private String prevCommit;
|
||||
|
||||
/**
|
||||
* Total number of records written for this file.
|
||||
* - for updates, its the entire number of records in the file
|
||||
* - for inserts, its the actual number of records inserted.
|
||||
*/
|
||||
private long numWrites;
|
||||
/**
|
||||
* Total number of records written for this file. - for updates, its the entire number of records
|
||||
* in the file - for inserts, its the actual number of records inserted.
|
||||
*/
|
||||
private long numWrites;
|
||||
|
||||
/**
|
||||
* Total number of records deleted.
|
||||
*/
|
||||
private long numDeletes;
|
||||
/**
|
||||
* Total number of records deleted.
|
||||
*/
|
||||
private long numDeletes;
|
||||
|
||||
/**
|
||||
* Total number of records actually changed. (0 for inserts)
|
||||
*/
|
||||
private long numUpdateWrites;
|
||||
/**
|
||||
* Total number of records actually changed. (0 for inserts)
|
||||
*/
|
||||
private long numUpdateWrites;
|
||||
|
||||
/**
|
||||
* Total size of file written
|
||||
*/
|
||||
private long totalWriteBytes;
|
||||
/**
|
||||
* Total size of file written
|
||||
*/
|
||||
private long totalWriteBytes;
|
||||
|
||||
/**
|
||||
* Total number of records, that were n't able to be written due to errors.
|
||||
*/
|
||||
private long totalWriteErrors;
|
||||
/**
|
||||
* Total number of records, that were n't able to be written due to errors.
|
||||
*/
|
||||
private long totalWriteErrors;
|
||||
|
||||
public HoodieWriteStat() {
|
||||
// called by jackson json lib
|
||||
public HoodieWriteStat() {
|
||||
// called by jackson json lib
|
||||
}
|
||||
|
||||
public void setFileId(String fileId) {
|
||||
this.fileId = fileId;
|
||||
}
|
||||
|
||||
public void setPath(String path) {
|
||||
this.path = path;
|
||||
}
|
||||
|
||||
public void setPrevCommit(String prevCommit) {
|
||||
this.prevCommit = prevCommit;
|
||||
}
|
||||
|
||||
public void setNumWrites(long numWrites) {
|
||||
this.numWrites = numWrites;
|
||||
}
|
||||
|
||||
public void setNumDeletes(long numDeletes) {
|
||||
this.numDeletes = numDeletes;
|
||||
}
|
||||
|
||||
public void setNumUpdateWrites(long numUpdateWrites) {
|
||||
this.numUpdateWrites = numUpdateWrites;
|
||||
}
|
||||
|
||||
public long getTotalWriteBytes() {
|
||||
return totalWriteBytes;
|
||||
}
|
||||
|
||||
public void setTotalWriteBytes(long totalWriteBytes) {
|
||||
this.totalWriteBytes = totalWriteBytes;
|
||||
}
|
||||
|
||||
public long getTotalWriteErrors() {
|
||||
return totalWriteErrors;
|
||||
}
|
||||
|
||||
public void setTotalWriteErrors(long totalWriteErrors) {
|
||||
this.totalWriteErrors = totalWriteErrors;
|
||||
}
|
||||
|
||||
public String getPrevCommit() {
|
||||
return prevCommit;
|
||||
}
|
||||
|
||||
public long getNumWrites() {
|
||||
return numWrites;
|
||||
}
|
||||
|
||||
public long getNumDeletes() {
|
||||
return numDeletes;
|
||||
}
|
||||
|
||||
public long getNumUpdateWrites() {
|
||||
return numUpdateWrites;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
public String getPath() {
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new StringBuilder()
|
||||
.append("HoodieWriteStat {")
|
||||
.append("path=" + path)
|
||||
.append(", prevCommit='" + prevCommit + '\'')
|
||||
.append(", numWrites=" + numWrites)
|
||||
.append(", numDeletes=" + numDeletes)
|
||||
.append(", numUpdateWrites=" + numUpdateWrites)
|
||||
.append(", numWriteBytes=" + totalWriteBytes)
|
||||
.append('}')
|
||||
.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void setFileId(String fileId) {
|
||||
this.fileId = fileId;
|
||||
HoodieWriteStat that = (HoodieWriteStat) o;
|
||||
if (!path.equals(that.path)) {
|
||||
return false;
|
||||
}
|
||||
return prevCommit.equals(that.prevCommit);
|
||||
|
||||
public void setPath(String path) { this.path = path; }
|
||||
}
|
||||
|
||||
public void setPrevCommit(String prevCommit) {
|
||||
this.prevCommit = prevCommit;
|
||||
}
|
||||
|
||||
public void setNumWrites(long numWrites) {
|
||||
this.numWrites = numWrites;
|
||||
}
|
||||
|
||||
public void setNumDeletes(long numDeletes) {
|
||||
this.numDeletes = numDeletes;
|
||||
}
|
||||
|
||||
public void setNumUpdateWrites(long numUpdateWrites) {
|
||||
this.numUpdateWrites = numUpdateWrites;
|
||||
}
|
||||
|
||||
public long getTotalWriteBytes() {
|
||||
return totalWriteBytes;
|
||||
}
|
||||
|
||||
public void setTotalWriteBytes(long totalWriteBytes) {
|
||||
this.totalWriteBytes = totalWriteBytes;
|
||||
}
|
||||
|
||||
public long getTotalWriteErrors() { return totalWriteErrors; }
|
||||
|
||||
public void setTotalWriteErrors(long totalWriteErrors) { this.totalWriteErrors = totalWriteErrors; }
|
||||
|
||||
public String getPrevCommit() {
|
||||
return prevCommit;
|
||||
}
|
||||
|
||||
public long getNumWrites() {
|
||||
return numWrites;
|
||||
}
|
||||
|
||||
public long getNumDeletes() {
|
||||
return numDeletes;
|
||||
}
|
||||
|
||||
public long getNumUpdateWrites() {
|
||||
return numUpdateWrites;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
public String getPath() { return path; }
|
||||
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return new StringBuilder()
|
||||
.append("HoodieWriteStat {")
|
||||
.append("path=" + path)
|
||||
.append(", prevCommit='" + prevCommit + '\'')
|
||||
.append(", numWrites=" + numWrites)
|
||||
.append(", numDeletes=" + numDeletes)
|
||||
.append(", numUpdateWrites=" + numUpdateWrites)
|
||||
.append(", numWriteBytes=" + totalWriteBytes)
|
||||
.append('}')
|
||||
.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
HoodieWriteStat that = (HoodieWriteStat) o;
|
||||
if (!path.equals(that.path))
|
||||
return false;
|
||||
return prevCommit.equals(that.prevCommit);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = path.hashCode();
|
||||
result = 31 * result + prevCommit.hashCode();
|
||||
return result;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = path.hashCode();
|
||||
result = 31 * result + prevCommit.hashCode();
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,8 +19,11 @@ package com.uber.hoodie.common.table;
|
||||
import com.uber.hoodie.common.model.HoodieAvroPayload;
|
||||
import com.uber.hoodie.common.model.HoodieFileFormat;
|
||||
import com.uber.hoodie.common.model.HoodieTableType;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
import java.util.Properties;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
@@ -28,144 +31,134 @@ import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Date;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc
|
||||
* Configurations are loaded from hoodie.properties, these properties are usually set during initializing a path as hoodie base path
|
||||
* and never changes during the lifetime of a hoodie dataset.
|
||||
* Configurations are loaded from hoodie.properties, these properties are usually set during
|
||||
* initializing a path as hoodie base path and never changes during the lifetime of a hoodie
|
||||
* dataset.
|
||||
*
|
||||
* @see HoodieTableMetaClient
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public class HoodieTableConfig implements Serializable {
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class);
|
||||
|
||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.ro.file.format";
|
||||
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.rt.file.format";
|
||||
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieTableConfig.class);
|
||||
|
||||
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
||||
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
|
||||
public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
|
||||
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
|
||||
private Properties props;
|
||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||
public static final String HOODIE_RO_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.ro.file.format";
|
||||
public static final String HOODIE_RT_FILE_FORMAT_PROP_NAME =
|
||||
"hoodie.table.rt.file.format";
|
||||
public static final String HOODIE_PAYLOAD_CLASS_PROP_NAME = "hoodie.compaction.payload.class";
|
||||
|
||||
public HoodieTableConfig(FileSystem fs, String metaPath) {
|
||||
Properties props = new Properties();
|
||||
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
|
||||
log.info("Loading dataset properties from " + propertyPath);
|
||||
try {
|
||||
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
|
||||
props.load(inputStream);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e);
|
||||
}
|
||||
this.props = props;
|
||||
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
||||
public static final HoodieFileFormat DEFAULT_RO_FILE_FORMAT = HoodieFileFormat.PARQUET;
|
||||
public static final HoodieFileFormat DEFAULT_RT_FILE_FORMAT = HoodieFileFormat.HOODIE_LOG;
|
||||
public static final String DEFAULT_PAYLOAD_CLASS = HoodieAvroPayload.class.getName();
|
||||
private Properties props;
|
||||
|
||||
public HoodieTableConfig(FileSystem fs, String metaPath) {
|
||||
Properties props = new Properties();
|
||||
Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE);
|
||||
log.info("Loading dataset properties from " + propertyPath);
|
||||
try {
|
||||
try (FSDataInputStream inputStream = fs.open(propertyPath)) {
|
||||
props.load(inputStream);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not load Hoodie properties from " + propertyPath, e);
|
||||
}
|
||||
this.props = props;
|
||||
}
|
||||
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableConfig() {
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableConfig() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the hoodie meta directory and any necessary files inside the meta (including the
|
||||
* hoodie.properties)
|
||||
*/
|
||||
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
|
||||
Properties properties) throws IOException {
|
||||
if (!fs.exists(metadataFolder)) {
|
||||
fs.mkdirs(metadataFolder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
|
||||
*
|
||||
* @param metadataFolder
|
||||
* @param properties
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void createHoodieProperties(FileSystem fs, Path metadataFolder,
|
||||
Properties properties) throws IOException {
|
||||
if (!fs.exists(metadataFolder)) {
|
||||
fs.mkdirs(metadataFolder);
|
||||
}
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
FSDataOutputStream outputStream = fs.create(propertyPath);
|
||||
try {
|
||||
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
|
||||
throw new IllegalArgumentException(
|
||||
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
}
|
||||
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
|
||||
}
|
||||
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ.name()
|
||||
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
|
||||
}
|
||||
properties
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
} finally {
|
||||
outputStream.close();
|
||||
}
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
FSDataOutputStream outputStream = fs.create(propertyPath);
|
||||
try {
|
||||
if (!properties.containsKey(HOODIE_TABLE_NAME_PROP_NAME)) {
|
||||
throw new IllegalArgumentException(
|
||||
HOODIE_TABLE_NAME_PROP_NAME + " property needs to be specified");
|
||||
}
|
||||
if (!properties.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
|
||||
}
|
||||
if (properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME) == HoodieTableType.MERGE_ON_READ
|
||||
.name()
|
||||
&& !properties.containsKey(HOODIE_PAYLOAD_CLASS_PROP_NAME)) {
|
||||
properties.setProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
|
||||
}
|
||||
properties
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
} finally {
|
||||
outputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read the table type from the table properties and if not found, return the default
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTableType getTableType() {
|
||||
if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
return HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_TABLE_TYPE;
|
||||
/**
|
||||
* Read the table type from the table properties and if not found, return the default
|
||||
*/
|
||||
public HoodieTableType getTableType() {
|
||||
if (props.containsKey(HOODIE_TABLE_TYPE_PROP_NAME)) {
|
||||
return HoodieTableType.valueOf(props.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_TABLE_TYPE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the payload class for HoodieRecords from the table properties
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getPayloadClass() {
|
||||
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
|
||||
}
|
||||
/**
|
||||
* Read the payload class for HoodieRecords from the table properties
|
||||
*/
|
||||
public String getPayloadClass() {
|
||||
return props.getProperty(HOODIE_PAYLOAD_CLASS_PROP_NAME, DEFAULT_PAYLOAD_CLASS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the table name
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getTableName() {
|
||||
return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
|
||||
}
|
||||
/**
|
||||
* Read the table name
|
||||
*/
|
||||
public String getTableName() {
|
||||
return props.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Read Optimized Storage Format
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
*/
|
||||
public HoodieFileFormat getROFileFormat() {
|
||||
if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RO_FILE_FORMAT;
|
||||
/**
|
||||
* Get the Read Optimized Storage Format
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
*/
|
||||
public HoodieFileFormat getROFileFormat() {
|
||||
if (props.containsKey(HOODIE_RO_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RO_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RO_FILE_FORMAT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Read Optimized Storage Format
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
*/
|
||||
public HoodieFileFormat getRTFileFormat() {
|
||||
if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RT_FILE_FORMAT;
|
||||
/**
|
||||
* Get the Read Optimized Storage Format
|
||||
*
|
||||
* @return HoodieFileFormat for the Read Optimized Storage format
|
||||
*/
|
||||
public HoodieFileFormat getRTFileFormat() {
|
||||
if (props.containsKey(HOODIE_RT_FILE_FORMAT_PROP_NAME)) {
|
||||
return HoodieFileFormat.valueOf(props.getProperty(HOODIE_RT_FILE_FORMAT_PROP_NAME));
|
||||
}
|
||||
return DEFAULT_RT_FILE_FORMAT;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -21,6 +21,11 @@ import com.uber.hoodie.common.table.timeline.HoodieActiveTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.exception.DatasetNotFoundException;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
@@ -28,225 +33,205 @@ import org.apache.hadoop.fs.PathFilter;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.Externalizable;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInput;
|
||||
import java.io.ObjectInputStream;
|
||||
import java.io.ObjectOutput;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table
|
||||
* It returns meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
|
||||
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to start getting the meta-data.
|
||||
* <p>
|
||||
* All the timelines are computed lazily, once computed the timeline is cached and never refreshed.
|
||||
* Use the <code>HoodieTimeline.reload()</code> to refresh timelines.
|
||||
* <code>HoodieTableMetaClient</code> allows to access meta-data about a hoodie table It returns
|
||||
* meta-data about commits, savepoints, compactions, cleanups as a <code>HoodieTimeline</code>
|
||||
* Create an instance of the <code>HoodieTableMetaClient</code> with FileSystem and basePath to
|
||||
* start getting the meta-data. <p> All the timelines are computed lazily, once computed the
|
||||
* timeline is cached and never refreshed. Use the <code>HoodieTimeline.reload()</code> to refresh
|
||||
* timelines.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public class HoodieTableMetaClient implements Serializable {
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
|
||||
public static String METAFOLDER_NAME = ".hoodie";
|
||||
|
||||
private String basePath;
|
||||
private transient FileSystem fs;
|
||||
private String metaPath;
|
||||
private HoodieTableType tableType;
|
||||
private HoodieTableConfig tableConfig;
|
||||
private HoodieActiveTimeline activeTimeline;
|
||||
private HoodieArchivedTimeline archivedTimeline;
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieTableMetaClient.class);
|
||||
public static String METAFOLDER_NAME = ".hoodie";
|
||||
|
||||
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
|
||||
// Do not load any timeline by default
|
||||
this(fs, basePath, false);
|
||||
private String basePath;
|
||||
private transient FileSystem fs;
|
||||
private String metaPath;
|
||||
private HoodieTableType tableType;
|
||||
private HoodieTableConfig tableConfig;
|
||||
private HoodieActiveTimeline activeTimeline;
|
||||
private HoodieArchivedTimeline archivedTimeline;
|
||||
|
||||
public HoodieTableMetaClient(FileSystem fs, String basePath) throws DatasetNotFoundException {
|
||||
// Do not load any timeline by default
|
||||
this(fs, basePath, false);
|
||||
}
|
||||
|
||||
public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad)
|
||||
throws DatasetNotFoundException {
|
||||
log.info("Loading HoodieTableMetaClient from " + basePath);
|
||||
this.basePath = basePath;
|
||||
this.fs = fs;
|
||||
Path basePathDir = new Path(this.basePath);
|
||||
this.metaPath = basePath + File.separator + METAFOLDER_NAME;
|
||||
Path metaPathDir = new Path(this.metaPath);
|
||||
DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
|
||||
this.tableConfig = new HoodieTableConfig(fs, metaPath);
|
||||
this.tableType = tableConfig.getTableType();
|
||||
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
|
||||
if (loadActiveTimelineOnLoad) {
|
||||
log.info("Loading Active commit timeline for " + basePath);
|
||||
getActiveTimeline();
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieTableMetaClient(FileSystem fs, String basePath, boolean loadActiveTimelineOnLoad)
|
||||
throws DatasetNotFoundException {
|
||||
log.info("Loading HoodieTableMetaClient from " + basePath);
|
||||
this.basePath = basePath;
|
||||
this.fs = fs;
|
||||
Path basePathDir = new Path(this.basePath);
|
||||
this.metaPath = basePath + File.separator + METAFOLDER_NAME;
|
||||
Path metaPathDir = new Path(this.metaPath);
|
||||
DatasetNotFoundException.checkValidDataset(fs, basePathDir, metaPathDir);
|
||||
this.tableConfig = new HoodieTableConfig(fs, metaPath);
|
||||
this.tableType = tableConfig.getTableType();
|
||||
log.info("Finished Loading Table of type " + tableType + " from " + basePath);
|
||||
if (loadActiveTimelineOnLoad) {
|
||||
log.info("Loading Active commit timeline for " + basePath);
|
||||
getActiveTimeline();
|
||||
}
|
||||
}
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableMetaClient() {
|
||||
}
|
||||
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieTableMetaClient() {
|
||||
}
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
private void writeObject(java.io.ObjectOutputStream out)
|
||||
throws IOException {
|
||||
out.defaultWriteObject();
|
||||
}
|
||||
|
||||
private void writeObject(java.io.ObjectOutputStream out)
|
||||
throws IOException {
|
||||
out.defaultWriteObject();
|
||||
}
|
||||
/**
|
||||
* @return Base path
|
||||
*/
|
||||
public String getBasePath() {
|
||||
return basePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Base path
|
||||
*/
|
||||
public String getBasePath() {
|
||||
return basePath;
|
||||
}
|
||||
/**
|
||||
* @return Hoodie Table Type
|
||||
*/
|
||||
public HoodieTableType getTableType() {
|
||||
return tableType;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Hoodie Table Type
|
||||
*/
|
||||
public HoodieTableType getTableType() {
|
||||
return tableType;
|
||||
}
|
||||
/**
|
||||
* @return Meta path
|
||||
*/
|
||||
public String getMetaPath() {
|
||||
return metaPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Meta path
|
||||
*/
|
||||
public String getMetaPath() {
|
||||
return metaPath;
|
||||
}
|
||||
/**
|
||||
* @return Table Config
|
||||
*/
|
||||
public HoodieTableConfig getTableConfig() {
|
||||
return tableConfig;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Table Config
|
||||
*/
|
||||
public HoodieTableConfig getTableConfig() {
|
||||
return tableConfig;
|
||||
}
|
||||
/**
|
||||
* Get the FS implementation for this table
|
||||
*/
|
||||
public FileSystem getFs() {
|
||||
return fs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the FS implementation for this table
|
||||
* @return
|
||||
*/
|
||||
public FileSystem getFs() {
|
||||
return fs;
|
||||
/**
|
||||
* Get the active instants as a timeline
|
||||
*
|
||||
* @return Active instants timeline
|
||||
*/
|
||||
public synchronized HoodieActiveTimeline getActiveTimeline() {
|
||||
if (activeTimeline == null) {
|
||||
activeTimeline = new HoodieActiveTimeline(fs, metaPath);
|
||||
}
|
||||
return activeTimeline;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the active instants as a timeline
|
||||
*
|
||||
* @return Active instants timeline
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized HoodieActiveTimeline getActiveTimeline() {
|
||||
if (activeTimeline == null) {
|
||||
activeTimeline = new HoodieActiveTimeline(fs, metaPath);
|
||||
}
|
||||
return activeTimeline;
|
||||
/**
|
||||
* Get the archived commits as a timeline. This is costly operation, as all data from the archived
|
||||
* files are read. This should not be used, unless for historical debugging purposes
|
||||
*
|
||||
* @return Active commit timeline
|
||||
*/
|
||||
public synchronized HoodieArchivedTimeline getArchivedTimeline() {
|
||||
if (archivedTimeline == null) {
|
||||
archivedTimeline = new HoodieArchivedTimeline(fs, metaPath);
|
||||
}
|
||||
return archivedTimeline;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the archived commits as a timeline. This is costly operation, as all data from the
|
||||
* archived files are read. This should not be used, unless for historical debugging purposes
|
||||
*
|
||||
* @return Active commit timeline
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized HoodieArchivedTimeline getArchivedTimeline() {
|
||||
if (archivedTimeline == null) {
|
||||
archivedTimeline = new HoodieArchivedTimeline(fs, metaPath);
|
||||
}
|
||||
return archivedTimeline;
|
||||
/**
|
||||
* Helper method to initialize a given path, as a given storage type and table name
|
||||
*/
|
||||
public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath,
|
||||
HoodieTableType tableType, String tableName, String payloadClassName) throws IOException {
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
|
||||
if (tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName);
|
||||
}
|
||||
return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path, as a given storage type and table name
|
||||
*
|
||||
* @param fs
|
||||
* @param basePath
|
||||
* @param tableType
|
||||
* @param tableName
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static HoodieTableMetaClient initTableType(FileSystem fs, String basePath, HoodieTableType tableType, String tableName, String payloadClassName) throws IOException {
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
|
||||
if(tableType == HoodieTableType.MERGE_ON_READ) {
|
||||
properties.setProperty(HoodieTableConfig.HOODIE_PAYLOAD_CLASS_PROP_NAME, payloadClassName);
|
||||
}
|
||||
return HoodieTableMetaClient.initializePathAsHoodieDataset(fs, basePath, properties);
|
||||
/**
|
||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as
|
||||
* Properties
|
||||
*
|
||||
* @return Instance of HoodieTableMetaClient
|
||||
*/
|
||||
public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs,
|
||||
String basePath, Properties props) throws IOException {
|
||||
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
|
||||
Path basePathDir = new Path(basePath);
|
||||
if (!fs.exists(basePathDir)) {
|
||||
fs.mkdirs(basePathDir);
|
||||
}
|
||||
Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
|
||||
if (!fs.exists(metaPathDir)) {
|
||||
fs.mkdirs(metaPathDir);
|
||||
}
|
||||
HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath);
|
||||
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
|
||||
+ " from " + basePath);
|
||||
return metaClient;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to initialize a given path as a hoodie dataset with configs passed in as as Properties
|
||||
*
|
||||
* @param fs
|
||||
* @param basePath
|
||||
* @param props
|
||||
* @return Instance of HoodieTableMetaClient
|
||||
* @throws IOException
|
||||
*/
|
||||
public static HoodieTableMetaClient initializePathAsHoodieDataset(FileSystem fs,
|
||||
String basePath, Properties props) throws IOException {
|
||||
log.info("Initializing " + basePath + " as hoodie dataset " + basePath);
|
||||
Path basePathDir = new Path(basePath);
|
||||
if (!fs.exists(basePathDir)) {
|
||||
fs.mkdirs(basePathDir);
|
||||
}
|
||||
Path metaPathDir = new Path(basePath, METAFOLDER_NAME);
|
||||
if (!fs.exists(metaPathDir)) {
|
||||
fs.mkdirs(metaPathDir);
|
||||
}
|
||||
HoodieTableConfig.createHoodieProperties(fs, metaPathDir, props);
|
||||
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs, basePath);
|
||||
log.info("Finished initializing Table of type " + metaClient.getTableConfig().getTableType()
|
||||
+ " from " + basePath);
|
||||
return metaClient;
|
||||
}
|
||||
// HELPER METHODS TO CREATE META FILE NAMES
|
||||
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
|
||||
throws IOException {
|
||||
return fs.listStatus(metaPath, nameFilter);
|
||||
}
|
||||
|
||||
// HELPER METHODS TO CREATE META FILE NAMES
|
||||
public static FileStatus[] scanFiles(FileSystem fs, Path metaPath, PathFilter nameFilter)
|
||||
throws IOException {
|
||||
return fs.listStatus(metaPath, nameFilter);
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieTableMetaClient that = (HoodieTableMetaClient) o;
|
||||
return Objects.equals(basePath, that.basePath) && tableType == that.tableType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieTableMetaClient that = (HoodieTableMetaClient) o;
|
||||
return Objects.equals(basePath, that.basePath) && tableType == that.tableType;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(basePath, tableType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(basePath, tableType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{");
|
||||
sb.append("basePath='").append(basePath).append('\'');
|
||||
sb.append(", metaPath='").append(metaPath).append('\'');
|
||||
sb.append(", tableType=").append(tableType);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieTableMetaClient{");
|
||||
sb.append("basePath='").append(basePath).append('\'');
|
||||
sb.append(", metaPath='").append(metaPath).append('\'');
|
||||
sb.append(", tableType=").append(tableType);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,19 +18,16 @@ package com.uber.hoodie.common.table;
|
||||
|
||||
import com.uber.hoodie.common.table.timeline.HoodieDefaultTimeline;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Optional;
|
||||
import java.util.function.BiPredicate;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie dataset.
|
||||
* Instants are specific points in time represented as HoodieInstant.
|
||||
* <p>
|
||||
* Timelines are immutable once created and operations create new instance of
|
||||
* timelines which filter on the instants and this can be chained.
|
||||
* HoodieTimeline is a view of meta-data instants in the hoodie dataset. Instants are specific
|
||||
* points in time represented as HoodieInstant. <p> Timelines are immutable once created and
|
||||
* operations create new instance of timelines which filter on the instants and this can be
|
||||
* chained.
|
||||
*
|
||||
* @see com.uber.hoodie.common.table.HoodieTableMetaClient
|
||||
* @see HoodieDefaultTimeline
|
||||
@@ -38,205 +35,195 @@ import java.util.stream.Stream;
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public interface HoodieTimeline extends Serializable {
|
||||
String COMMIT_ACTION = "commit";
|
||||
String DELTA_COMMIT_ACTION = "deltacommit";
|
||||
String CLEAN_ACTION = "clean";
|
||||
String ROLLBACK_ACTION = "rollback";
|
||||
String SAVEPOINT_ACTION = "savepoint";
|
||||
String COMPACTION_ACTION = "compaction";
|
||||
String INFLIGHT_EXTENSION = ".inflight";
|
||||
|
||||
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
|
||||
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
|
||||
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
|
||||
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
|
||||
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
|
||||
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
|
||||
//this is to preserve backwards compatibility on commit in-flight filenames
|
||||
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
|
||||
String COMMIT_ACTION = "commit";
|
||||
String DELTA_COMMIT_ACTION = "deltacommit";
|
||||
String CLEAN_ACTION = "clean";
|
||||
String ROLLBACK_ACTION = "rollback";
|
||||
String SAVEPOINT_ACTION = "savepoint";
|
||||
String COMPACTION_ACTION = "compaction";
|
||||
String INFLIGHT_EXTENSION = ".inflight";
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include the in-flights
|
||||
*
|
||||
* @return New instance of HoodieTimeline with just in-flights
|
||||
*/
|
||||
HoodieTimeline filterInflights();
|
||||
String COMMIT_EXTENSION = "." + COMMIT_ACTION;
|
||||
String DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION;
|
||||
String CLEAN_EXTENSION = "." + CLEAN_ACTION;
|
||||
String ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION;
|
||||
String SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION;
|
||||
String COMPACTION_EXTENSION = "." + COMPACTION_ACTION;
|
||||
//this is to preserve backwards compatibility on commit in-flight filenames
|
||||
String INFLIGHT_COMMIT_EXTENSION = INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_DELTA_COMMIT_EXTENSION = "." + DELTA_COMMIT_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_CLEAN_EXTENSION = "." + CLEAN_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_ROLLBACK_EXTENSION = "." + ROLLBACK_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_SAVEPOINT_EXTENSION = "." + SAVEPOINT_ACTION + INFLIGHT_EXTENSION;
|
||||
String INFLIGHT_COMPACTION_EXTENSION = "." + COMPACTION_ACTION + INFLIGHT_EXTENSION;
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include the completed instants
|
||||
*
|
||||
* @return New instance of HoodieTimeline with just completed instants
|
||||
*/
|
||||
HoodieTimeline filterCompletedInstants();
|
||||
/**
|
||||
* Filter this timeline to just include the in-flights
|
||||
*
|
||||
* @return New instance of HoodieTimeline with just in-flights
|
||||
*/
|
||||
HoodieTimeline filterInflights();
|
||||
|
||||
/**
|
||||
* Filter this timeline to just include the completed instants
|
||||
*
|
||||
* @return New instance of HoodieTimeline with just completed instants
|
||||
*/
|
||||
HoodieTimeline filterCompletedInstants();
|
||||
|
||||
|
||||
/**
|
||||
* Create a new Timeline with instants after startTs and before or on endTs
|
||||
*
|
||||
* @param startTs
|
||||
* @param endTs
|
||||
*/
|
||||
HoodieTimeline findInstantsInRange(String startTs, String endTs);
|
||||
/**
|
||||
* Create a new Timeline with instants after startTs and before or on endTs
|
||||
*/
|
||||
HoodieTimeline findInstantsInRange(String startTs, String endTs);
|
||||
|
||||
/**
|
||||
* Create a new Timeline with all the instants after startTs
|
||||
*
|
||||
* @param commitTime
|
||||
* @param numCommits
|
||||
*/
|
||||
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
|
||||
/**
|
||||
* Create a new Timeline with all the instants after startTs
|
||||
*/
|
||||
HoodieTimeline findInstantsAfter(String commitTime, int numCommits);
|
||||
|
||||
/**
|
||||
* If the timeline has any instants
|
||||
*
|
||||
* @return true if timeline is empty
|
||||
*/
|
||||
boolean empty();
|
||||
/**
|
||||
* If the timeline has any instants
|
||||
*
|
||||
* @return true if timeline is empty
|
||||
*/
|
||||
boolean empty();
|
||||
|
||||
/**
|
||||
* @return total number of completed instants
|
||||
*/
|
||||
int countInstants();
|
||||
/**
|
||||
* @return total number of completed instants
|
||||
*/
|
||||
int countInstants();
|
||||
|
||||
/**
|
||||
* @return first completed instant if available
|
||||
*/
|
||||
Optional<HoodieInstant> firstInstant();
|
||||
/**
|
||||
* @return first completed instant if available
|
||||
*/
|
||||
Optional<HoodieInstant> firstInstant();
|
||||
|
||||
/**
|
||||
* @param n
|
||||
* @return nth completed instant from the first completed instant
|
||||
*/
|
||||
Optional<HoodieInstant> nthInstant(int n);
|
||||
/**
|
||||
* @return nth completed instant from the first completed instant
|
||||
*/
|
||||
Optional<HoodieInstant> nthInstant(int n);
|
||||
|
||||
/**
|
||||
* @return last completed instant if available
|
||||
*/
|
||||
Optional<HoodieInstant> lastInstant();
|
||||
/**
|
||||
* @return last completed instant if available
|
||||
*/
|
||||
Optional<HoodieInstant> lastInstant();
|
||||
|
||||
/**
|
||||
* @param n
|
||||
* @return nth completed instant going back from the last completed instant
|
||||
*/
|
||||
Optional<HoodieInstant> nthFromLastInstant(int n);
|
||||
/**
|
||||
* @return nth completed instant going back from the last completed instant
|
||||
*/
|
||||
Optional<HoodieInstant> nthFromLastInstant(int n);
|
||||
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline
|
||||
*/
|
||||
boolean containsInstant(HoodieInstant instant);
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline
|
||||
*/
|
||||
boolean containsInstant(HoodieInstant instant);
|
||||
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline or
|
||||
* if the instant is before the first completed instant in the timeline
|
||||
*/
|
||||
boolean containsOrBeforeTimelineStarts(String ts);
|
||||
/**
|
||||
* @return true if the passed instant is present as a completed instant on the timeline or if the
|
||||
* instant is before the first completed instant in the timeline
|
||||
*/
|
||||
boolean containsOrBeforeTimelineStarts(String ts);
|
||||
|
||||
/**
|
||||
* @return Get the stream of completed instants
|
||||
*/
|
||||
Stream<HoodieInstant> getInstants();
|
||||
/**
|
||||
* @return Get the stream of completed instants
|
||||
*/
|
||||
Stream<HoodieInstant> getInstants();
|
||||
|
||||
/**
|
||||
* @return true if the passed in instant is before the first completed instant in the timeline
|
||||
*/
|
||||
boolean isBeforeTimelineStarts(String ts);
|
||||
/**
|
||||
* @return true if the passed in instant is before the first completed instant in the timeline
|
||||
*/
|
||||
boolean isBeforeTimelineStarts(String ts);
|
||||
|
||||
/**
|
||||
* Read the completed instant details
|
||||
*
|
||||
* @param instant
|
||||
* @return
|
||||
*/
|
||||
Optional<byte[]> getInstantDetails(HoodieInstant instant);
|
||||
/**
|
||||
* Read the completed instant details
|
||||
*/
|
||||
Optional<byte[]> getInstantDetails(HoodieInstant instant);
|
||||
|
||||
/**
|
||||
* Helper methods to compare instants
|
||||
**/
|
||||
BiPredicate<String, String> GREATER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
|
||||
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
|
||||
BiPredicate<String, String> LESSER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
||||
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
|
||||
/**
|
||||
* Helper methods to compare instants
|
||||
**/
|
||||
BiPredicate<String, String> GREATER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) >= 0;
|
||||
BiPredicate<String, String> GREATER = (commit1, commit2) -> commit1.compareTo(commit2) > 0;
|
||||
BiPredicate<String, String> LESSER_OR_EQUAL =
|
||||
(commit1, commit2) -> commit1.compareTo(commit2) <= 0;
|
||||
BiPredicate<String, String> LESSER = (commit1, commit2) -> commit1.compareTo(commit2) < 0;
|
||||
|
||||
static boolean compareTimestamps(String commit1, String commit2,
|
||||
BiPredicate<String, String> predicateToApply) {
|
||||
return predicateToApply.test(commit1, commit2);
|
||||
}
|
||||
static boolean compareTimestamps(String commit1, String commit2,
|
||||
BiPredicate<String, String> predicateToApply) {
|
||||
return predicateToApply.test(commit1, commit2);
|
||||
}
|
||||
|
||||
static HoodieInstant getCompletedInstant(final HoodieInstant instant) {
|
||||
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
|
||||
}
|
||||
static HoodieInstant getCompletedInstant(final HoodieInstant instant) {
|
||||
return new HoodieInstant(false, instant.getAction(), instant.getTimestamp());
|
||||
}
|
||||
|
||||
|
||||
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
|
||||
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
|
||||
}
|
||||
static HoodieInstant getInflightInstant(final HoodieInstant instant) {
|
||||
return new HoodieInstant(true, instant.getAction(), instant.getTimestamp());
|
||||
}
|
||||
|
||||
static String makeCommitFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
||||
}
|
||||
static String makeCommitFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.COMMIT_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightCommitFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||
}
|
||||
static String makeInflightCommitFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_COMMIT_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeCleanerFileName(String instant) {
|
||||
return instant + HoodieTimeline.CLEAN_EXTENSION;
|
||||
}
|
||||
static String makeCleanerFileName(String instant) {
|
||||
return instant + HoodieTimeline.CLEAN_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightCleanerFileName(String instant) {
|
||||
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
|
||||
}
|
||||
static String makeInflightCleanerFileName(String instant) {
|
||||
return instant + HoodieTimeline.INFLIGHT_CLEAN_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeRollbackFileName(String instant) {
|
||||
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
|
||||
}
|
||||
static String makeRollbackFileName(String instant) {
|
||||
return instant + HoodieTimeline.ROLLBACK_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightRollbackFileName(String instant) {
|
||||
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
|
||||
}
|
||||
static String makeInflightRollbackFileName(String instant) {
|
||||
return instant + HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightSavePointFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
|
||||
}
|
||||
static String makeInflightSavePointFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_SAVEPOINT_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeSavePointFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
|
||||
}
|
||||
static String makeSavePointFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.SAVEPOINT_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightCompactionFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION;
|
||||
}
|
||||
static String makeInflightCompactionFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_COMPACTION_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeCompactionFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.COMPACTION_EXTENSION;
|
||||
}
|
||||
static String makeCompactionFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.COMPACTION_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeInflightDeltaFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
|
||||
}
|
||||
static String makeInflightDeltaFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.INFLIGHT_DELTA_COMMIT_EXTENSION;
|
||||
}
|
||||
|
||||
static String makeDeltaFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
|
||||
}
|
||||
static String makeDeltaFileName(String commitTime) {
|
||||
return commitTime + HoodieTimeline.DELTA_COMMIT_EXTENSION;
|
||||
}
|
||||
|
||||
static String getCommitFromCommitFile(String commitFileName) {
|
||||
return commitFileName.split("\\.")[0];
|
||||
}
|
||||
static String getCommitFromCommitFile(String commitFileName) {
|
||||
return commitFileName.split("\\.")[0];
|
||||
}
|
||||
|
||||
static String makeFileNameAsComplete(String fileName) {
|
||||
return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||
}
|
||||
static String makeFileNameAsComplete(String fileName) {
|
||||
return fileName.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||
}
|
||||
|
||||
static String makeFileNameAsInflight(String fileName) {
|
||||
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
|
||||
}
|
||||
static String makeFileNameAsInflight(String fileName) {
|
||||
return fileName + HoodieTimeline.INFLIGHT_EXTENSION;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -19,13 +19,7 @@ package com.uber.hoodie.common.table;
|
||||
import com.uber.hoodie.common.model.FileSlice;
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
import com.uber.hoodie.common.model.HoodieFileGroup;
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
@@ -35,70 +29,70 @@ import java.util.stream.Stream;
|
||||
*/
|
||||
public interface TableFileSystemView {
|
||||
|
||||
/**
|
||||
* ReadOptimizedView - methods to provide a view of columnar data files only.
|
||||
*/
|
||||
interface ReadOptimizedView {
|
||||
/**
|
||||
* Stream all the latest data files in the given partition
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the latest data files, in the file system view
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFiles();
|
||||
|
||||
/**
|
||||
* Stream all the latest version data files in the given partition with precondition that
|
||||
* commitTime(file) before maxCommitTime
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream all the latest data files pass
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
|
||||
|
||||
/**
|
||||
* Stream all the data file versions grouped by FileId for a given partition
|
||||
*/
|
||||
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
|
||||
}
|
||||
/**
|
||||
* ReadOptimizedView - methods to provide a view of columnar data files only.
|
||||
*/
|
||||
interface ReadOptimizedView {
|
||||
|
||||
/**
|
||||
* RealtimeView - methods to access a combination of columnar data files + log files with real time data.
|
||||
* Stream all the latest data files in the given partition
|
||||
*/
|
||||
interface RealtimeView {
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlices(String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition with precondition that
|
||||
* commitTime(file) before maxCommitTime
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices, in the given range
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
|
||||
|
||||
/**
|
||||
* Stream all the file slices for a given partition, latest or not.
|
||||
*/
|
||||
Stream<FileSlice> getAllFileSlices(String partitionPath);
|
||||
}
|
||||
Stream<HoodieDataFile> getLatestDataFiles(String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the file groups for a given partition
|
||||
*
|
||||
* @param partitionPath
|
||||
* @return
|
||||
* Stream all the latest data files, in the file system view
|
||||
*/
|
||||
Stream<HoodieFileGroup> getAllFileGroups(String partitionPath);
|
||||
Stream<HoodieDataFile> getLatestDataFiles();
|
||||
|
||||
/**
|
||||
* Stream all the latest version data files in the given partition with precondition that
|
||||
* commitTime(file) before maxCommitTime
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream all the latest data files pass
|
||||
*/
|
||||
Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn);
|
||||
|
||||
/**
|
||||
* Stream all the data file versions grouped by FileId for a given partition
|
||||
*/
|
||||
Stream<HoodieDataFile> getAllDataFiles(String partitionPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* RealtimeView - methods to access a combination of columnar data files + log files with real
|
||||
* time data.
|
||||
*/
|
||||
interface RealtimeView {
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlices(String partitionPath);
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices in the given partition with precondition that
|
||||
* commitTime(file) before maxCommitTime
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime);
|
||||
|
||||
/**
|
||||
* Stream all the latest file slices, in the given range
|
||||
*/
|
||||
Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn);
|
||||
|
||||
/**
|
||||
* Stream all the file slices for a given partition, latest or not.
|
||||
*/
|
||||
Stream<FileSlice> getAllFileSlices(String partitionPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stream all the file groups for a given partition
|
||||
*/
|
||||
Stream<HoodieFileGroup> getAllFileGroups(String partitionPath);
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
|
||||
package com.uber.hoodie.common.table.log;
|
||||
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import com.uber.hoodie.common.model.HoodieKey;
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
@@ -29,14 +32,6 @@ import com.uber.hoodie.common.table.log.block.HoodieDeleteBlock;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieLogBlock;
|
||||
import com.uber.hoodie.common.util.ReflectionUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Arrays;
|
||||
@@ -48,17 +43,22 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.HoodieLogBlockType.CORRUPT_BLOCK;
|
||||
import static com.uber.hoodie.common.table.log.block.HoodieLogBlock.LogMetadataType.INSTANT_TIME;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged
|
||||
* list of records which will be used as a lookup table when merging the base columnar file
|
||||
* with the redo log file.
|
||||
*
|
||||
* Scans through all the blocks in a list of HoodieLogFile and builds up a compacted/merged list of
|
||||
* records which will be used as a lookup table when merging the base columnar file with the redo
|
||||
* log file.
|
||||
*/
|
||||
public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
|
||||
public class HoodieCompactedLogRecordScanner implements
|
||||
Iterable<HoodieRecord<? extends HoodieRecordPayload>> {
|
||||
|
||||
private final static Logger log = LogManager.getLogger(HoodieCompactedLogRecordScanner.class);
|
||||
|
||||
// Final list of compacted/merged records to iterate
|
||||
@@ -80,10 +80,10 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
Deque<HoodieLogBlock> lastBlocks = new ArrayDeque<>();
|
||||
|
||||
public HoodieCompactedLogRecordScanner(FileSystem fs, String basePath, List<String> logFilePaths,
|
||||
Schema readerSchema, String latestInstantTime) {
|
||||
Schema readerSchema, String latestInstantTime) {
|
||||
this.readerSchema = readerSchema;
|
||||
this.latestInstantTime = latestInstantTime;
|
||||
this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath);
|
||||
this.hoodieTableMetaClient = new HoodieTableMetaClient(fs, basePath);
|
||||
// load class from the payload fully qualified class name
|
||||
this.payloadClassFQN = this.hoodieTableMetaClient.getTableConfig().getPayloadClass();
|
||||
|
||||
@@ -91,18 +91,18 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> records = Maps.newHashMap();
|
||||
// iterate over the paths
|
||||
Iterator<String> logFilePathsItr = logFilePaths.iterator();
|
||||
while(logFilePathsItr.hasNext()) {
|
||||
while (logFilePathsItr.hasNext()) {
|
||||
HoodieLogFile logFile = new HoodieLogFile(new Path(logFilePathsItr.next()));
|
||||
log.info("Scanning log file " + logFile.getPath());
|
||||
totalLogFiles.incrementAndGet();
|
||||
try {
|
||||
// Use the HoodieLogFormatReader to iterate through the blocks in the log file
|
||||
HoodieLogFormatReader reader = new HoodieLogFormatReader(fs, logFile, readerSchema, true);
|
||||
while(reader.hasNext()) {
|
||||
while (reader.hasNext()) {
|
||||
HoodieLogBlock r = reader.next();
|
||||
String blockInstantTime = r.getLogMetadata().get(INSTANT_TIME);
|
||||
if(!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||
if (!HoodieTimeline.compareTimestamps(blockInstantTime, this.latestInstantTime,
|
||||
HoodieTimeline.LESSER_OR_EQUAL)) {
|
||||
//hit a block with instant time greater than should be processed, stop processing further
|
||||
break;
|
||||
}
|
||||
@@ -117,7 +117,7 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
case DELETE_BLOCK:
|
||||
log.info("Reading a delete block from file " + logFile.getPath());
|
||||
String lastBlockInstantTime = lastBlocks.peek().getLogMetadata().get(INSTANT_TIME);
|
||||
if(!lastBlockInstantTime.equals(blockInstantTime)) {
|
||||
if (!lastBlockInstantTime.equals(blockInstantTime)) {
|
||||
// Block with the keys listed as to be deleted, data and delete blocks written in different batches
|
||||
// so it is safe to merge
|
||||
// This is a delete block, so lets merge any records from previous data block
|
||||
@@ -130,7 +130,8 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
log.info("Reading a command block from file " + logFile.getPath());
|
||||
// This is a command block - take appropriate action based on the command
|
||||
HoodieCommandBlock commandBlock = (HoodieCommandBlock) r;
|
||||
String targetInstantForCommandBlock = r.getLogMetadata().get(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME);
|
||||
String targetInstantForCommandBlock = r.getLogMetadata()
|
||||
.get(HoodieLogBlock.LogMetadataType.TARGET_INSTANT_TIME);
|
||||
switch (commandBlock.getType()) { // there can be different types of command blocks
|
||||
case ROLLBACK_PREVIOUS_BLOCK:
|
||||
// Rollback the last read log block
|
||||
@@ -139,15 +140,16 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
// the rollback operation itself
|
||||
HoodieLogBlock lastBlock = lastBlocks.peek();
|
||||
if (lastBlock != null && lastBlock.getBlockType() != CORRUPT_BLOCK &&
|
||||
targetInstantForCommandBlock.contentEquals(lastBlock.getLogMetadata().get(INSTANT_TIME))) {
|
||||
targetInstantForCommandBlock
|
||||
.contentEquals(lastBlock.getLogMetadata().get(INSTANT_TIME))) {
|
||||
log.info("Rolling back the last log block read in " + logFile.getPath());
|
||||
lastBlocks.pop();
|
||||
} else if(lastBlock != null && lastBlock.getBlockType() == CORRUPT_BLOCK) {
|
||||
} else if (lastBlock != null && lastBlock.getBlockType() == CORRUPT_BLOCK) {
|
||||
// handle corrupt blocks separately since they may not have metadata
|
||||
log.info("Rolling back the last corrupted log block read in " + logFile.getPath());
|
||||
log.info(
|
||||
"Rolling back the last corrupted log block read in " + logFile.getPath());
|
||||
lastBlocks.pop();
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
log.warn("Invalid or extra rollback command block in " + logFile.getPath());
|
||||
}
|
||||
break;
|
||||
@@ -165,7 +167,7 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
throw new HoodieIOException("IOException when reading log file " + logFile);
|
||||
}
|
||||
// merge the last read block when all the blocks are done reading
|
||||
if(!lastBlocks.isEmpty()) {
|
||||
if (!lastBlocks.isEmpty()) {
|
||||
log.info("Merging the final data blocks in " + logFile.getPath());
|
||||
merge(records, lastBlocks);
|
||||
}
|
||||
@@ -175,32 +177,33 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over the GenericRecord in the block, read the hoodie key and partition path
|
||||
* and merge with the application specific payload if the same key was found before
|
||||
* Sufficient to just merge the log records since the base data is merged on previous compaction
|
||||
*
|
||||
* @param dataBlock
|
||||
* Iterate over the GenericRecord in the block, read the hoodie key and partition path and merge
|
||||
* with the application specific payload if the same key was found before Sufficient to just merge
|
||||
* the log records since the base data is merged on previous compaction
|
||||
*/
|
||||
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> loadRecordsFromBlock(HoodieAvroDataBlock dataBlock) {
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock = Maps.newHashMap();
|
||||
private Map<String, HoodieRecord<? extends HoodieRecordPayload>> loadRecordsFromBlock(
|
||||
HoodieAvroDataBlock dataBlock) {
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock = Maps
|
||||
.newHashMap();
|
||||
List<IndexedRecord> recs = dataBlock.getRecords();
|
||||
totalLogRecords.addAndGet(recs.size());
|
||||
recs.forEach(rec -> {
|
||||
String key = ((GenericRecord) rec).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)
|
||||
.toString();
|
||||
.toString();
|
||||
String partitionPath =
|
||||
((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
|
||||
.toString();
|
||||
((GenericRecord) rec).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD)
|
||||
.toString();
|
||||
HoodieRecord<? extends HoodieRecordPayload> hoodieRecord = new HoodieRecord<>(
|
||||
new HoodieKey(key, partitionPath),
|
||||
ReflectionUtils.loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class));
|
||||
new HoodieKey(key, partitionPath),
|
||||
ReflectionUtils
|
||||
.loadPayload(this.payloadClassFQN, new Object[]{Optional.of(rec)}, Optional.class));
|
||||
if (recordsFromLastBlock.containsKey(key)) {
|
||||
// Merge and store the merged record
|
||||
HoodieRecordPayload combinedValue = recordsFromLastBlock.get(key).getData()
|
||||
.preCombine(hoodieRecord.getData());
|
||||
.preCombine(hoodieRecord.getData());
|
||||
recordsFromLastBlock
|
||||
.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
|
||||
combinedValue));
|
||||
.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
|
||||
combinedValue));
|
||||
} else {
|
||||
// Put the record as is
|
||||
recordsFromLastBlock.put(key, hoodieRecord);
|
||||
@@ -211,12 +214,9 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
|
||||
/**
|
||||
* Merge the last seen log blocks with the accumulated records
|
||||
*
|
||||
* @param records
|
||||
* @param lastBlocks
|
||||
*/
|
||||
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
|
||||
Deque<HoodieLogBlock> lastBlocks) {
|
||||
Deque<HoodieLogBlock> lastBlocks) {
|
||||
while (!lastBlocks.isEmpty()) {
|
||||
HoodieLogBlock lastBlock = lastBlocks.pop();
|
||||
switch (lastBlock.getBlockType()) {
|
||||
@@ -234,19 +234,16 @@ public class HoodieCompactedLogRecordScanner implements Iterable<HoodieRecord<?
|
||||
|
||||
/**
|
||||
* Merge the records read from a single data block with the accumulated records
|
||||
*
|
||||
* @param records
|
||||
* @param recordsFromLastBlock
|
||||
*/
|
||||
private void merge(Map<String, HoodieRecord<? extends HoodieRecordPayload>> records,
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock) {
|
||||
Map<String, HoodieRecord<? extends HoodieRecordPayload>> recordsFromLastBlock) {
|
||||
recordsFromLastBlock.forEach((key, hoodieRecord) -> {
|
||||
if (records.containsKey(key)) {
|
||||
// Merge and store the merged record
|
||||
HoodieRecordPayload combinedValue = records.get(key).getData()
|
||||
.preCombine(hoodieRecord.getData());
|
||||
.preCombine(hoodieRecord.getData());
|
||||
records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()),
|
||||
combinedValue));
|
||||
combinedValue));
|
||||
} else {
|
||||
// Put the record as is
|
||||
records.put(key, hoodieRecord);
|
||||
|
||||
@@ -29,31 +29,36 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* File Format for Hoodie Log Files.
|
||||
* The File Format consists of blocks each seperated with a MAGIC sync marker.
|
||||
* A Block can either be a Data block, Command block or Delete Block.
|
||||
* Data Block - Contains log records serialized as Avro Binary Format
|
||||
* Command Block - Specific commands like RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block
|
||||
* Delete Block - List of keys to delete - tombstone for keys
|
||||
* File Format for Hoodie Log Files. The File Format consists of blocks each seperated with a MAGIC
|
||||
* sync marker. A Block can either be a Data block, Command block or Delete Block. Data Block -
|
||||
* Contains log records serialized as Avro Binary Format Command Block - Specific commands like
|
||||
* RoLLBACK_PREVIOUS-BLOCK - Tombstone for the previously written block Delete Block - List of keys
|
||||
* to delete - tombstone for keys
|
||||
*/
|
||||
public interface HoodieLogFormat {
|
||||
|
||||
/**
|
||||
* Magic 4 bytes we put at the start of every block in the log file. Sync marker.
|
||||
* We could make this file specific (generate a random 4 byte magic and stick it in the file header), but this I think is suffice for now - PR
|
||||
* Magic 4 bytes we put at the start of every block in the log file. Sync marker. We could make
|
||||
* this file specific (generate a random 4 byte magic and stick it in the file header), but this I
|
||||
* think is suffice for now - PR
|
||||
*/
|
||||
byte [] MAGIC = new byte [] {'H', 'U', 'D', 'I'};
|
||||
byte[] MAGIC = new byte[]{'H', 'U', 'D', 'I'};
|
||||
|
||||
/**
|
||||
* Writer interface to allow appending block to this file format
|
||||
*/
|
||||
interface Writer extends Closeable {
|
||||
/** @return the path to this {@link HoodieLogFormat} */
|
||||
|
||||
/**
|
||||
* @return the path to this {@link HoodieLogFormat}
|
||||
*/
|
||||
HoodieLogFile getLogFile();
|
||||
|
||||
/**
|
||||
* Append Block returns a new Writer if the log is rolled
|
||||
*/
|
||||
Writer appendBlock(HoodieLogBlock block) throws IOException, InterruptedException;
|
||||
|
||||
long getCurrentSize() throws IOException;
|
||||
}
|
||||
|
||||
@@ -61,7 +66,10 @@ public interface HoodieLogFormat {
|
||||
* Reader interface which is an Iterator of HoodieLogBlock
|
||||
*/
|
||||
interface Reader extends Closeable, Iterator<HoodieLogBlock> {
|
||||
/** @return the path to this {@link HoodieLogFormat} */
|
||||
|
||||
/**
|
||||
* @return the path to this {@link HoodieLogFormat}
|
||||
*/
|
||||
HoodieLogFile getLogFile();
|
||||
}
|
||||
|
||||
@@ -70,6 +78,7 @@ public interface HoodieLogFormat {
|
||||
* Builder class to construct the default log format writer
|
||||
*/
|
||||
class WriterBuilder {
|
||||
|
||||
private final static Logger log = LogManager.getLogger(WriterBuilder.class);
|
||||
// Default max log file size 512 MB
|
||||
public static final long DEFAULT_SIZE_THRESHOLD = 512 * 1024 * 1024L;
|
||||
@@ -187,7 +196,8 @@ public interface HoodieLogFormat {
|
||||
return new WriterBuilder();
|
||||
}
|
||||
|
||||
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata)
|
||||
static HoodieLogFormat.Reader newReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
|
||||
boolean readMetadata)
|
||||
throws IOException {
|
||||
return new HoodieLogFormatReader(fs, logFile, readerSchema, readMetadata);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
package com.uber.hoodie.common.table.log;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieAvroDataBlock;
|
||||
import com.uber.hoodie.common.table.log.block.HoodieCommandBlock;
|
||||
@@ -38,11 +37,12 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Scans a log file and provides block level iterator on the log file
|
||||
* Loads the entire block contents in memory
|
||||
* Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found)
|
||||
* Scans a log file and provides block level iterator on the log file Loads the entire block
|
||||
* contents in memory Can emit either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one
|
||||
* is found)
|
||||
*/
|
||||
public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 4096;
|
||||
private final static Logger log = LogManager.getLogger(HoodieLogFormatReader.class);
|
||||
|
||||
@@ -53,14 +53,16 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
private HoodieLogBlock nextBlock = null;
|
||||
private boolean readMetadata = true;
|
||||
|
||||
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, boolean readMetadata) throws IOException {
|
||||
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
|
||||
boolean readMetadata) throws IOException {
|
||||
this.inputStream = fs.open(logFile.getPath(), bufferSize);
|
||||
this.logFile = logFile;
|
||||
this.readerSchema = readerSchema;
|
||||
this.readMetadata = readMetadata;
|
||||
}
|
||||
|
||||
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, boolean readMetadata) throws IOException {
|
||||
HoodieLogFormatReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema,
|
||||
boolean readMetadata) throws IOException {
|
||||
this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, readMetadata);
|
||||
}
|
||||
|
||||
@@ -83,7 +85,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
// Skip blocksize in the stream and we should either find a sync marker (start of the next block) or EOF
|
||||
// If we did not find either of it, then this block is a corrupted block.
|
||||
boolean isCorrupted = isBlockCorrupt(blocksize);
|
||||
if(isCorrupted) {
|
||||
if (isCorrupted) {
|
||||
return createCorruptBlock();
|
||||
}
|
||||
|
||||
@@ -140,7 +142,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
}
|
||||
|
||||
private long scanForNextAvailableBlockOffset() throws IOException {
|
||||
while(true) {
|
||||
while (true) {
|
||||
long currentPos = inputStream.getPos();
|
||||
try {
|
||||
boolean isEOF = readMagic();
|
||||
@@ -191,7 +193,7 @@ public class HoodieLogFormatReader implements HoodieLogFormat.Reader {
|
||||
|
||||
@Override
|
||||
public HoodieLogBlock next() {
|
||||
if(nextBlock == null) {
|
||||
if (nextBlock == null) {
|
||||
// may be hasNext is not called
|
||||
hasNext();
|
||||
}
|
||||
|
||||
@@ -16,8 +16,6 @@
|
||||
|
||||
package com.uber.hoodie.common.table.log;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.Writer;
|
||||
import com.uber.hoodie.common.table.log.HoodieLogFormat.WriterBuilder;
|
||||
@@ -35,8 +33,8 @@ import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* HoodieLogFormatWriter can be used to append blocks to a log file
|
||||
* Use HoodieLogFormat.WriterBuilder to construct
|
||||
* HoodieLogFormatWriter can be used to append blocks to a log file Use
|
||||
* HoodieLogFormat.WriterBuilder to construct
|
||||
*/
|
||||
public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
|
||||
@@ -58,7 +56,7 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
* @param sizeThreshold
|
||||
*/
|
||||
HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize,
|
||||
Short replication, Long sizeThreshold)
|
||||
Short replication, Long sizeThreshold)
|
||||
throws IOException, InterruptedException {
|
||||
this.fs = fs;
|
||||
this.logFile = logFile;
|
||||
@@ -157,8 +155,9 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer {
|
||||
}
|
||||
|
||||
public long getCurrentSize() throws IOException {
|
||||
if(output == null) {
|
||||
throw new IllegalStateException("Cannot get current size as the underlying stream has been closed already");
|
||||
if (output == null) {
|
||||
throw new IllegalStateException(
|
||||
"Cannot get current size as the underlying stream has been closed already");
|
||||
}
|
||||
return output.getPos();
|
||||
}
|
||||
|
||||
@@ -18,6 +18,14 @@ package com.uber.hoodie.common.table.log.block;
|
||||
|
||||
import com.uber.hoodie.common.util.HoodieAvroUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericDatumWriter;
|
||||
@@ -27,30 +35,18 @@ import org.apache.avro.io.DecoderFactory;
|
||||
import org.apache.avro.io.Encoder;
|
||||
import org.apache.avro.io.EncoderFactory;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* DataBlock contains a list of records serialized using Avro.
|
||||
* The Datablock contains
|
||||
* 1. Compressed Writer Schema length
|
||||
* 2. Compressed Writer Schema content
|
||||
* 3. Total number of records in the block
|
||||
* 4. Size of a record
|
||||
* 5. Actual avro serialized content of the record
|
||||
* DataBlock contains a list of records serialized using Avro. The Datablock contains 1. Compressed
|
||||
* Writer Schema length 2. Compressed Writer Schema content 3. Total number of records in the block
|
||||
* 4. Size of a record 5. Actual avro serialized content of the record
|
||||
*/
|
||||
public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
|
||||
private List<IndexedRecord> records;
|
||||
private Schema schema;
|
||||
|
||||
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema, Map<LogMetadataType, String> metadata) {
|
||||
public HoodieAvroDataBlock(List<IndexedRecord> records, Schema schema,
|
||||
Map<LogMetadataType, String> metadata) {
|
||||
super(metadata);
|
||||
this.records = records;
|
||||
this.schema = schema;
|
||||
@@ -76,7 +72,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
|
||||
// 1. Write out metadata
|
||||
if(super.getLogMetadata() != null) {
|
||||
if (super.getLogMetadata() != null) {
|
||||
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
|
||||
}
|
||||
|
||||
@@ -117,12 +113,13 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
return HoodieLogBlockType.AVRO_DATA_BLOCK;
|
||||
}
|
||||
|
||||
public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata) throws IOException {
|
||||
public static HoodieLogBlock fromBytes(byte[] content, Schema readerSchema, boolean readMetadata)
|
||||
throws IOException {
|
||||
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
|
||||
Map<LogMetadataType, String> metadata = null;
|
||||
// 1. Read the metadata written out, if applicable
|
||||
if(readMetadata) {
|
||||
if (readMetadata) {
|
||||
metadata = HoodieLogBlock.getLogMetadata(dis);
|
||||
}
|
||||
// 1. Read the schema written out
|
||||
@@ -131,7 +128,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
dis.readFully(compressedSchema, 0, schemaLength);
|
||||
Schema writerSchema = new Schema.Parser().parse(HoodieAvroUtils.decompress(compressedSchema));
|
||||
|
||||
if(readerSchema == null) {
|
||||
if (readerSchema == null) {
|
||||
readerSchema = writerSchema;
|
||||
}
|
||||
|
||||
@@ -141,7 +138,7 @@ public class HoodieAvroDataBlock extends HoodieLogBlock {
|
||||
List<IndexedRecord> records = new ArrayList<>(totalRecords);
|
||||
|
||||
// 3. Read the content
|
||||
for(int i=0;i<totalRecords;i++) {
|
||||
for (int i = 0; i < totalRecords; i++) {
|
||||
// TODO - avoid bytes copy
|
||||
int recordLength = dis.readInt();
|
||||
byte[] recordData = new byte[recordLength];
|
||||
|
||||
@@ -32,7 +32,8 @@ public class HoodieCommandBlock extends HoodieLogBlock {
|
||||
|
||||
public enum HoodieCommandBlockTypeEnum {ROLLBACK_PREVIOUS_BLOCK}
|
||||
|
||||
public HoodieCommandBlock(HoodieCommandBlockTypeEnum type, Map<LogMetadataType, String> metadata) {
|
||||
public HoodieCommandBlock(HoodieCommandBlockTypeEnum type,
|
||||
Map<LogMetadataType, String> metadata) {
|
||||
super(metadata);
|
||||
this.type = type;
|
||||
}
|
||||
@@ -46,7 +47,7 @@ public class HoodieCommandBlock extends HoodieLogBlock {
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
if(super.getLogMetadata() != null) {
|
||||
if (super.getLogMetadata() != null) {
|
||||
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
|
||||
}
|
||||
output.writeInt(type.ordinal());
|
||||
@@ -66,7 +67,7 @@ public class HoodieCommandBlock extends HoodieLogBlock {
|
||||
public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException {
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
|
||||
Map<LogMetadataType, String> metadata = null;
|
||||
if(readMetadata) {
|
||||
if (readMetadata) {
|
||||
metadata = HoodieLogBlock.getLogMetadata(dis);
|
||||
}
|
||||
int ordinal = dis.readInt();
|
||||
|
||||
@@ -44,7 +44,7 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
|
||||
public byte[] getBytes() throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
if(super.getLogMetadata() != null) {
|
||||
if (super.getLogMetadata() != null) {
|
||||
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
|
||||
}
|
||||
output.write(corruptedBytes);
|
||||
@@ -60,20 +60,21 @@ public class HoodieCorruptBlock extends HoodieLogBlock {
|
||||
return corruptedBytes;
|
||||
}
|
||||
|
||||
public static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata) throws IOException {
|
||||
public static HoodieLogBlock fromBytes(byte[] content, int blockSize, boolean readMetadata)
|
||||
throws IOException {
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
|
||||
Map<LogMetadataType, String> metadata = null;
|
||||
int bytesRemaining = blockSize;
|
||||
if(readMetadata) {
|
||||
if (readMetadata) {
|
||||
try { //attempt to read metadata
|
||||
metadata = HoodieLogBlock.getLogMetadata(dis);
|
||||
bytesRemaining = blockSize - HoodieLogBlock.getLogMetadataBytes(metadata).length;
|
||||
} catch(IOException e) {
|
||||
} catch (IOException e) {
|
||||
// unable to read metadata, possibly corrupted
|
||||
metadata = null;
|
||||
}
|
||||
}
|
||||
byte [] corruptedBytes = new byte[bytesRemaining];
|
||||
byte[] corruptedBytes = new byte[bytesRemaining];
|
||||
dis.readFully(corruptedBytes);
|
||||
return new HoodieCorruptBlock(corruptedBytes, metadata);
|
||||
}
|
||||
|
||||
@@ -16,8 +16,6 @@
|
||||
|
||||
package com.uber.hoodie.common.table.log.block;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
@@ -25,6 +23,7 @@ import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Map;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* Delete block contains a list of keys to be deleted from scanning the blocks so far
|
||||
@@ -46,10 +45,10 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
|
||||
public byte[] getBytes() throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
if(super.getLogMetadata() != null) {
|
||||
if (super.getLogMetadata() != null) {
|
||||
output.write(HoodieLogBlock.getLogMetadataBytes(super.getLogMetadata()));
|
||||
}
|
||||
byte [] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8"));
|
||||
byte[] bytesToWrite = StringUtils.join(keysToDelete, ',').getBytes(Charset.forName("utf-8"));
|
||||
output.writeInt(bytesToWrite.length);
|
||||
output.write(bytesToWrite);
|
||||
return baos.toByteArray();
|
||||
@@ -67,11 +66,11 @@ public class HoodieDeleteBlock extends HoodieLogBlock {
|
||||
public static HoodieLogBlock fromBytes(byte[] content, boolean readMetadata) throws IOException {
|
||||
DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
|
||||
Map<LogMetadataType, String> metadata = null;
|
||||
if(readMetadata) {
|
||||
if (readMetadata) {
|
||||
metadata = HoodieLogBlock.getLogMetadata(dis);
|
||||
}
|
||||
int dataLength = dis.readInt();
|
||||
byte [] data = new byte[dataLength];
|
||||
byte[] data = new byte[dataLength];
|
||||
dis.readFully(data);
|
||||
return new HoodieDeleteBlock(new String(data).split(","), metadata);
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ package com.uber.hoodie.common.table.log.block;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.DataOutputStream;
|
||||
@@ -34,6 +33,7 @@ public abstract class HoodieLogBlock {
|
||||
public byte[] getBytes() throws IOException {
|
||||
throw new HoodieException("No implementation was provided");
|
||||
}
|
||||
|
||||
public HoodieLogBlockType getBlockType() {
|
||||
throw new HoodieException("No implementation was provided");
|
||||
}
|
||||
@@ -42,8 +42,8 @@ public abstract class HoodieLogBlock {
|
||||
private Map<LogMetadataType, String> logMetadata;
|
||||
|
||||
/**
|
||||
* Type of the log block
|
||||
* WARNING: This enum is serialized as the ordinal. Only add new enums at the end.
|
||||
* Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at
|
||||
* the end.
|
||||
*/
|
||||
public enum HoodieLogBlockType {
|
||||
COMMAND_BLOCK,
|
||||
@@ -53,8 +53,8 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Metadata abstraction for a HoodieLogBlock
|
||||
* WARNING : This enum is serialized as the ordinal. Only add new enums at the end.
|
||||
* Metadata abstraction for a HoodieLogBlock WARNING : This enum is serialized as the ordinal.
|
||||
* Only add new enums at the end.
|
||||
*/
|
||||
public enum LogMetadataType {
|
||||
INSTANT_TIME,
|
||||
@@ -70,21 +70,17 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert log metadata to bytes
|
||||
* 1. Write size of metadata
|
||||
* 2. Write enum ordinal
|
||||
* 3. Write actual bytes
|
||||
* @param metadata
|
||||
* @return
|
||||
* @throws IOException
|
||||
* Convert log metadata to bytes 1. Write size of metadata 2. Write enum ordinal 3. Write actual
|
||||
* bytes
|
||||
*/
|
||||
public static byte [] getLogMetadataBytes(Map<LogMetadataType, String> metadata) throws IOException {
|
||||
public static byte[] getLogMetadataBytes(Map<LogMetadataType, String> metadata)
|
||||
throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream output = new DataOutputStream(baos);
|
||||
output.writeInt(metadata.size());
|
||||
for(Map.Entry<LogMetadataType, String> entry : metadata.entrySet()) {
|
||||
for (Map.Entry<LogMetadataType, String> entry : metadata.entrySet()) {
|
||||
output.writeInt(entry.getKey().ordinal());
|
||||
byte [] bytes = entry.getValue().getBytes();
|
||||
byte[] bytes = entry.getValue().getBytes();
|
||||
output.writeInt(bytes.length);
|
||||
output.write(bytes);
|
||||
}
|
||||
@@ -92,13 +88,10 @@ public abstract class HoodieLogBlock {
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert bytes to LogMetadata, follow the same order as
|
||||
* {@link HoodieLogBlock#getLogMetadataBytes}
|
||||
* @param dis
|
||||
* @return
|
||||
* @throws IOException
|
||||
* Convert bytes to LogMetadata, follow the same order as {@link HoodieLogBlock#getLogMetadataBytes}
|
||||
*/
|
||||
public static Map<LogMetadataType, String> getLogMetadata(DataInputStream dis) throws IOException {
|
||||
public static Map<LogMetadataType, String> getLogMetadata(DataInputStream dis)
|
||||
throws IOException {
|
||||
|
||||
Map<LogMetadataType, String> metadata = Maps.newHashMap();
|
||||
// 1. Read the metadata written out
|
||||
@@ -113,7 +106,7 @@ public abstract class HoodieLogBlock {
|
||||
metadataCount--;
|
||||
}
|
||||
return metadata;
|
||||
} catch(EOFException eof) {
|
||||
} catch (EOFException eof) {
|
||||
throw new IOException("Could not read metadata fields ", eof);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,18 @@ import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
@@ -31,297 +42,273 @@ import org.apache.hadoop.fs.Path;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
|
||||
* is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special timeline
|
||||
* that allows for creation of instants on the timeline.
|
||||
* <p></p>
|
||||
* The timeline is not automatically reloaded on any mutation operation, clients have to manually call reload()
|
||||
* so that they can chain multiple mutations to the timeline and then call reload() once.
|
||||
* <p></p>
|
||||
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
* Represents the Active Timeline for the HoodieDataset. Instants for the last 12 hours
|
||||
* (configurable) is in the ActiveTimeline and the rest are Archived. ActiveTimeline is a special
|
||||
* timeline that allows for creation of instants on the timeline. <p></p> The timeline is not
|
||||
* automatically reloaded on any mutation operation, clients have to manually call reload() so that
|
||||
* they can chain multiple mutations to the timeline and then call reload() once. <p></p> This class
|
||||
* can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
*/
|
||||
public class HoodieActiveTimeline extends HoodieDefaultTimeline {
|
||||
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
|
||||
|
||||
public static final SimpleDateFormat COMMIT_FORMATTER = new SimpleDateFormat("yyyyMMddHHmmss");
|
||||
|
||||
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
|
||||
private String metaPath;
|
||||
private transient FileSystem fs;
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieActiveTimeline.class);
|
||||
private String metaPath;
|
||||
private transient FileSystem fs;
|
||||
|
||||
|
||||
/**
|
||||
* Returns next commit time in the {@link #COMMIT_FORMATTER} format.
|
||||
* @return
|
||||
*/
|
||||
public static String createNewCommitTime() {
|
||||
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
|
||||
/**
|
||||
* Returns next commit time in the {@link #COMMIT_FORMATTER} format.
|
||||
*/
|
||||
public static String createNewCommitTime() {
|
||||
return HoodieActiveTimeline.COMMIT_FORMATTER.format(new Date());
|
||||
}
|
||||
|
||||
protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
|
||||
// Filter all the filter in the metapath and include only the extensions passed and
|
||||
// convert them into HoodieInstant
|
||||
try {
|
||||
this.instants =
|
||||
Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
|
||||
// Include only the meta files with extensions that needs to be included
|
||||
String extension = FSUtils.getFileExtension(path.getName());
|
||||
return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
|
||||
})).sorted(Comparator.comparing(
|
||||
// Sort the meta-data by the instant time (first part of the file name)
|
||||
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
|
||||
// create HoodieInstantMarkers from FileStatus, which extracts properties
|
||||
.map(HoodieInstant::new).collect(Collectors.toList());
|
||||
log.info("Loaded instants " + instants);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to scan metadata", e);
|
||||
}
|
||||
this.fs = fs;
|
||||
this.metaPath = metaPath;
|
||||
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||
}
|
||||
|
||||
protected HoodieActiveTimeline(FileSystem fs, String metaPath, String[] includedExtensions) {
|
||||
// Filter all the filter in the metapath and include only the extensions passed and
|
||||
// convert them into HoodieInstant
|
||||
try {
|
||||
this.instants =
|
||||
Arrays.stream(HoodieTableMetaClient.scanFiles(fs, new Path(metaPath), path -> {
|
||||
// Include only the meta files with extensions that needs to be included
|
||||
String extension = FSUtils.getFileExtension(path.getName());
|
||||
return Arrays.stream(includedExtensions).anyMatch(Predicate.isEqual(extension));
|
||||
})).sorted(Comparator.comparing(
|
||||
// Sort the meta-data by the instant time (first part of the file name)
|
||||
fileStatus -> FSUtils.getInstantTime(fileStatus.getPath().getName())))
|
||||
// create HoodieInstantMarkers from FileStatus, which extracts properties
|
||||
.map(HoodieInstant::new).collect(Collectors.toList());
|
||||
log.info("Loaded instants " + instants);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to scan metadata", e);
|
||||
}
|
||||
this.fs = fs;
|
||||
this.metaPath = metaPath;
|
||||
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
|
||||
this(fs, metaPath,
|
||||
new String[]{COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
|
||||
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
|
||||
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
|
||||
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
|
||||
}
|
||||
|
||||
/**
|
||||
* For serialization and de-serialization only.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieActiveTimeline() {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, compactions) that produce new data, in the active
|
||||
* timeline *
|
||||
*/
|
||||
public HoodieTimeline getCommitsAndCompactionsTimeline() {
|
||||
return getTimelineOfActions(
|
||||
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result
|
||||
* in actions, in the active timeline *
|
||||
*/
|
||||
public HoodieTimeline getAllCommitsTimeline() {
|
||||
return getTimelineOfActions(
|
||||
Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION,
|
||||
SAVEPOINT_ACTION, ROLLBACK_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only pure commits (inflight and completed) in the active timeline
|
||||
*/
|
||||
public HoodieTimeline getCommitTimeline() {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the delta commits (inflight and completed) in the active timeline
|
||||
*/
|
||||
public HoodieTimeline getDeltaCommitTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the commits (inflight and completed) in the compaction timeline
|
||||
*/
|
||||
public HoodieTimeline getCompactionTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple
|
||||
* actions
|
||||
*
|
||||
* @param actions actions allowed in the timeline
|
||||
*/
|
||||
public HoodieTimeline getTimelineOfActions(Set<String> actions) {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get only the cleaner action (inflight and completed) in the active timeline
|
||||
*/
|
||||
public HoodieTimeline getCleanerTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the rollback action (inflight and completed) in the active timeline
|
||||
*/
|
||||
public HoodieTimeline getRollbackTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the save point action (inflight and completed) in the active timeline
|
||||
*/
|
||||
public HoodieTimeline getSavePointTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
|
||||
protected Stream<HoodieInstant> filterInstantsByAction(String action) {
|
||||
return instants.stream().filter(s -> s.getAction().equals(action));
|
||||
}
|
||||
|
||||
public void createInflight(HoodieInstant instant) {
|
||||
log.info("Creating a new in-flight instant " + instant);
|
||||
// Create the in-flight file
|
||||
createFileInMetaPath(instant.getFileName(), Optional.empty());
|
||||
}
|
||||
|
||||
public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
|
||||
log.info("Marking instant complete " + instant);
|
||||
Preconditions.checkArgument(instant.isInflight(),
|
||||
"Could not mark an already completed instant as complete again " + instant);
|
||||
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
|
||||
log.info("Completed " + instant);
|
||||
}
|
||||
|
||||
public void revertToInflight(HoodieInstant instant) {
|
||||
log.info("Reverting instant to inflight " + instant);
|
||||
moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
|
||||
log.info("Reverted " + instant + " to inflight");
|
||||
}
|
||||
|
||||
public void deleteInflight(HoodieInstant instant) {
|
||||
log.info("Deleting in-flight " + instant);
|
||||
Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
|
||||
try {
|
||||
boolean result = fs.delete(inFlightCommitFilePath, false);
|
||||
if (result) {
|
||||
log.info("Removed in-flight " + instant);
|
||||
} else {
|
||||
throw new HoodieIOException("Could not delete in-flight instant " + instant);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not remove inflight commit " + inFlightCommitFilePath, e);
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieActiveTimeline(FileSystem fs, String metaPath) {
|
||||
this(fs, metaPath,
|
||||
new String[] {COMMIT_EXTENSION, INFLIGHT_COMMIT_EXTENSION, DELTA_COMMIT_EXTENSION,
|
||||
INFLIGHT_DELTA_COMMIT_EXTENSION, COMPACTION_EXTENSION,
|
||||
INFLIGHT_COMPACTION_EXTENSION, SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION,
|
||||
CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION});
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
Path detailPath = new Path(metaPath, instant.getFileName());
|
||||
return readDataFromPath(detailPath);
|
||||
}
|
||||
|
||||
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
|
||||
Optional<byte[]> data) {
|
||||
Path commitFilePath = new Path(metaPath, completed.getFileName());
|
||||
try {
|
||||
// open a new file and write the commit metadata in
|
||||
Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
|
||||
createFileInMetaPath(inflight.getFileName(), data);
|
||||
boolean success = fs.rename(inflightCommitFile, commitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not complete " + inflight, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For serialization and de-serialization only.
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieActiveTimeline() {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, compactions) that produce new data, in the active timeline
|
||||
**
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getCommitsAndCompactionsTimeline() {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all instants (commits, delta commits, compactions, clean, savepoint, rollback) that result in actions, in the active timeline
|
||||
**
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getAllCommitsTimeline() {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION, COMPACTION_ACTION, DELTA_COMMIT_ACTION, CLEAN_ACTION, SAVEPOINT_ACTION, ROLLBACK_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only pure commits (inflight and completed) in the active timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getCommitTimeline() {
|
||||
return getTimelineOfActions(Sets.newHashSet(COMMIT_ACTION));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the delta commits (inflight and completed) in the active timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getDeltaCommitTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(DELTA_COMMIT_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the commits (inflight and completed) in the compaction timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getCompactionTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(COMPACTION_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a timeline of a specific set of actions. useful to create a merged timeline of multiple actions
|
||||
*
|
||||
* @param actions actions allowed in the timeline
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getTimelineOfActions(Set<String> actions) {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(s -> actions.contains(s.getAction())),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get only the cleaner action (inflight and completed) in the active timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getCleanerTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(CLEAN_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the rollback action (inflight and completed) in the active timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getRollbackTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(ROLLBACK_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get only the save point action (inflight and completed) in the active timeline
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public HoodieTimeline getSavePointTimeline() {
|
||||
return new HoodieDefaultTimeline(filterInstantsByAction(SAVEPOINT_ACTION),
|
||||
(Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails);
|
||||
}
|
||||
|
||||
|
||||
protected Stream<HoodieInstant> filterInstantsByAction(String action) {
|
||||
return instants.stream().filter(s -> s.getAction().equals(action));
|
||||
}
|
||||
|
||||
public void createInflight(HoodieInstant instant) {
|
||||
log.info("Creating a new in-flight instant " + instant);
|
||||
// Create the in-flight file
|
||||
createFileInMetaPath(instant.getFileName(), Optional.empty());
|
||||
}
|
||||
|
||||
public void saveAsComplete(HoodieInstant instant, Optional<byte[]> data) {
|
||||
log.info("Marking instant complete " + instant);
|
||||
Preconditions.checkArgument(instant.isInflight(),
|
||||
"Could not mark an already completed instant as complete again " + instant);
|
||||
moveInflightToComplete(instant, HoodieTimeline.getCompletedInstant(instant), data);
|
||||
log.info("Completed " + instant);
|
||||
}
|
||||
|
||||
public void revertToInflight(HoodieInstant instant) {
|
||||
log.info("Reverting instant to inflight " + instant);
|
||||
moveCompleteToInflight(instant, HoodieTimeline.getInflightInstant(instant));
|
||||
log.info("Reverted " + instant + " to inflight");
|
||||
}
|
||||
|
||||
public void deleteInflight(HoodieInstant instant) {
|
||||
log.info("Deleting in-flight " + instant);
|
||||
Path inFlightCommitFilePath = new Path(metaPath, instant.getFileName());
|
||||
try {
|
||||
boolean result = fs.delete(inFlightCommitFilePath, false);
|
||||
if (result) {
|
||||
log.info("Removed in-flight " + instant);
|
||||
} else {
|
||||
throw new HoodieIOException("Could not delete in-flight instant " + instant);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not remove inflight commit " + inFlightCommitFilePath, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
Path detailPath = new Path(metaPath, instant.getFileName());
|
||||
return readDataFromPath(detailPath);
|
||||
}
|
||||
|
||||
protected void moveInflightToComplete(HoodieInstant inflight, HoodieInstant completed,
|
||||
Optional<byte[]> data) {
|
||||
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
|
||||
Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
|
||||
try {
|
||||
if (!fs.exists(inFlightCommitFilePath)) {
|
||||
Path commitFilePath = new Path(metaPath, completed.getFileName());
|
||||
try {
|
||||
// open a new file and write the commit metadata in
|
||||
Path inflightCommitFile = new Path(metaPath, inflight.getFileName());
|
||||
createFileInMetaPath(inflight.getFileName(), data);
|
||||
boolean success = fs.rename(inflightCommitFile, commitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + inflightCommitFile + " to " + commitFilePath);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not complete " + inflight, e);
|
||||
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not complete revert " + completed, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void moveCompleteToInflight(HoodieInstant completed, HoodieInstant inflight) {
|
||||
Path inFlightCommitFilePath = new Path(metaPath, inflight.getFileName());
|
||||
try {
|
||||
if (!fs.exists(inFlightCommitFilePath)) {
|
||||
Path commitFilePath = new Path(metaPath, completed.getFileName());
|
||||
boolean success = fs.rename(commitFilePath, inFlightCommitFilePath);
|
||||
if (!success) {
|
||||
throw new HoodieIOException(
|
||||
"Could not rename " + commitFilePath + " to " + inFlightCommitFilePath);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not complete revert " + completed, e);
|
||||
public void saveToInflight(HoodieInstant instant, Optional<byte[]> content) {
|
||||
createFileInMetaPath(instant.getFileName(), content);
|
||||
}
|
||||
|
||||
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
|
||||
Path fullPath = new Path(metaPath, filename);
|
||||
try {
|
||||
if (!content.isPresent()) {
|
||||
if (fs.createNewFile(fullPath)) {
|
||||
log.info("Created a new file in meta path: " + fullPath);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
FSDataOutputStream fsout = fs.create(fullPath, true);
|
||||
fsout.write(content.get());
|
||||
fsout.close();
|
||||
return;
|
||||
}
|
||||
throw new HoodieIOException("Failed to create file " + fullPath);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to create file " + fullPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void saveToInflight(HoodieInstant instant, Optional<byte[]> content) {
|
||||
createFileInMetaPath(instant.getFileName(), content);
|
||||
protected Optional<byte[]> readDataFromPath(Path detailPath) {
|
||||
try (FSDataInputStream is = fs.open(detailPath)) {
|
||||
return Optional.of(IOUtils.toByteArray(is));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected void createFileInMetaPath(String filename, Optional<byte[]> content) {
|
||||
Path fullPath = new Path(metaPath, filename);
|
||||
try {
|
||||
if (!content.isPresent()) {
|
||||
if (fs.createNewFile(fullPath)) {
|
||||
log.info("Created a new file in meta path: " + fullPath);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
FSDataOutputStream fsout = fs.create(fullPath, true);
|
||||
fsout.write(content.get());
|
||||
fsout.close();
|
||||
return;
|
||||
}
|
||||
throw new HoodieIOException("Failed to create file " + fullPath);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to create file " + fullPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
protected Optional<byte[]> readDataFromPath(Path detailPath) {
|
||||
try (FSDataInputStream is = fs.open(detailPath)) {
|
||||
return Optional.of(IOUtils.toByteArray(is));
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not read commit details from " + detailPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieActiveTimeline reload() {
|
||||
return new HoodieActiveTimeline(fs, metaPath);
|
||||
}
|
||||
public HoodieActiveTimeline reload() {
|
||||
return new HoodieActiveTimeline(fs, metaPath);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,13 +19,6 @@ package com.uber.hoodie.common.table.timeline;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.Arrays;
|
||||
@@ -34,79 +27,85 @@ import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours (configurable)
|
||||
* is in the ActiveTimeline and the rest are in ArchivedTimeline.
|
||||
* <p></p>
|
||||
* Instants are read from the archive file during initialization and never refreshed. To refresh, clients
|
||||
* need to call reload()
|
||||
* <p></p>
|
||||
* This class can be serialized and de-serialized and on de-serialization the FileSystem is re-initialized.
|
||||
* Represents the Archived Timeline for the HoodieDataset. Instants for the last 12 hours
|
||||
* (configurable) is in the ActiveTimeline and the rest are in ArchivedTimeline. <p></p> Instants
|
||||
* are read from the archive file during initialization and never refreshed. To refresh, clients
|
||||
* need to call reload() <p></p> This class can be serialized and de-serialized and on
|
||||
* de-serialization the FileSystem is re-initialized.
|
||||
*/
|
||||
public class HoodieArchivedTimeline extends HoodieDefaultTimeline {
|
||||
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
|
||||
private transient FileSystem fs;
|
||||
private String metaPath;
|
||||
private Map<String, byte[]> readCommits = new HashMap<>();
|
||||
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class);
|
||||
private static final String HOODIE_COMMIT_ARCHIVE_LOG_FILE = "commits";
|
||||
private transient FileSystem fs;
|
||||
private String metaPath;
|
||||
private Map<String, byte[]> readCommits = new HashMap<>();
|
||||
|
||||
public HoodieArchivedTimeline(FileSystem fs, String metaPath) {
|
||||
// Read back the commits to make sure
|
||||
Path archiveLogPath = getArchiveLogPath(metaPath);
|
||||
try (SequenceFile.Reader reader =
|
||||
new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) {
|
||||
Text key = new Text();
|
||||
Text val = new Text();
|
||||
while (reader.next(key, val)) {
|
||||
// TODO - limit the number of commits loaded in memory. this could get very large.
|
||||
// This is okay because only tooling will load the archived commit timeline today
|
||||
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
|
||||
}
|
||||
this.instants = readCommits.keySet().stream().map(
|
||||
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
|
||||
Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not load archived commit timeline from path " + archiveLogPath, e);
|
||||
}
|
||||
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||
this.fs = fs;
|
||||
this.metaPath = metaPath;
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieArchivedTimeline.class);
|
||||
|
||||
public HoodieArchivedTimeline(FileSystem fs, String metaPath) {
|
||||
// Read back the commits to make sure
|
||||
Path archiveLogPath = getArchiveLogPath(metaPath);
|
||||
try (SequenceFile.Reader reader =
|
||||
new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(archiveLogPath))) {
|
||||
Text key = new Text();
|
||||
Text val = new Text();
|
||||
while (reader.next(key, val)) {
|
||||
// TODO - limit the number of commits loaded in memory. this could get very large.
|
||||
// This is okay because only tooling will load the archived commit timeline today
|
||||
readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength()));
|
||||
}
|
||||
this.instants = readCommits.keySet().stream().map(
|
||||
s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)).collect(
|
||||
Collectors.toList());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not load archived commit timeline from path " + archiveLogPath, e);
|
||||
}
|
||||
// multiple casts will make this lambda serializable - http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16
|
||||
this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails;
|
||||
this.fs = fs;
|
||||
this.metaPath = metaPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* For serialization and de-serialization only.
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieArchivedTimeline() {
|
||||
}
|
||||
/**
|
||||
* For serialization and de-serialization only.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieArchivedTimeline() {
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
/**
|
||||
* This method is only used when this object is deserialized in a spark executor.
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
|
||||
|
||||
public static Path getArchiveLogPath(String metaPath) {
|
||||
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
|
||||
}
|
||||
public static Path getArchiveLogPath(String metaPath) {
|
||||
return new Path(metaPath, HOODIE_COMMIT_ARCHIVE_LOG_FILE);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
return Optional.ofNullable(readCommits.get(instant.getTimestamp()));
|
||||
}
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
return Optional.ofNullable(readCommits.get(instant.getTimestamp()));
|
||||
}
|
||||
|
||||
public HoodieArchivedTimeline reload() {
|
||||
return new HoodieArchivedTimeline(fs, metaPath);
|
||||
}
|
||||
public HoodieArchivedTimeline reload() {
|
||||
return new HoodieArchivedTimeline(fs, metaPath);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -17,135 +17,136 @@
|
||||
package com.uber.hoodie.common.table.timeline;
|
||||
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline.
|
||||
* It provides methods to inspect a List[HoodieInstant]. Function to get the details of the instant
|
||||
* is passed in as a lamdba.
|
||||
* HoodieDefaultTimeline is a default implementation of the HoodieTimeline. It provides methods to
|
||||
* inspect a List[HoodieInstant]. Function to get the details of the instant is passed in as a
|
||||
* lamdba.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
*/
|
||||
public class HoodieDefaultTimeline implements HoodieTimeline {
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
|
||||
|
||||
protected Function<HoodieInstant, Optional<byte[]>> details;
|
||||
protected List<HoodieInstant> instants;
|
||||
private final transient static Logger log = LogManager.getLogger(HoodieDefaultTimeline.class);
|
||||
|
||||
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
|
||||
Function<HoodieInstant, Optional<byte[]>> details) {
|
||||
this.instants = instants.collect(Collectors.toList());
|
||||
this.details = details;
|
||||
}
|
||||
protected Function<HoodieInstant, Optional<byte[]>> details;
|
||||
protected List<HoodieInstant> instants;
|
||||
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieDefaultTimeline() {
|
||||
}
|
||||
public HoodieDefaultTimeline(Stream<HoodieInstant> instants,
|
||||
Function<HoodieInstant, Optional<byte[]>> details) {
|
||||
this.instants = instants.collect(Collectors.toList());
|
||||
this.details = details;
|
||||
}
|
||||
|
||||
public HoodieTimeline filterInflights() {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
|
||||
details);
|
||||
}
|
||||
/**
|
||||
* For serailizing and de-serializing
|
||||
*
|
||||
* @deprecated
|
||||
*/
|
||||
public HoodieDefaultTimeline() {
|
||||
}
|
||||
|
||||
public HoodieTimeline filterCompletedInstants() {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
|
||||
}
|
||||
public HoodieTimeline filterInflights() {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(HoodieInstant::isInflight),
|
||||
details);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(
|
||||
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) &&
|
||||
HoodieTimeline.compareTimestamps(
|
||||
public HoodieTimeline filterCompletedInstants() {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(s -> !s.isInflight()), details);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsInRange(String startTs, String endTs) {
|
||||
return new HoodieDefaultTimeline(instants.stream().filter(
|
||||
s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), startTs, GREATER) &&
|
||||
HoodieTimeline.compareTimestamps(
|
||||
s.getTimestamp(), endTs, LESSER_OR_EQUAL)), details);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
|
||||
return new HoodieDefaultTimeline(
|
||||
instants.stream().filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
|
||||
.limit(numCommits), details);
|
||||
}
|
||||
@Override
|
||||
public HoodieDefaultTimeline findInstantsAfter(String commitTime, int numCommits) {
|
||||
return new HoodieDefaultTimeline(
|
||||
instants.stream()
|
||||
.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), commitTime, GREATER))
|
||||
.limit(numCommits), details);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean empty() {
|
||||
return !instants.stream().findFirst().isPresent();
|
||||
}
|
||||
@Override
|
||||
public boolean empty() {
|
||||
return !instants.stream().findFirst().isPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countInstants() {
|
||||
return new Long(instants.stream().count()).intValue();
|
||||
}
|
||||
@Override
|
||||
public int countInstants() {
|
||||
return new Long(instants.stream().count()).intValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<HoodieInstant> firstInstant() {
|
||||
return instants.stream().findFirst();
|
||||
}
|
||||
@Override
|
||||
public Optional<HoodieInstant> firstInstant() {
|
||||
return instants.stream().findFirst();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<HoodieInstant> nthInstant(int n) {
|
||||
if (empty() || n >= countInstants()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(instants.get(n));
|
||||
@Override
|
||||
public Optional<HoodieInstant> nthInstant(int n) {
|
||||
if (empty() || n >= countInstants()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(instants.get(n));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<HoodieInstant> lastInstant() {
|
||||
return empty() ? Optional.empty() : nthInstant(countInstants() - 1);
|
||||
}
|
||||
@Override
|
||||
public Optional<HoodieInstant> lastInstant() {
|
||||
return empty() ? Optional.empty() : nthInstant(countInstants() - 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<HoodieInstant> nthFromLastInstant(int n) {
|
||||
if (countInstants() < n + 1) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return nthInstant(countInstants() - 1 - n);
|
||||
@Override
|
||||
public Optional<HoodieInstant> nthFromLastInstant(int n) {
|
||||
if (countInstants() < n + 1) {
|
||||
return Optional.empty();
|
||||
}
|
||||
return nthInstant(countInstants() - 1 - n);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsInstant(HoodieInstant instant) {
|
||||
return instants.stream().anyMatch(s -> s.equals(instant));
|
||||
}
|
||||
@Override
|
||||
public boolean containsInstant(HoodieInstant instant) {
|
||||
return instants.stream().anyMatch(s -> s.equals(instant));
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean containsOrBeforeTimelineStarts(String instant) {
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|
||||
|| isBeforeTimelineStarts(instant);
|
||||
}
|
||||
@Override
|
||||
public boolean containsOrBeforeTimelineStarts(String instant) {
|
||||
return instants.stream().anyMatch(s -> s.getTimestamp().equals(instant))
|
||||
|| isBeforeTimelineStarts(instant);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieInstant> getInstants() {
|
||||
return instants.stream();
|
||||
}
|
||||
@Override
|
||||
public Stream<HoodieInstant> getInstants() {
|
||||
return instants.stream();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBeforeTimelineStarts(String instant) {
|
||||
Optional<HoodieInstant> firstCommit = firstInstant();
|
||||
return firstCommit.isPresent() &&
|
||||
HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER);
|
||||
}
|
||||
@Override
|
||||
public boolean isBeforeTimelineStarts(String instant) {
|
||||
Optional<HoodieInstant> firstCommit = firstInstant();
|
||||
return firstCommit.isPresent() &&
|
||||
HoodieTimeline.compareTimestamps(instant, firstCommit.get().getTimestamp(), LESSER);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
return details.apply(instant);
|
||||
}
|
||||
@Override
|
||||
public Optional<byte[]> getInstantDetails(HoodieInstant instant) {
|
||||
return details.apply(instant);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
|
||||
.collect(Collectors.joining(","));
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getName() + ": " + instants.stream().map(Object::toString)
|
||||
.collect(Collectors.joining(","));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -16,118 +16,117 @@
|
||||
|
||||
package com.uber.hoodie.common.table.timeline;
|
||||
|
||||
import com.google.common.io.Files;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
/**
|
||||
* A Hoodie Instant represents a action done on a hoodie dataset.
|
||||
* All actions start with a inflight instant and then create a completed instant after done.
|
||||
* A Hoodie Instant represents a action done on a hoodie dataset. All actions start with a inflight
|
||||
* instant and then create a completed instant after done.
|
||||
*
|
||||
* @see HoodieTimeline
|
||||
*/
|
||||
public class HoodieInstant implements Serializable {
|
||||
private boolean isInflight = false;
|
||||
private String action;
|
||||
private String timestamp;
|
||||
|
||||
/**
|
||||
* Load the instant from the meta FileStatus
|
||||
* @param fileStatus
|
||||
*/
|
||||
public HoodieInstant(FileStatus fileStatus) {
|
||||
// First read the instant timestamp. [==>20170101193025<==].commit
|
||||
String fileName = fileStatus.getPath().getName();
|
||||
String fileExtension = FSUtils.getFileExtension(fileName);
|
||||
timestamp = fileName.replace(fileExtension, "");
|
||||
private boolean isInflight = false;
|
||||
private String action;
|
||||
private String timestamp;
|
||||
|
||||
// Next read the action for this marker
|
||||
action = fileExtension.replaceFirst(".", "");
|
||||
if(action.equals("inflight")) {
|
||||
// This is to support backwards compatibility on how in-flight commit files were written
|
||||
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
|
||||
action = "commit";
|
||||
isInflight = true;
|
||||
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
|
||||
isInflight = true;
|
||||
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||
}
|
||||
/**
|
||||
* Load the instant from the meta FileStatus
|
||||
*/
|
||||
public HoodieInstant(FileStatus fileStatus) {
|
||||
// First read the instant timestamp. [==>20170101193025<==].commit
|
||||
String fileName = fileStatus.getPath().getName();
|
||||
String fileExtension = FSUtils.getFileExtension(fileName);
|
||||
timestamp = fileName.replace(fileExtension, "");
|
||||
|
||||
// Next read the action for this marker
|
||||
action = fileExtension.replaceFirst(".", "");
|
||||
if (action.equals("inflight")) {
|
||||
// This is to support backwards compatibility on how in-flight commit files were written
|
||||
// General rule is inflight extension is .<action>.inflight, but for commit it is .inflight
|
||||
action = "commit";
|
||||
isInflight = true;
|
||||
} else if (action.contains(HoodieTimeline.INFLIGHT_EXTENSION)) {
|
||||
isInflight = true;
|
||||
action = action.replace(HoodieTimeline.INFLIGHT_EXTENSION, "");
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieInstant(boolean isInflight, String action, String timestamp) {
|
||||
this.isInflight = isInflight;
|
||||
this.action = action;
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
public HoodieInstant(boolean isInflight, String action, String timestamp) {
|
||||
this.isInflight = isInflight;
|
||||
this.action = action;
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
|
||||
public boolean isInflight() {
|
||||
return isInflight;
|
||||
}
|
||||
public boolean isInflight() {
|
||||
return isInflight;
|
||||
}
|
||||
|
||||
public String getAction() {
|
||||
return action;
|
||||
}
|
||||
public String getAction() {
|
||||
return action;
|
||||
}
|
||||
|
||||
public String getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
public String getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the filename for this instant
|
||||
* @return
|
||||
*/
|
||||
public String getFileName() {
|
||||
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCommitFileName(timestamp) :
|
||||
HoodieTimeline.makeCommitFileName(timestamp);
|
||||
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
|
||||
HoodieTimeline.makeCleanerFileName(timestamp);
|
||||
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightRollbackFileName(timestamp) :
|
||||
HoodieTimeline.makeRollbackFileName(timestamp);
|
||||
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
|
||||
HoodieTimeline.makeSavePointFileName(timestamp);
|
||||
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCompactionFileName(timestamp) :
|
||||
HoodieTimeline.makeCompactionFileName(timestamp);
|
||||
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightDeltaFileName(timestamp) :
|
||||
HoodieTimeline.makeDeltaFileName(timestamp);
|
||||
}
|
||||
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
|
||||
/**
|
||||
* Get the filename for this instant
|
||||
*/
|
||||
public String getFileName() {
|
||||
if (HoodieTimeline.COMMIT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCommitFileName(timestamp) :
|
||||
HoodieTimeline.makeCommitFileName(timestamp);
|
||||
} else if (HoodieTimeline.CLEAN_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCleanerFileName(timestamp) :
|
||||
HoodieTimeline.makeCleanerFileName(timestamp);
|
||||
} else if (HoodieTimeline.ROLLBACK_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightRollbackFileName(timestamp) :
|
||||
HoodieTimeline.makeRollbackFileName(timestamp);
|
||||
} else if (HoodieTimeline.SAVEPOINT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightSavePointFileName(timestamp) :
|
||||
HoodieTimeline.makeSavePointFileName(timestamp);
|
||||
} else if (HoodieTimeline.COMPACTION_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightCompactionFileName(timestamp) :
|
||||
HoodieTimeline.makeCompactionFileName(timestamp);
|
||||
} else if (HoodieTimeline.DELTA_COMMIT_ACTION.equals(action)) {
|
||||
return isInflight ?
|
||||
HoodieTimeline.makeInflightDeltaFileName(timestamp) :
|
||||
HoodieTimeline.makeDeltaFileName(timestamp);
|
||||
}
|
||||
throw new IllegalArgumentException("Cannot get file name for unknown action " + action);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieInstant that = (HoodieInstant) o;
|
||||
return isInflight == that.isInflight &&
|
||||
Objects.equals(action, that.action) &&
|
||||
Objects.equals(timestamp, that.timestamp);
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
HoodieInstant that = (HoodieInstant) o;
|
||||
return isInflight == that.isInflight &&
|
||||
Objects.equals(action, that.action) &&
|
||||
Objects.equals(timestamp, that.timestamp);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(isInflight, action, timestamp);
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(isInflight, action, timestamp);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
|
||||
}
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + ((isInflight) ? "==>" : "") + timestamp + "__" + action + "]";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,18 +19,12 @@ package com.uber.hoodie.common.table.view;
|
||||
import com.uber.hoodie.common.model.FileSlice;
|
||||
import com.uber.hoodie.common.model.HoodieDataFile;
|
||||
import com.uber.hoodie.common.model.HoodieFileGroup;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.model.HoodieLogFile;
|
||||
import com.uber.hoodie.common.table.HoodieTableMetaClient;
|
||||
import com.uber.hoodie.common.table.HoodieTimeline;
|
||||
import com.uber.hoodie.common.table.TableFileSystemView;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
@@ -44,6 +38,10 @@ import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Common abstract implementation for multiple TableFileSystemView Implementations. 2 possible
|
||||
@@ -54,8 +52,9 @@ import java.util.stream.Stream;
|
||||
* @see TableFileSystemView
|
||||
* @since 0.3.0
|
||||
*/
|
||||
public class HoodieTableFileSystemView implements TableFileSystemView, TableFileSystemView.ReadOptimizedView,
|
||||
TableFileSystemView.RealtimeView, Serializable {
|
||||
public class HoodieTableFileSystemView implements TableFileSystemView,
|
||||
TableFileSystemView.ReadOptimizedView,
|
||||
TableFileSystemView.RealtimeView, Serializable {
|
||||
|
||||
protected HoodieTableMetaClient metaClient;
|
||||
protected transient FileSystem fs;
|
||||
@@ -69,12 +68,9 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
|
||||
|
||||
/**
|
||||
* Create a file system view, as of the given timeline
|
||||
*
|
||||
* @param metaClient
|
||||
* @param visibleActiveTimeline
|
||||
*/
|
||||
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline) {
|
||||
HoodieTimeline visibleActiveTimeline) {
|
||||
this.metaClient = metaClient;
|
||||
this.fs = metaClient.getFs();
|
||||
this.visibleActiveTimeline = visibleActiveTimeline;
|
||||
@@ -85,14 +81,10 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
|
||||
|
||||
/**
|
||||
* Create a file system view, as of the given timeline, with the provided file statuses.
|
||||
*
|
||||
* @param metaClient
|
||||
* @param visibleActiveTimeline
|
||||
* @param fileStatuses
|
||||
*/
|
||||
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient,
|
||||
HoodieTimeline visibleActiveTimeline,
|
||||
FileStatus[] fileStatuses) {
|
||||
HoodieTimeline visibleActiveTimeline,
|
||||
FileStatus[] fileStatuses) {
|
||||
this(metaClient, visibleActiveTimeline);
|
||||
addFilesToView(fileStatuses);
|
||||
}
|
||||
@@ -104,44 +96,44 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
|
||||
* @deprecated
|
||||
*/
|
||||
private void readObject(java.io.ObjectInputStream in)
|
||||
throws IOException, ClassNotFoundException {
|
||||
throws IOException, ClassNotFoundException {
|
||||
in.defaultReadObject();
|
||||
this.fs = FSUtils.getFs();
|
||||
}
|
||||
|
||||
private void writeObject(java.io.ObjectOutputStream out)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
out.defaultWriteObject();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the provided statuses into the file system view, and also caches it inside this object.
|
||||
*
|
||||
* @param statuses
|
||||
* @return
|
||||
*/
|
||||
private List<HoodieFileGroup> addFilesToView(FileStatus[] statuses) {
|
||||
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = convertFileStatusesToDataFiles(statuses)
|
||||
.collect(Collectors.groupingBy((dataFile) -> {
|
||||
String partitionPathStr = FSUtils.getRelativePartitionPath(
|
||||
new Path(metaClient.getBasePath()),
|
||||
dataFile.getFileStatus().getPath().getParent());
|
||||
return Pair.of(partitionPathStr , dataFile.getFileId());
|
||||
}));
|
||||
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = convertFileStatusesToLogFiles(statuses)
|
||||
.collect(Collectors.groupingBy((logFile) -> {
|
||||
String partitionPathStr = FSUtils.getRelativePartitionPath(
|
||||
new Path(metaClient.getBasePath()),
|
||||
logFile.getPath().getParent());
|
||||
return Pair.of(partitionPathStr , logFile.getFileId());
|
||||
}));
|
||||
Map<Pair<String, String>, List<HoodieDataFile>> dataFiles = convertFileStatusesToDataFiles(
|
||||
statuses)
|
||||
.collect(Collectors.groupingBy((dataFile) -> {
|
||||
String partitionPathStr = FSUtils.getRelativePartitionPath(
|
||||
new Path(metaClient.getBasePath()),
|
||||
dataFile.getFileStatus().getPath().getParent());
|
||||
return Pair.of(partitionPathStr, dataFile.getFileId());
|
||||
}));
|
||||
Map<Pair<String, String>, List<HoodieLogFile>> logFiles = convertFileStatusesToLogFiles(
|
||||
statuses)
|
||||
.collect(Collectors.groupingBy((logFile) -> {
|
||||
String partitionPathStr = FSUtils.getRelativePartitionPath(
|
||||
new Path(metaClient.getBasePath()),
|
||||
logFile.getPath().getParent());
|
||||
return Pair.of(partitionPathStr, logFile.getFileId());
|
||||
}));
|
||||
|
||||
Set<Pair<String, String>> fileIdSet = new HashSet<>(dataFiles.keySet());
|
||||
fileIdSet.addAll(logFiles.keySet());
|
||||
|
||||
List<HoodieFileGroup> fileGroups = new ArrayList<>();
|
||||
fileIdSet.forEach(pair -> {
|
||||
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(), visibleActiveTimeline);
|
||||
HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), pair.getValue(),
|
||||
visibleActiveTimeline);
|
||||
if (dataFiles.containsKey(pair)) {
|
||||
dataFiles.get(pair).forEach(dataFile -> group.addDataFile(dataFile));
|
||||
}
|
||||
@@ -165,90 +157,93 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
|
||||
|
||||
private Stream<HoodieDataFile> convertFileStatusesToDataFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> roFilePredicate = fileStatus ->
|
||||
fileStatus.getPath().getName().contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getROFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(roFilePredicate).map(HoodieDataFile::new);
|
||||
}
|
||||
|
||||
private Stream<HoodieLogFile> convertFileStatusesToLogFiles(FileStatus[] statuses) {
|
||||
Predicate<FileStatus> rtFilePredicate = fileStatus ->
|
||||
fileStatus.getPath().getName().contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
fileStatus.getPath().getName()
|
||||
.contains(metaClient.getTableConfig().getRTFileFormat().getFileExtension());
|
||||
return Arrays.stream(statuses).filter(rtFilePredicate).map(HoodieLogFile::new);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles(final String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestDataFile())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestDataFile())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFiles() {
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> fileGroup.getLatestDataFile())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestDataFile())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime) {
|
||||
String maxCommitTime) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestDataFileBeforeOrOn(maxCommitTime))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> fileGroup.getLatestDataFileInRange(commitsToReturn))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<HoodieDataFile> getAllDataFiles(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles())
|
||||
.flatMap(dataFileList -> dataFileList);
|
||||
.map(fileGroup -> fileGroup.getAllDataFiles())
|
||||
.flatMap(dataFileList -> dataFileList);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSlices(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestFileSlice())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestFileSlice())
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath, String maxCommitTime) {
|
||||
public Stream<FileSlice> getLatestFileSlicesBeforeOrOn(String partitionPath,
|
||||
String maxCommitTime) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceBeforeOrOn(maxCommitTime))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getLatestFileSliceInRange(List<String> commitsToReturn) {
|
||||
return fileGroupMap.values().stream()
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
.map(fileGroup -> fileGroup.getLatestFileSliceInRange(commitsToReturn))
|
||||
.filter(dataFileOpt -> dataFileOpt.isPresent())
|
||||
.map(Optional::get);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<FileSlice> getAllFileSlices(String partitionPath) {
|
||||
return getAllFileGroups(partitionPath)
|
||||
.map(group -> group.getAllFileSlices())
|
||||
.flatMap(sliceList -> sliceList);
|
||||
.map(group -> group.getAllFileSlices())
|
||||
.flatMap(sliceList -> sliceList);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a partition path, obtain all filegroups within that. All methods, that work at the partition level
|
||||
* go through this.
|
||||
* Given a partition path, obtain all filegroups within that. All methods, that work at the
|
||||
* partition level go through this.
|
||||
*/
|
||||
@Override
|
||||
public Stream<HoodieFileGroup> getAllFileGroups(String partitionPathStr) {
|
||||
@@ -266,7 +261,7 @@ public class HoodieTableFileSystemView implements TableFileSystemView, TableFile
|
||||
return fileGroups.stream();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Failed to list data files in partition " + partitionPathStr, e);
|
||||
"Failed to list data files in partition " + partitionPathStr, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,6 @@ package com.uber.hoodie.common.util;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import com.uber.hoodie.avro.model.HoodieCleanMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieCleanPartitionMetadata;
|
||||
import com.uber.hoodie.avro.model.HoodieRollbackMetadata;
|
||||
@@ -32,7 +31,11 @@ import com.uber.hoodie.common.model.HoodieAvroPayload;
|
||||
import com.uber.hoodie.common.model.HoodieKey;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.file.DataFileReader;
|
||||
import org.apache.avro.file.DataFileWriter;
|
||||
@@ -50,146 +53,140 @@ import org.apache.avro.specific.SpecificRecordBase;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
public class AvroUtils {
|
||||
|
||||
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFiles(FileSystem fs,
|
||||
List<String> deltaFilePaths, Schema expectedSchema) {
|
||||
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
|
||||
deltaFilePaths.forEach(s -> {
|
||||
List<HoodieRecord<HoodieAvroPayload>> records = loadFromFile(fs, s, expectedSchema);
|
||||
loadedRecords.addAll(records);
|
||||
});
|
||||
return loadedRecords;
|
||||
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFiles(FileSystem fs,
|
||||
List<String> deltaFilePaths, Schema expectedSchema) {
|
||||
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
|
||||
deltaFilePaths.forEach(s -> {
|
||||
List<HoodieRecord<HoodieAvroPayload>> records = loadFromFile(fs, s, expectedSchema);
|
||||
loadedRecords.addAll(records);
|
||||
});
|
||||
return loadedRecords;
|
||||
}
|
||||
|
||||
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs,
|
||||
String deltaFilePath, Schema expectedSchema) {
|
||||
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
|
||||
Path path = new Path(deltaFilePath);
|
||||
try {
|
||||
SeekableInput input = new FsInput(path, fs.getConf());
|
||||
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
|
||||
// Set the expected schema to be the current schema to account for schema evolution
|
||||
reader.setExpected(expectedSchema);
|
||||
|
||||
FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
|
||||
for (GenericRecord deltaRecord : fileReader) {
|
||||
String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String partitionPath =
|
||||
deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
|
||||
new HoodieAvroPayload(Optional.of(deltaRecord))));
|
||||
}
|
||||
fileReader.close(); // also closes underlying FsInput
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not read avro records from path " + deltaFilePath,
|
||||
e);
|
||||
}
|
||||
return loadedRecords;
|
||||
}
|
||||
|
||||
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs,
|
||||
String deltaFilePath, Schema expectedSchema) {
|
||||
List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList();
|
||||
Path path = new Path(deltaFilePath);
|
||||
try {
|
||||
SeekableInput input = new FsInput(path, fs.getConf());
|
||||
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
|
||||
// Set the expected schema to be the current schema to account for schema evolution
|
||||
reader.setExpected(expectedSchema);
|
||||
|
||||
FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader);
|
||||
for (GenericRecord deltaRecord : fileReader) {
|
||||
String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
|
||||
String partitionPath =
|
||||
deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
|
||||
loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath),
|
||||
new HoodieAvroPayload(Optional.of(deltaRecord))));
|
||||
}
|
||||
fileReader.close(); // also closes underlying FsInput
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not read avro records from path " + deltaFilePath,
|
||||
e);
|
||||
}
|
||||
return loadedRecords;
|
||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
|
||||
Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
|
||||
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieCleanPartitionMetadata>builder();
|
||||
int totalDeleted = 0;
|
||||
String earliestCommitToRetain = null;
|
||||
for (HoodieCleanStat stat : cleanStats) {
|
||||
HoodieCleanPartitionMetadata metadata =
|
||||
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
|
||||
stat.getDeletePathPatterns());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
if (earliestCommitToRetain == null) {
|
||||
// This will be the same for all partitions
|
||||
earliestCommitToRetain = stat.getEarliestCommitToRetain();
|
||||
}
|
||||
}
|
||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieCleanMetadata convertCleanMetadata(String startCleanTime,
|
||||
Optional<Long> durationInMs, List<HoodieCleanStat> cleanStats) {
|
||||
ImmutableMap.Builder<String, HoodieCleanPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieCleanPartitionMetadata>builder();
|
||||
int totalDeleted = 0;
|
||||
String earliestCommitToRetain = null;
|
||||
for (HoodieCleanStat stat : cleanStats) {
|
||||
HoodieCleanPartitionMetadata metadata =
|
||||
new HoodieCleanPartitionMetadata(stat.getPartitionPath(), stat.getPolicy().name(),
|
||||
stat.getDeletePathPatterns(), stat.getSuccessDeleteFiles(),
|
||||
stat.getDeletePathPatterns());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
if (earliestCommitToRetain == null) {
|
||||
// This will be the same for all partitions
|
||||
earliestCommitToRetain = stat.getEarliestCommitToRetain();
|
||||
}
|
||||
}
|
||||
return new HoodieCleanMetadata(startCleanTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, earliestCommitToRetain, partitionMetadataBuilder.build());
|
||||
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
|
||||
Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
|
||||
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieRollbackPartitionMetadata>builder();
|
||||
int totalDeleted = 0;
|
||||
for (HoodieRollbackStat stat : stats) {
|
||||
HoodieRollbackPartitionMetadata metadata =
|
||||
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
|
||||
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
}
|
||||
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, commits, partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbackTime,
|
||||
Optional<Long> durationInMs, List<String> commits, List<HoodieRollbackStat> stats) {
|
||||
ImmutableMap.Builder<String, HoodieRollbackPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieRollbackPartitionMetadata>builder();
|
||||
int totalDeleted = 0;
|
||||
for (HoodieRollbackStat stat : stats) {
|
||||
HoodieRollbackPartitionMetadata metadata =
|
||||
new HoodieRollbackPartitionMetadata(stat.getPartitionPath(),
|
||||
stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles());
|
||||
partitionMetadataBuilder.put(stat.getPartitionPath(), metadata);
|
||||
totalDeleted += stat.getSuccessDeleteFiles().size();
|
||||
}
|
||||
return new HoodieRollbackMetadata(startRollbackTime, durationInMs.orElseGet(() -> -1L),
|
||||
totalDeleted, commits, partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
|
||||
Map<String, List<String>> latestFiles) {
|
||||
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieSavepointPartitionMetadata>builder();
|
||||
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
|
||||
HoodieSavepointPartitionMetadata metadata =
|
||||
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
|
||||
partitionMetadataBuilder.put(stat.getKey(), metadata);
|
||||
}
|
||||
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
|
||||
partitionMetadataBuilder.build());
|
||||
public static HoodieSavepointMetadata convertSavepointMetadata(String user, String comment,
|
||||
Map<String, List<String>> latestFiles) {
|
||||
ImmutableMap.Builder<String, HoodieSavepointPartitionMetadata> partitionMetadataBuilder =
|
||||
ImmutableMap.<String, HoodieSavepointPartitionMetadata>builder();
|
||||
for (Map.Entry<String, List<String>> stat : latestFiles.entrySet()) {
|
||||
HoodieSavepointPartitionMetadata metadata =
|
||||
new HoodieSavepointPartitionMetadata(stat.getKey(), stat.getValue());
|
||||
partitionMetadataBuilder.put(stat.getKey(), metadata);
|
||||
}
|
||||
return new HoodieSavepointMetadata(user, System.currentTimeMillis(), comment,
|
||||
partitionMetadataBuilder.build());
|
||||
}
|
||||
|
||||
|
||||
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
|
||||
throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
|
||||
}
|
||||
public static Optional<byte[]> serializeCleanMetadata(HoodieCleanMetadata metadata)
|
||||
throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieCleanMetadata.class);
|
||||
}
|
||||
|
||||
public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
|
||||
throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
|
||||
}
|
||||
public static Optional<byte[]> serializeSavepointMetadata(HoodieSavepointMetadata metadata)
|
||||
throws IOException {
|
||||
return serializeAvroMetadata(metadata, HoodieSavepointMetadata.class);
|
||||
}
|
||||
|
||||
public static Optional<byte[]> serializeRollbackMetadata(
|
||||
HoodieRollbackMetadata rollbackMetadata) throws IOException {
|
||||
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
|
||||
}
|
||||
public static Optional<byte[]> serializeRollbackMetadata(
|
||||
HoodieRollbackMetadata rollbackMetadata) throws IOException {
|
||||
return serializeAvroMetadata(rollbackMetadata, HoodieRollbackMetadata.class);
|
||||
}
|
||||
|
||||
public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
|
||||
Class<T> clazz) throws IOException {
|
||||
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
|
||||
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
fileWriter.create(metadata.getSchema(), baos);
|
||||
fileWriter.append(metadata);
|
||||
fileWriter.flush();
|
||||
return Optional.of(baos.toByteArray());
|
||||
}
|
||||
public static <T extends SpecificRecordBase> Optional<byte[]> serializeAvroMetadata(T metadata,
|
||||
Class<T> clazz) throws IOException {
|
||||
DatumWriter<T> datumWriter = new SpecificDatumWriter<>(clazz);
|
||||
DataFileWriter<T> fileWriter = new DataFileWriter<>(datumWriter);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
fileWriter.create(metadata.getSchema(), baos);
|
||||
fileWriter.append(metadata);
|
||||
fileWriter.flush();
|
||||
return Optional.of(baos.toByteArray());
|
||||
}
|
||||
|
||||
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
|
||||
}
|
||||
public static HoodieCleanMetadata deserializeHoodieCleanMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieCleanMetadata.class);
|
||||
}
|
||||
|
||||
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
||||
}
|
||||
public static HoodieSavepointMetadata deserializeHoodieSavepointMetadata(byte[] bytes)
|
||||
throws IOException {
|
||||
return deserializeAvroMetadata(bytes, HoodieSavepointMetadata.class);
|
||||
}
|
||||
|
||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
|
||||
Class<T> clazz) throws IOException {
|
||||
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
||||
FileReader<T> fileReader =
|
||||
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
|
||||
Preconditions
|
||||
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
|
||||
return fileReader.next();
|
||||
}
|
||||
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes,
|
||||
Class<T> clazz) throws IOException {
|
||||
DatumReader<T> reader = new SpecificDatumReader<>(clazz);
|
||||
FileReader<T> fileReader =
|
||||
DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader);
|
||||
Preconditions
|
||||
.checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
|
||||
return fileReader.next();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -23,16 +23,6 @@ import com.uber.hoodie.common.model.HoodiePartitionMetadata;
|
||||
import com.uber.hoodie.common.table.timeline.HoodieInstant;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.exception.InvalidHoodiePathException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@@ -43,319 +33,339 @@ import java.util.Optional;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Utility functions related to accessing the file storage
|
||||
*/
|
||||
public class FSUtils {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
|
||||
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
|
||||
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)");
|
||||
private static final String LOG_FILE_PREFIX = ".";
|
||||
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
|
||||
private static final long MIN_CLEAN_TO_KEEP = 10;
|
||||
private static final long MIN_ROLLBACK_TO_KEEP = 10;
|
||||
private static FileSystem fs;
|
||||
private static final Logger LOG = LogManager.getLogger(FSUtils.class);
|
||||
// Log files are of this pattern - .b5068208-e1a4-11e6-bf01-fe55135034f3_20170101134598.log.1
|
||||
private static final Pattern LOG_FILE_PATTERN = Pattern.compile("\\.(.*)_(.*)\\.(.*)\\.([0-9]*)");
|
||||
private static final String LOG_FILE_PREFIX = ".";
|
||||
private static final int MAX_ATTEMPTS_RECOVER_LEASE = 10;
|
||||
private static final long MIN_CLEAN_TO_KEEP = 10;
|
||||
private static final long MIN_ROLLBACK_TO_KEEP = 10;
|
||||
private static FileSystem fs;
|
||||
|
||||
/**
|
||||
* Only to be used for testing.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
public static void setFs(FileSystem fs) {
|
||||
FSUtils.fs = fs;
|
||||
/**
|
||||
* Only to be used for testing.
|
||||
*/
|
||||
@VisibleForTesting
|
||||
public static void setFs(FileSystem fs) {
|
||||
FSUtils.fs = fs;
|
||||
}
|
||||
|
||||
|
||||
public static FileSystem getFs() {
|
||||
if (fs != null) {
|
||||
return fs;
|
||||
}
|
||||
|
||||
|
||||
public static FileSystem getFs() {
|
||||
if (fs != null) {
|
||||
return fs;
|
||||
}
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||
FileSystem fs;
|
||||
try {
|
||||
fs = FileSystem.get(conf);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
|
||||
e);
|
||||
}
|
||||
LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
|
||||
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
|
||||
|
||||
return fs;
|
||||
Configuration conf = new Configuration();
|
||||
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||
FileSystem fs;
|
||||
try {
|
||||
fs = FileSystem.get(conf);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
|
||||
e);
|
||||
}
|
||||
LOG.info(
|
||||
String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
|
||||
conf.getRaw("fs.defaultFS"), conf.toString(), fs.toString()));
|
||||
|
||||
public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
|
||||
return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime);
|
||||
return fs;
|
||||
}
|
||||
|
||||
public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
|
||||
return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime);
|
||||
}
|
||||
|
||||
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
|
||||
return String.format("*_%s_%s.parquet", taskPartitionId, commitTime);
|
||||
}
|
||||
|
||||
public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
|
||||
return String.format("%s_*_%s.parquet", fileId, commitTime);
|
||||
}
|
||||
|
||||
public static String maskWithOnlyCommitTime(String commitTime) {
|
||||
return String.format("*_*_%s.parquet", commitTime);
|
||||
}
|
||||
|
||||
public static String getCommitFromCommitFile(String commitFileName) {
|
||||
return commitFileName.split("\\.")[0];
|
||||
}
|
||||
|
||||
public static String getCommitTime(String fullFileName) {
|
||||
return fullFileName.split("_")[2].split("\\.")[0];
|
||||
}
|
||||
|
||||
public static long getFileSize(FileSystem fs, Path path) throws IOException {
|
||||
return fs.getFileStatus(path).getLen();
|
||||
}
|
||||
|
||||
public static String getFileId(String fullFileName) {
|
||||
return fullFileName.split("_")[0];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets all partition paths assuming date partitioning (year, month, day) three levels down.
|
||||
*/
|
||||
public static List<String> getAllFoldersThreeLevelsDown(FileSystem fs, String basePath)
|
||||
throws IOException {
|
||||
List<String> datePartitions = new ArrayList<>();
|
||||
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
|
||||
for (FileStatus status : folders) {
|
||||
Path path = status.getPath();
|
||||
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
|
||||
path.getParent().getName(), path.getName()));
|
||||
}
|
||||
return datePartitions;
|
||||
}
|
||||
|
||||
public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
|
||||
return String.format("*_%s_%s.parquet", taskPartitionId, commitTime);
|
||||
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
|
||||
String partitionFullPath = partitionPath.toString();
|
||||
int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName());
|
||||
return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain all the partition paths, that are present in this table, denoted by presence of {@link
|
||||
* com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
|
||||
*/
|
||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
|
||||
throws IOException {
|
||||
List<String> partitions = new ArrayList<>();
|
||||
Path basePath = new Path(basePathStr);
|
||||
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
|
||||
while (allFiles.hasNext()) {
|
||||
Path filePath = allFiles.next().getPath();
|
||||
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
|
||||
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
|
||||
}
|
||||
}
|
||||
return partitions;
|
||||
}
|
||||
|
||||
public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
|
||||
return String.format("%s_*_%s.parquet", fileId, commitTime);
|
||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr,
|
||||
boolean assumeDatePartitioning)
|
||||
throws IOException {
|
||||
if (assumeDatePartitioning) {
|
||||
return getAllFoldersThreeLevelsDown(fs, basePathStr);
|
||||
} else {
|
||||
return getAllFoldersWithPartitionMetaFile(fs, basePathStr);
|
||||
}
|
||||
}
|
||||
|
||||
public static String maskWithOnlyCommitTime(String commitTime) {
|
||||
return String.format("*_*_%s.parquet", commitTime);
|
||||
public static String getFileExtension(String fullName) {
|
||||
Preconditions.checkNotNull(fullName);
|
||||
String fileName = (new File(fullName)).getName();
|
||||
int dotIndex = fileName.indexOf('.');
|
||||
return dotIndex == -1 ? "" : fileName.substring(dotIndex);
|
||||
}
|
||||
|
||||
public static String getInstantTime(String name) {
|
||||
return name.replace(getFileExtension(name), "");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the file extension from the log file
|
||||
*/
|
||||
public static String getFileExtensionFromLog(Path logPath) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(logPath, "LogFile");
|
||||
}
|
||||
return matcher.group(3);
|
||||
}
|
||||
|
||||
public static String getCommitFromCommitFile(String commitFileName) {
|
||||
return commitFileName.split("\\.")[0];
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
*/
|
||||
public static String getFileIdFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(path, "LogFile");
|
||||
}
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
public static String getCommitTime(String fullFileName) {
|
||||
return fullFileName.split("_")[2].split("\\.")[0];
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
*/
|
||||
public static String getBaseCommitTimeFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(path, "LogFile");
|
||||
}
|
||||
return matcher.group(2);
|
||||
}
|
||||
|
||||
public static long getFileSize(FileSystem fs, Path path) throws IOException {
|
||||
return fs.getFileStatus(path).getLen();
|
||||
/**
|
||||
* Get the last part of the file name in the log file and convert to int.
|
||||
*/
|
||||
public static int getFileVersionFromLog(Path logPath) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(logPath, "LogFile");
|
||||
}
|
||||
return Integer.parseInt(matcher.group(4));
|
||||
}
|
||||
|
||||
public static String getFileId(String fullFileName) {
|
||||
return fullFileName.split("_")[0];
|
||||
public static String makeLogFileName(String fileId, String logFileExtension,
|
||||
String baseCommitTime, int version) {
|
||||
return LOG_FILE_PREFIX + String
|
||||
.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
|
||||
}
|
||||
|
||||
public static String maskWithoutLogVersion(String commitTime, String fileId,
|
||||
String logFileExtension) {
|
||||
return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the latest log file written from the list of log files passed in
|
||||
*/
|
||||
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
|
||||
return logFiles.sorted(Comparator
|
||||
.comparing(s -> s.getLogVersion(),
|
||||
Comparator.reverseOrder())).findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all the log files for the passed in FileId in the partition path
|
||||
*/
|
||||
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||
throws IOException {
|
||||
return Arrays.stream(fs.listStatus(partitionPath,
|
||||
path -> path.getName().startsWith("." + fileId) && path.getName()
|
||||
.contains(logFileExtension)))
|
||||
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest log version for the fileId in the partition path
|
||||
*/
|
||||
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||
throws IOException {
|
||||
Optional<HoodieLogFile> latestLogFile =
|
||||
getLatestLogFile(
|
||||
getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
||||
if (latestLogFile.isPresent()) {
|
||||
return Optional.of(latestLogFile.get().getLogVersion());
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
public static int getCurrentLogVersion(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime)
|
||||
throws IOException {
|
||||
Optional<Integer> currentVersion =
|
||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||
// handle potential overflow
|
||||
return (currentVersion.isPresent()) ? currentVersion.get() : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all partition paths assuming date partitioning (year, month, day) three levels down.
|
||||
*/
|
||||
public static List<String> getAllFoldersThreeLevelsDown(FileSystem fs, String basePath) throws IOException {
|
||||
List<String> datePartitions = new ArrayList<>();
|
||||
FileStatus[] folders = fs.globStatus(new Path(basePath + "/*/*/*"));
|
||||
for (FileStatus status : folders) {
|
||||
Path path = status.getPath();
|
||||
datePartitions.add(String.format("%s/%s/%s", path.getParent().getParent().getName(),
|
||||
path.getParent().getName(), path.getName()));
|
||||
}
|
||||
return datePartitions;
|
||||
/**
|
||||
* computes the next log version for the specified fileId in the partition path
|
||||
*/
|
||||
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
|
||||
final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Optional<Integer> currentVersion =
|
||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||
// handle potential overflow
|
||||
return (currentVersion.isPresent()) ? currentVersion.get() + 1 : 1;
|
||||
}
|
||||
|
||||
public static int getDefaultBufferSize(final FileSystem fs) {
|
||||
return fs.getConf().getInt("io.file.buffer.size", 4096);
|
||||
}
|
||||
|
||||
public static Short getDefaultReplication(FileSystem fs, Path path) {
|
||||
return fs.getDefaultReplication(path);
|
||||
}
|
||||
|
||||
public static Long getDefaultBlockSize(FileSystem fs, Path path) {
|
||||
return fs.getDefaultBlockSize(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* When a file was opened and the task died without closing the stream, another task executor
|
||||
* cannot open because the existing lease will be active. We will try to recover the lease, from
|
||||
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But if
|
||||
* the client dies, this should be instant.
|
||||
*/
|
||||
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
|
||||
throws IOException, InterruptedException {
|
||||
LOG.info("Recover lease on dfs file " + p);
|
||||
// initiate the recovery
|
||||
boolean recovered = false;
|
||||
for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) {
|
||||
LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p);
|
||||
recovered = dfs.recoverLease(p);
|
||||
if (recovered) {
|
||||
break;
|
||||
}
|
||||
// Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover under default settings
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
return recovered;
|
||||
|
||||
public static String getRelativePartitionPath(Path basePath, Path partitionPath) {
|
||||
String partitionFullPath = partitionPath.toString();
|
||||
int partitionStartIndex = partitionFullPath.lastIndexOf(basePath.getName());
|
||||
return partitionFullPath.substring(partitionStartIndex + basePath.getName().length() + 1);
|
||||
}
|
||||
|
||||
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_CLEAN_TO_KEEP and delete rest
|
||||
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
|
||||
e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_ROLLBACK_TO_KEEP and delete rest
|
||||
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not delete rollback meta files " + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
|
||||
if (!fs.exists(partitionPath)) {
|
||||
fs.mkdirs(partitionPath);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Obtain all the partition paths, that are present in this table, denoted by presence of {@link
|
||||
* com.uber.hoodie.common.model.HoodiePartitionMetadata#HOODIE_PARTITION_METAFILE}
|
||||
*/
|
||||
public static List<String> getAllFoldersWithPartitionMetaFile(FileSystem fs, String basePathStr)
|
||||
throws IOException {
|
||||
List<String> partitions = new ArrayList<>();
|
||||
Path basePath = new Path(basePathStr);
|
||||
RemoteIterator<LocatedFileStatus> allFiles = fs.listFiles(new Path(basePathStr), true);
|
||||
while (allFiles.hasNext()) {
|
||||
Path filePath = allFiles.next().getPath();
|
||||
if (filePath.getName().equals(HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE)) {
|
||||
partitions.add(getRelativePartitionPath(basePath, filePath.getParent()));
|
||||
}
|
||||
}
|
||||
return partitions;
|
||||
}
|
||||
|
||||
public static List<String> getAllPartitionPaths(FileSystem fs, String basePathStr, boolean assumeDatePartitioning)
|
||||
throws IOException {
|
||||
if (assumeDatePartitioning) {
|
||||
return getAllFoldersThreeLevelsDown(fs, basePathStr);
|
||||
} else {
|
||||
return getAllFoldersWithPartitionMetaFile(fs, basePathStr);
|
||||
}
|
||||
}
|
||||
|
||||
public static String getFileExtension(String fullName) {
|
||||
Preconditions.checkNotNull(fullName);
|
||||
String fileName = (new File(fullName)).getName();
|
||||
int dotIndex = fileName.indexOf('.');
|
||||
return dotIndex == -1 ? "" : fileName.substring(dotIndex);
|
||||
}
|
||||
|
||||
public static String getInstantTime(String name) {
|
||||
return name.replace(getFileExtension(name), "");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the file extension from the log file
|
||||
*/
|
||||
public static String getFileExtensionFromLog(Path logPath) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(logPath, "LogFile");
|
||||
}
|
||||
return matcher.group(3);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
*/
|
||||
public static String getFileIdFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(path, "LogFile");
|
||||
}
|
||||
return matcher.group(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the first part of the file name in the log file. That will be the fileId. Log file do not
|
||||
* have commitTime in the file name.
|
||||
*/
|
||||
public static String getBaseCommitTimeFromLogPath(Path path) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(path.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(path, "LogFile");
|
||||
}
|
||||
return matcher.group(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the last part of the file name in the log file and convert to int.
|
||||
*/
|
||||
public static int getFileVersionFromLog(Path logPath) {
|
||||
Matcher matcher = LOG_FILE_PATTERN.matcher(logPath.getName());
|
||||
if (!matcher.find()) {
|
||||
throw new InvalidHoodiePathException(logPath, "LogFile");
|
||||
}
|
||||
return Integer.parseInt(matcher.group(4));
|
||||
}
|
||||
|
||||
public static String makeLogFileName(String fileId, String logFileExtension,
|
||||
String baseCommitTime, int version) {
|
||||
return LOG_FILE_PREFIX + String.format("%s_%s%s.%d", fileId, baseCommitTime, logFileExtension, version);
|
||||
}
|
||||
|
||||
public static String maskWithoutLogVersion(String commitTime, String fileId, String logFileExtension) {
|
||||
return LOG_FILE_PREFIX + String.format("%s_%s%s*", fileId, commitTime, logFileExtension);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the latest log file written from the list of log files passed in
|
||||
*/
|
||||
public static Optional<HoodieLogFile> getLatestLogFile(Stream<HoodieLogFile> logFiles) {
|
||||
return logFiles.sorted(Comparator
|
||||
.comparing(s -> s.getLogVersion(),
|
||||
Comparator.reverseOrder())).findFirst();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all the log files for the passed in FileId in the partition path
|
||||
*/
|
||||
public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
return Arrays.stream(fs.listStatus(partitionPath,
|
||||
path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension)))
|
||||
.map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest log version for the fileId in the partition path
|
||||
*/
|
||||
public static Optional<Integer> getLatestLogVersion(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Optional<HoodieLogFile> latestLogFile =
|
||||
getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime));
|
||||
if (latestLogFile.isPresent()) {
|
||||
return Optional.of(latestLogFile.get().getLogVersion());
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
public static int getCurrentLogVersion(FileSystem fs, Path partitionPath,
|
||||
final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Optional<Integer> currentVersion =
|
||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||
// handle potential overflow
|
||||
return (currentVersion.isPresent()) ? currentVersion.get() : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* computes the next log version for the specified fileId in the partition path
|
||||
*/
|
||||
public static int computeNextLogVersion(FileSystem fs, Path partitionPath, final String fileId,
|
||||
final String logFileExtension, final String baseCommitTime) throws IOException {
|
||||
Optional<Integer> currentVersion =
|
||||
getLatestLogVersion(fs, partitionPath, fileId, logFileExtension, baseCommitTime);
|
||||
// handle potential overflow
|
||||
return (currentVersion.isPresent()) ? currentVersion.get() + 1 : 1;
|
||||
}
|
||||
|
||||
public static int getDefaultBufferSize(final FileSystem fs) {
|
||||
return fs.getConf().getInt("io.file.buffer.size", 4096);
|
||||
}
|
||||
|
||||
public static Short getDefaultReplication(FileSystem fs, Path path) {
|
||||
return fs.getDefaultReplication(path);
|
||||
}
|
||||
|
||||
public static Long getDefaultBlockSize(FileSystem fs, Path path) {
|
||||
return fs.getDefaultBlockSize(path);
|
||||
}
|
||||
|
||||
/**
|
||||
* When a file was opened and the task died without closing the stream, another task executor
|
||||
* cannot open because the existing lease will be active. We will try to recover the lease, from
|
||||
* HDFS. If a data node went down, it takes about 10 minutes for the lease to be rocovered. But
|
||||
* if the client dies, this should be instant.
|
||||
*/
|
||||
public static boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p)
|
||||
throws IOException, InterruptedException {
|
||||
LOG.info("Recover lease on dfs file " + p);
|
||||
// initiate the recovery
|
||||
boolean recovered = false;
|
||||
for (int nbAttempt = 0; nbAttempt < MAX_ATTEMPTS_RECOVER_LEASE; nbAttempt++) {
|
||||
LOG.info("Attempt " + nbAttempt + " to recover lease on dfs file " + p);
|
||||
recovered = dfs.recoverLease(p);
|
||||
if (recovered)
|
||||
break;
|
||||
// Sleep for 1 second before trying again. Typically it takes about 2-3 seconds to recover under default settings
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
return recovered;
|
||||
|
||||
}
|
||||
|
||||
public static void deleteOlderCleanMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_CLEAN_TO_KEEP and delete rest
|
||||
instants.skip(MIN_CLEAN_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not delete clean meta files" + s.getFileName(),
|
||||
e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void deleteOlderRollbackMetaFiles(FileSystem fs, String metaPath,
|
||||
Stream<HoodieInstant> instants) {
|
||||
//TODO - this should be archived when archival is made general for all meta-data
|
||||
// skip MIN_ROLLBACK_TO_KEEP and delete rest
|
||||
instants.skip(MIN_ROLLBACK_TO_KEEP).map(s -> {
|
||||
try {
|
||||
return fs.delete(new Path(metaPath, s.getFileName()), false);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not delete rollback meta files " + s.getFileName(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static void createPathIfNotExists(FileSystem fs, Path partitionPath) throws IOException {
|
||||
if(!fs.exists(partitionPath)) {
|
||||
fs.mkdirs(partitionPath);
|
||||
}
|
||||
}
|
||||
|
||||
public static Long getSizeInMB(long sizeInBytes) {
|
||||
return sizeInBytes / (1024 * 1024);
|
||||
}
|
||||
public static Long getSizeInMB(long sizeInBytes) {
|
||||
return sizeInBytes / (1024 * 1024);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,156 +17,167 @@
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.exception.SchemaCompatabilityException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.zip.DeflaterOutputStream;
|
||||
import java.util.zip.InflaterInputStream;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.*;
|
||||
import org.apache.avro.generic.GenericData;
|
||||
import org.apache.avro.generic.GenericDatumReader;
|
||||
import org.apache.avro.generic.GenericDatumWriter;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.avro.io.BinaryEncoder;
|
||||
import org.apache.avro.io.Decoder;
|
||||
import org.apache.avro.io.DecoderFactory;
|
||||
import org.apache.avro.io.EncoderFactory;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Helper class to do common stuff across Avro.
|
||||
*/
|
||||
public class HoodieAvroUtils {
|
||||
|
||||
// All metadata fields are optional strings.
|
||||
private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
|
||||
Schema.create(Schema.Type.NULL),
|
||||
Schema.create(Schema.Type.STRING)));
|
||||
// All metadata fields are optional strings.
|
||||
private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
|
||||
Schema.create(Schema.Type.NULL),
|
||||
Schema.create(Schema.Type.STRING)));
|
||||
|
||||
private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
|
||||
private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema();
|
||||
|
||||
/**
|
||||
* Convert a given avro record to bytes
|
||||
*/
|
||||
public static byte[] avroToBytes(GenericRecord record) throws IOException {
|
||||
GenericDatumWriter<GenericRecord> writer =
|
||||
new GenericDatumWriter<>(record.getSchema());
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
|
||||
writer.write(record, encoder);
|
||||
encoder.flush();
|
||||
out.close();
|
||||
return out.toByteArray();
|
||||
/**
|
||||
* Convert a given avro record to bytes
|
||||
*/
|
||||
public static byte[] avroToBytes(GenericRecord record) throws IOException {
|
||||
GenericDatumWriter<GenericRecord> writer =
|
||||
new GenericDatumWriter<>(record.getSchema());
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
|
||||
writer.write(record, encoder);
|
||||
encoder.flush();
|
||||
out.close();
|
||||
return out.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert serialized bytes back into avro record
|
||||
*/
|
||||
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
|
||||
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
|
||||
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
|
||||
return reader.read(null, decoder);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds the Hoodie metadata fields to the given schema
|
||||
*/
|
||||
public static Schema addMetadataFields(Schema schema) {
|
||||
List<Schema.Field> parentFields = new ArrayList<>();
|
||||
|
||||
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
|
||||
parentFields.add(commitTimeField);
|
||||
parentFields.add(commitSeqnoField);
|
||||
parentFields.add(recordKeyField);
|
||||
parentFields.add(partitionPathField);
|
||||
parentFields.add(fileNameField);
|
||||
for (Schema.Field field : schema.getFields()) {
|
||||
parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert serialized bytes back into avro record
|
||||
*/
|
||||
public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
|
||||
Decoder decoder = DecoderFactory.get().binaryDecoder(bytes, null);
|
||||
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
|
||||
return reader.read(null, decoder);
|
||||
Schema mergedSchema = Schema
|
||||
.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
|
||||
mergedSchema.setFields(parentFields);
|
||||
return mergedSchema;
|
||||
}
|
||||
|
||||
private static Schema initRecordKeySchema() {
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD,
|
||||
METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
|
||||
recordKeySchema.setFields(Arrays.asList(recordKeyField));
|
||||
return recordKeySchema;
|
||||
}
|
||||
|
||||
public static Schema getRecordKeySchema() {
|
||||
return RECORD_KEY_SCHEMA;
|
||||
}
|
||||
|
||||
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey,
|
||||
String partitionPath, String fileName) {
|
||||
record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
|
||||
record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
|
||||
record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the Hoodie commit metadata into the provided Generic Record.
|
||||
*/
|
||||
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime,
|
||||
String commitSeqno) {
|
||||
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
||||
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
|
||||
return record;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a avro record with a given schema, rewrites it into the new schema
|
||||
*/
|
||||
public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) {
|
||||
GenericRecord newRecord = new GenericData.Record(newSchema);
|
||||
for (Schema.Field f : record.getSchema().getFields()) {
|
||||
newRecord.put(f.name(), record.get(f.name()));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds the Hoodie metadata fields to the given schema
|
||||
*/
|
||||
public static Schema addMetadataFields(Schema schema) {
|
||||
List<Schema.Field> parentFields = new ArrayList<>();
|
||||
|
||||
Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
|
||||
parentFields.add(commitTimeField);
|
||||
parentFields.add(commitSeqnoField);
|
||||
parentFields.add(recordKeyField);
|
||||
parentFields.add(partitionPathField);
|
||||
parentFields.add(fileNameField);
|
||||
for (Schema.Field field : schema.getFields()) {
|
||||
parentFields.add(new Schema.Field(field.name(), field.schema(), field.doc(), null));
|
||||
}
|
||||
|
||||
Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
|
||||
mergedSchema.setFields(parentFields);
|
||||
return mergedSchema;
|
||||
if (!new GenericData().validate(newSchema, newRecord)) {
|
||||
throw new SchemaCompatabilityException(
|
||||
"Unable to validate the rewritten record " + record + " against schema "
|
||||
+ newSchema);
|
||||
}
|
||||
return newRecord;
|
||||
}
|
||||
|
||||
private static Schema initRecordKeySchema() {
|
||||
Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null);
|
||||
Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
|
||||
recordKeySchema.setFields(Arrays.asList(recordKeyField));
|
||||
return recordKeySchema;
|
||||
public static byte[] compress(String text) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try {
|
||||
OutputStream out = new DeflaterOutputStream(baos);
|
||||
out.write(text.getBytes("UTF-8"));
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("IOException while compressing text " + text, e);
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
public static Schema getRecordKeySchema() {
|
||||
return RECORD_KEY_SCHEMA;
|
||||
}
|
||||
|
||||
public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath, String fileName) {
|
||||
record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
|
||||
record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
|
||||
record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the Hoodie commit metadata into the provided Generic Record.
|
||||
*/
|
||||
public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
|
||||
record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
|
||||
record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
|
||||
return record;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a avro record with a given schema, rewrites it into the new schema
|
||||
*/
|
||||
public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema) {
|
||||
GenericRecord newRecord = new GenericData.Record(newSchema);
|
||||
for (Schema.Field f : record.getSchema().getFields()) {
|
||||
newRecord.put(f.name(), record.get(f.name()));
|
||||
}
|
||||
if (!new GenericData().validate(newSchema, newRecord)) {
|
||||
throw new SchemaCompatabilityException(
|
||||
"Unable to validate the rewritten record " + record + " against schema "
|
||||
+ newSchema);
|
||||
}
|
||||
return newRecord;
|
||||
}
|
||||
|
||||
public static byte[] compress(String text) {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try {
|
||||
OutputStream out = new DeflaterOutputStream(baos);
|
||||
out.write(text.getBytes("UTF-8"));
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("IOException while compressing text " + text, e);
|
||||
}
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
public static String decompress(byte[] bytes) {
|
||||
InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes));
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try {
|
||||
byte[] buffer = new byte[8192];
|
||||
int len;
|
||||
while((len = in.read(buffer))>0)
|
||||
baos.write(buffer, 0, len);
|
||||
return new String(baos.toByteArray(), "UTF-8");
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("IOException while decompressing text", e);
|
||||
}
|
||||
public static String decompress(byte[] bytes) {
|
||||
InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes));
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try {
|
||||
byte[] buffer = new byte[8192];
|
||||
int len;
|
||||
while ((len = in.read(buffer)) > 0) {
|
||||
baos.write(buffer, 0, len);
|
||||
}
|
||||
return new String(baos.toByteArray(), "UTF-8");
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("IOException while decompressing text", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,10 +17,13 @@
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
public class NumericUtils {
|
||||
public static String humanReadableByteCount(double bytes) {
|
||||
if (bytes < 1024) return String.format("%.1f B", bytes);
|
||||
int exp = (int) (Math.log(bytes) / Math.log(1024));
|
||||
String pre = "KMGTPE".charAt(exp-1) + "";
|
||||
return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre);
|
||||
|
||||
public static String humanReadableByteCount(double bytes) {
|
||||
if (bytes < 1024) {
|
||||
return String.format("%.1f B", bytes);
|
||||
}
|
||||
int exp = (int) (Math.log(bytes) / Math.log(1024));
|
||||
String pre = "KMGTPE".charAt(exp - 1) + "";
|
||||
return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,14 +16,20 @@
|
||||
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import static com.uber.hoodie.common.util.FSUtils.getFs;
|
||||
|
||||
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
|
||||
import com.uber.hoodie.common.BloomFilter;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.exception.MetadataNotFoundException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@@ -36,163 +42,144 @@ import org.apache.parquet.hadoop.ParquetReader;
|
||||
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
|
||||
import org.apache.parquet.schema.MessageType;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import static com.uber.hoodie.common.util.FSUtils.getFs;
|
||||
|
||||
/**
|
||||
* Utility functions involving with parquet.
|
||||
*/
|
||||
public class ParquetUtils {
|
||||
|
||||
/**
|
||||
* Read the rowKey list from the given parquet file.
|
||||
*
|
||||
* @param filePath The parquet file path.
|
||||
*/
|
||||
public static Set<String> readRowKeysFromParquet(Path filePath) {
|
||||
Configuration conf = new Configuration();
|
||||
conf.addResource(getFs().getConf());
|
||||
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
|
||||
AvroReadSupport.setAvroReadSchema(conf, readSchema);
|
||||
AvroReadSupport.setRequestedProjection(conf, readSchema);
|
||||
ParquetReader reader = null;
|
||||
Set<String> rowKeys = new HashSet<>();
|
||||
/**
|
||||
* Read the rowKey list from the given parquet file.
|
||||
*
|
||||
* @param filePath The parquet file path.
|
||||
*/
|
||||
public static Set<String> readRowKeysFromParquet(Path filePath) {
|
||||
Configuration conf = new Configuration();
|
||||
conf.addResource(getFs().getConf());
|
||||
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
|
||||
AvroReadSupport.setAvroReadSchema(conf, readSchema);
|
||||
AvroReadSupport.setRequestedProjection(conf, readSchema);
|
||||
ParquetReader reader = null;
|
||||
Set<String> rowKeys = new HashSet<>();
|
||||
try {
|
||||
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
|
||||
Object obj = reader.read();
|
||||
while (obj != null) {
|
||||
if (obj instanceof GenericRecord) {
|
||||
rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
|
||||
}
|
||||
obj = reader.read();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
|
||||
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
reader = AvroParquetReader.builder(filePath).withConf(conf).build();
|
||||
Object obj = reader.read();
|
||||
while (obj != null) {
|
||||
if (obj instanceof GenericRecord) {
|
||||
rowKeys.add(((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
|
||||
}
|
||||
obj = reader.read();
|
||||
}
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
|
||||
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
// ignore
|
||||
}
|
||||
return rowKeys;
|
||||
}
|
||||
}
|
||||
return rowKeys;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Read the metadata from a parquet file
|
||||
*
|
||||
* @param parquetFilePath
|
||||
* @return
|
||||
*/
|
||||
public static ParquetMetadata readMetadata(Path parquetFilePath) {
|
||||
return readMetadata(new Configuration(), parquetFilePath);
|
||||
/**
|
||||
* Read the metadata from a parquet file
|
||||
*/
|
||||
public static ParquetMetadata readMetadata(Path parquetFilePath) {
|
||||
return readMetadata(new Configuration(), parquetFilePath);
|
||||
}
|
||||
|
||||
public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) {
|
||||
ParquetMetadata footer;
|
||||
try {
|
||||
// TODO(vc): Should we use the parallel reading version here?
|
||||
footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
|
||||
e);
|
||||
}
|
||||
return footer;
|
||||
}
|
||||
|
||||
public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) {
|
||||
ParquetMetadata footer;
|
||||
|
||||
/**
|
||||
* Get the schema of the given parquet file.
|
||||
*/
|
||||
public static MessageType readSchema(Path parquetFilePath) {
|
||||
return readMetadata(parquetFilePath).getFileMetaData().getSchema();
|
||||
}
|
||||
|
||||
|
||||
private static List<String> readParquetFooter(Path parquetFilePath, String... footerNames) {
|
||||
List<String> footerVals = new ArrayList<>();
|
||||
ParquetMetadata footer = readMetadata(parquetFilePath);
|
||||
Map<String, String> metadata = footer.getFileMetaData().getKeyValueMetaData();
|
||||
for (String footerName : footerNames) {
|
||||
if (metadata.containsKey(footerName)) {
|
||||
footerVals.add(metadata.get(footerName));
|
||||
} else {
|
||||
throw new MetadataNotFoundException("Could not find index in Parquet footer. " +
|
||||
"Looked for key " + footerName + " in " + parquetFilePath);
|
||||
}
|
||||
}
|
||||
return footerVals;
|
||||
}
|
||||
|
||||
public static Schema readAvroSchema(Path parquetFilePath) {
|
||||
return new AvroSchemaConverter().convert(readSchema(parquetFilePath));
|
||||
}
|
||||
|
||||
/**
|
||||
* Read out the bloom filter from the parquet file meta data.
|
||||
*/
|
||||
public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) {
|
||||
String footerVal = readParquetFooter(parquetFilePath,
|
||||
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
|
||||
return new BloomFilter(footerVal);
|
||||
}
|
||||
|
||||
public static String[] readMinMaxRecordKeys(Path parquetFilePath) {
|
||||
List<String> minMaxKeys = readParquetFooter(parquetFilePath,
|
||||
HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
|
||||
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
|
||||
if (minMaxKeys.size() != 2) {
|
||||
throw new HoodieException(String.format(
|
||||
"Could not read min/max record key out of footer correctly from %s. read) : %s",
|
||||
parquetFilePath, minMaxKeys));
|
||||
}
|
||||
return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)};
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: This literally reads the entire file contents, thus should be used with caution.
|
||||
*/
|
||||
public static List<GenericRecord> readAvroRecords(Path filePath) {
|
||||
ParquetReader reader = null;
|
||||
List<GenericRecord> records = new ArrayList<>();
|
||||
try {
|
||||
reader = AvroParquetReader.builder(filePath).build();
|
||||
Object obj = reader.read();
|
||||
while (obj != null) {
|
||||
if (obj instanceof GenericRecord) {
|
||||
records.add(((GenericRecord) obj));
|
||||
}
|
||||
obj = reader.read();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
|
||||
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
// TODO(vc): Should we use the parallel reading version here?
|
||||
footer = ParquetFileReader.readFooter(getFs().getConf(), parquetFilePath);
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath,
|
||||
e);
|
||||
// ignore
|
||||
}
|
||||
return footer;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the schema of the given parquet file.
|
||||
*
|
||||
* @param parquetFilePath
|
||||
* @return
|
||||
*/
|
||||
public static MessageType readSchema(Path parquetFilePath) {
|
||||
return readMetadata(parquetFilePath).getFileMetaData().getSchema();
|
||||
}
|
||||
|
||||
|
||||
private static List<String> readParquetFooter(Path parquetFilePath, String... footerNames) {
|
||||
List<String> footerVals = new ArrayList<>();
|
||||
ParquetMetadata footer = readMetadata(parquetFilePath);
|
||||
Map<String, String> metadata = footer.getFileMetaData().getKeyValueMetaData();
|
||||
for (String footerName : footerNames) {
|
||||
if (metadata.containsKey(footerName)) {
|
||||
footerVals.add(metadata.get(footerName));
|
||||
} else {
|
||||
throw new MetadataNotFoundException("Could not find index in Parquet footer. " +
|
||||
"Looked for key " + footerName + " in " + parquetFilePath);
|
||||
}
|
||||
}
|
||||
return footerVals;
|
||||
}
|
||||
|
||||
public static Schema readAvroSchema(Path parquetFilePath) {
|
||||
return new AvroSchemaConverter().convert(readSchema(parquetFilePath));
|
||||
}
|
||||
|
||||
/**
|
||||
* Read out the bloom filter from the parquet file meta data.
|
||||
*/
|
||||
public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) {
|
||||
String footerVal = readParquetFooter(parquetFilePath,
|
||||
HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY).get(0);
|
||||
return new BloomFilter(footerVal);
|
||||
}
|
||||
|
||||
public static String[] readMinMaxRecordKeys(Path parquetFilePath) {
|
||||
List<String> minMaxKeys = readParquetFooter(parquetFilePath, HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER,
|
||||
HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER);
|
||||
if (minMaxKeys.size() != 2) {
|
||||
throw new HoodieException(String.format(
|
||||
"Could not read min/max record key out of footer correctly from %s. read) : %s",
|
||||
parquetFilePath, minMaxKeys));
|
||||
}
|
||||
return new String[]{minMaxKeys.get(0), minMaxKeys.get(1)};
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* NOTE: This literally reads the entire file contents, thus should be used with caution.
|
||||
*
|
||||
* @param filePath
|
||||
* @return
|
||||
*/
|
||||
public static List<GenericRecord> readAvroRecords(Path filePath) {
|
||||
ParquetReader reader = null;
|
||||
List<GenericRecord> records = new ArrayList<>();
|
||||
try {
|
||||
reader = AvroParquetReader.builder(filePath).build();
|
||||
Object obj = reader.read();
|
||||
while (obj != null) {
|
||||
if (obj instanceof GenericRecord) {
|
||||
records.add(((GenericRecord) obj));
|
||||
}
|
||||
obj = reader.read();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
|
||||
|
||||
} finally {
|
||||
if (reader != null) {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
return records;
|
||||
}
|
||||
}
|
||||
return records;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,22 +17,18 @@
|
||||
package com.uber.hoodie.common.util;
|
||||
|
||||
import com.uber.hoodie.common.model.HoodieRecordPayload;
|
||||
|
||||
import com.uber.hoodie.exception.HoodieException;
|
||||
import org.apache.avro.generic.GenericRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
public class ReflectionUtils {
|
||||
|
||||
private static Map<String, Class<?>> clazzCache = new HashMap<>();
|
||||
|
||||
public static <T> T loadClass(String fqcn) {
|
||||
try {
|
||||
if(clazzCache.get(fqcn) == null) {
|
||||
if (clazzCache.get(fqcn) == null) {
|
||||
Class<?> clazz = Class.<HoodieRecordPayload>forName(fqcn);
|
||||
clazzCache.put(fqcn, clazz);
|
||||
}
|
||||
@@ -48,21 +44,17 @@ public class ReflectionUtils {
|
||||
|
||||
/**
|
||||
* Instantiate a given class with a generic record payload
|
||||
*
|
||||
* @param recordPayloadClass
|
||||
* @param payloadArgs
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass,
|
||||
Object [] payloadArgs,
|
||||
Class<?> ... constructorArgTypes) {
|
||||
Object[] payloadArgs,
|
||||
Class<?>... constructorArgTypes) {
|
||||
try {
|
||||
if(clazzCache.get(recordPayloadClass) == null) {
|
||||
if (clazzCache.get(recordPayloadClass) == null) {
|
||||
Class<?> clazz = Class.<HoodieRecordPayload>forName(recordPayloadClass);
|
||||
clazzCache.put(recordPayloadClass, clazz);
|
||||
}
|
||||
return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes).newInstance(payloadArgs);
|
||||
return (T) clazzCache.get(recordPayloadClass).getConstructor(constructorArgTypes)
|
||||
.newInstance(payloadArgs);
|
||||
} catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
|
||||
throw new HoodieException("Unable to instantiate payload class ", e);
|
||||
} catch (ClassNotFoundException e) {
|
||||
|
||||
@@ -16,43 +16,40 @@
|
||||
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown to indicate that a hoodie dataset was not found on the path provided
|
||||
* <p>
|
||||
* <p> Exception thrown to indicate that a hoodie dataset was not found on the path provided <p>
|
||||
*/
|
||||
public class DatasetNotFoundException extends HoodieException {
|
||||
public DatasetNotFoundException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Hoodie dataset not found in path " + basePath;
|
||||
}
|
||||
public DatasetNotFoundException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
|
||||
throws DatasetNotFoundException {
|
||||
// Check if the base path is found
|
||||
try {
|
||||
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
|
||||
throw new DatasetNotFoundException(basePathDir.toString());
|
||||
}
|
||||
// Check if the meta path is found
|
||||
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
// if the base path is file:///, then we have a IllegalArgumentException
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not check if dataset " + basePathDir + " is valid dataset", e);
|
||||
}
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Hoodie dataset not found in path " + basePath;
|
||||
}
|
||||
|
||||
public static void checkValidDataset(FileSystem fs, Path basePathDir, Path metaPathDir)
|
||||
throws DatasetNotFoundException {
|
||||
// Check if the base path is found
|
||||
try {
|
||||
if (!fs.exists(basePathDir) || !fs.isDirectory(basePathDir)) {
|
||||
throw new DatasetNotFoundException(basePathDir.toString());
|
||||
}
|
||||
// Check if the meta path is found
|
||||
if (!fs.exists(metaPathDir) || !fs.isDirectory(metaPathDir)) {
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
}
|
||||
} catch (IllegalArgumentException e) {
|
||||
// if the base path is file:///, then we have a IllegalArgumentException
|
||||
throw new DatasetNotFoundException(metaPathDir.toString());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not check if dataset " + basePathDir + " is valid dataset", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,39 +19,34 @@ package com.uber.hoodie.exception;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for Hoodie failures. The root of
|
||||
* the exception hierarchy.
|
||||
* </p>
|
||||
* <p>
|
||||
* Hoodie Write/Read clients will throw this exception if
|
||||
* any of its operations fail. This is a runtime (unchecked) exception.
|
||||
* </p>
|
||||
*
|
||||
* <p> Exception thrown for Hoodie failures. The root of the exception hierarchy. </p> <p> Hoodie
|
||||
* Write/Read clients will throw this exception if any of its operations fail. This is a runtime
|
||||
* (unchecked) exception. </p>
|
||||
*/
|
||||
public class HoodieException extends RuntimeException implements Serializable {
|
||||
public HoodieException() {
|
||||
super();
|
||||
}
|
||||
|
||||
public HoodieException(String message) {
|
||||
super(message);
|
||||
}
|
||||
public HoodieException() {
|
||||
super();
|
||||
}
|
||||
|
||||
public HoodieException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
public HoodieException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public HoodieException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
public HoodieException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
|
||||
protected static String format(String message, Object... args) {
|
||||
String[] argStrings = new String[args.length];
|
||||
for (int i = 0; i < args.length; i += 1) {
|
||||
argStrings[i] = String.valueOf(args[i]);
|
||||
}
|
||||
return String.format(String.valueOf(message), (Object[]) argStrings);
|
||||
public HoodieException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
|
||||
protected static String format(String message, Object... args) {
|
||||
String[] argStrings = new String[args.length];
|
||||
for (int i = 0; i < args.length; i += 1) {
|
||||
argStrings[i] = String.valueOf(args[i]);
|
||||
}
|
||||
return String.format(String.valueOf(message), (Object[]) argStrings);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -19,23 +19,22 @@ package com.uber.hoodie.exception;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for dataset IO-related failures.
|
||||
* </p>
|
||||
* <p> Exception thrown for dataset IO-related failures. </p>
|
||||
*/
|
||||
public class HoodieIOException extends HoodieException {
|
||||
private IOException ioException;
|
||||
|
||||
public HoodieIOException(String msg, IOException t) {
|
||||
super(msg, t);
|
||||
this.ioException = t;
|
||||
}
|
||||
private IOException ioException;
|
||||
|
||||
public HoodieIOException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
public HoodieIOException(String msg, IOException t) {
|
||||
super(msg, t);
|
||||
this.ioException = t;
|
||||
}
|
||||
|
||||
public IOException getIOException() {
|
||||
return ioException;
|
||||
}
|
||||
public HoodieIOException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public IOException getIOException() {
|
||||
return ioException;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,16 +17,15 @@
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown for HoodieIndex related errors.
|
||||
* </p>
|
||||
* <p> Exception thrown for HoodieIndex related errors. </p>
|
||||
*/
|
||||
public class HoodieIndexException extends HoodieException {
|
||||
public HoodieIndexException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public HoodieIndexException(String msg, Throwable e) {
|
||||
super(msg, e);
|
||||
}
|
||||
public HoodieIndexException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public HoodieIndexException(String msg, Throwable e) {
|
||||
super(msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
public class HoodieNotSupportedException extends HoodieException {
|
||||
public HoodieNotSupportedException(String errorMsg) {
|
||||
super(errorMsg);
|
||||
}
|
||||
|
||||
public HoodieNotSupportedException(String errorMsg) {
|
||||
super(errorMsg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,17 +19,15 @@ package com.uber.hoodie.exception;
|
||||
import com.uber.hoodie.common.model.HoodieRecord;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception throws when indexing fails to locate the hoodie record.
|
||||
* HoodieRecord current location and partition path does not match.
|
||||
* This is an unrecoverable error
|
||||
* </p>
|
||||
* <p> Exception throws when indexing fails to locate the hoodie record. HoodieRecord current
|
||||
* location and partition path does not match. This is an unrecoverable error </p>
|
||||
*/
|
||||
public class HoodieRecordMissingException extends HoodieException {
|
||||
public HoodieRecordMissingException(HoodieRecord record) {
|
||||
super(
|
||||
"Record " + record.getRecordKey() + " with partition path " + record.getPartitionPath()
|
||||
+ " in current location " + record.getCurrentLocation()
|
||||
+ " is not found in the partition");
|
||||
}
|
||||
|
||||
public HoodieRecordMissingException(HoodieRecord record) {
|
||||
super(
|
||||
"Record " + record.getRecordKey() + " with partition path " + record.getPartitionPath()
|
||||
+ " in current location " + record.getCurrentLocation()
|
||||
+ " is not found in the partition");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,16 +17,15 @@
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Exception thrown to indicate that a hoodie dataset is invalid
|
||||
* <p>
|
||||
* <p> Exception thrown to indicate that a hoodie dataset is invalid <p>
|
||||
*/
|
||||
public class InvalidDatasetException extends HoodieException {
|
||||
public InvalidDatasetException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Invalid Hoodie Dataset. " + basePath;
|
||||
}
|
||||
public InvalidDatasetException(String basePath) {
|
||||
super(getErrorMessage(basePath));
|
||||
}
|
||||
|
||||
private static String getErrorMessage(String basePath) {
|
||||
return "Invalid Hoodie Dataset. " + basePath;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,8 @@ package com.uber.hoodie.exception;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
public class InvalidHoodiePathException extends HoodieException {
|
||||
public InvalidHoodiePathException(Path path, String type) {
|
||||
super("Invalid path " + path + " of type " + type);
|
||||
}
|
||||
|
||||
public InvalidHoodiePathException(Path path, String type) {
|
||||
super("Invalid path " + path + " of type " + type);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,11 +22,12 @@ package com.uber.hoodie.exception;
|
||||
* Thrown when expected metadata is not found
|
||||
*/
|
||||
public class MetadataNotFoundException extends HoodieException {
|
||||
public MetadataNotFoundException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public MetadataNotFoundException(String msg, Throwable e) {
|
||||
super(msg, e);
|
||||
}
|
||||
public MetadataNotFoundException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public MetadataNotFoundException(String msg, Throwable e) {
|
||||
super(msg, e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,15 +17,16 @@
|
||||
package com.uber.hoodie.exception;
|
||||
|
||||
public class SchemaCompatabilityException extends HoodieException {
|
||||
public SchemaCompatabilityException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public SchemaCompatabilityException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
public SchemaCompatabilityException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public SchemaCompatabilityException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
public SchemaCompatabilityException(String message, Throwable t) {
|
||||
super(message, t);
|
||||
}
|
||||
|
||||
public SchemaCompatabilityException(Throwable t) {
|
||||
super(t);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user