1
0

Import from Hoodie private repo: Part 1

This commit is contained in:
Prasanna Rajaperumal
2016-12-16 14:03:59 -08:00
commit 0512da094b
56 changed files with 8868 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.avro;
import com.uber.hoodie.common.BloomFilter;
import org.apache.avro.Schema;
import org.apache.parquet.avro.AvroWriteSupport;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.schema.MessageType;
import java.io.*;
import java.util.HashMap;
/**
 * An {@link AvroWriteSupport} wrapper that additionally maintains a bloom filter
 * over the record keys written, and exposes it via the parquet footer metadata.
 */
public class HoodieAvroWriteSupport extends AvroWriteSupport {

  /** Footer metadata key under which the serialized bloom filter is stored. */
  public static final String HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY =
      "com.uber.hoodie.bloomfilter";

  private BloomFilter bloomFilter;

  public HoodieAvroWriteSupport(MessageType schema, Schema avroSchema, BloomFilter bloomFilter) {
    super(schema, avroSchema);
    this.bloomFilter = bloomFilter;
  }

  /** Records the given key in the bloom filter. */
  public void add(String recordKey) {
    this.bloomFilter.add(recordKey);
  }

  @Override
  public WriteSupport.FinalizedWriteContext finalizeWrite() {
    HashMap<String, String> footerMetadata = new HashMap<>();
    if (bloomFilter != null) {
      // Serialize the filter into the footer so readers can prune files by key
      String serializedFilter = bloomFilter.serializeToString();
      footerMetadata.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, serializedFilter);
    }
    return new WriteSupport.FinalizedWriteContext(footerMetadata);
  }
}

View File

@@ -0,0 +1,166 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.avro;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.codehaus.jackson.map.ObjectMapper;
/**
 * Converts a JSON object string into an Avro {@link GenericRecord}, being
 * "merciful" about value types (e.g. any {@link Number} is accepted where the
 * schema asks for int/long/float/double).
 *
 * Majority of this is copied from
 * https://github.com/jwills/avro-json/blob/master/src/main/java/com/cloudera/science/avro/common/JsonConverter.java
 * Adjusted for expected behavior of our use cases
 */
public class MercifulJsonConverter {
  private final ObjectMapper mapper = new ObjectMapper();
  private final Schema baseSchema;

  public MercifulJsonConverter(Schema schema) {
    this.baseSchema = schema;
  }

  /**
   * Converts the given JSON string into a record of the base schema.
   *
   * @throws IOException if the input cannot be parsed as JSON
   */
  public GenericRecord convert(String json) throws IOException {
    try {
      return convert(mapper.readValue(json, Map.class), baseSchema);
    } catch (IOException e) {
      // Chain the original exception as the cause (previously only its message
      // was kept, losing the stack trace of the underlying parse failure)
      throw new IOException("Failed to parse as Json: " + json + "\n\n" + e.getMessage(), e);
    }
  }

  private GenericRecord convert(Map<String, Object> raw, Schema schema)
      throws IOException {
    GenericRecord result = new GenericData.Record(schema);
    for (Schema.Field f : schema.getFields()) {
      String name = f.name();
      Object rawValue = raw.get(name);
      // Fields absent from the JSON are simply left unset on the record
      if (rawValue != null) {
        result.put(f.pos(), typeConvert(rawValue, name, f.schema()));
      }
    }
    return result;
  }

  /**
   * Converts a single JSON value to the Avro representation dictated by the schema.
   *
   * @throws JsonConversionException if the value is incompatible with the schema
   */
  private Object typeConvert(Object value, String name, Schema schema) throws IOException {
    if (isOptional(schema)) {
      if (value == null) {
        return null;
      } else {
        schema = getNonNull(schema);
      }
    } else if (value == null) {
      // Always fail on null for non-nullable schemas
      throw new JsonConversionException(null, name, schema);
    }
    switch (schema.getType()) {
      case BOOLEAN:
        if (value instanceof Boolean) {
          return (Boolean) value;
        }
        break;
      case DOUBLE:
        if (value instanceof Number) {
          return ((Number) value).doubleValue();
        }
        break;
      case FLOAT:
        if (value instanceof Number) {
          return ((Number) value).floatValue();
        }
        break;
      case INT:
        if (value instanceof Number) {
          return ((Number) value).intValue();
        }
        break;
      case LONG:
        if (value instanceof Number) {
          return ((Number) value).longValue();
        }
        break;
      case STRING:
        return value.toString();
      case ENUM:
        if (schema.getEnumSymbols().contains(value.toString())) {
          return new GenericData.EnumSymbol(schema, value.toString());
        }
        throw new JsonConversionException(String.format("Symbol %s not in enum", value.toString()),
            schema.getFullName(), schema);
      case RECORD:
        return convert((Map<String, Object>) value, schema);
      case ARRAY:
        Schema elementSchema = schema.getElementType();
        // Parameterized instead of raw List/ArrayList
        List<Object> listRes = new ArrayList<>();
        for (Object v : (List<?>) value) {
          listRes.add(typeConvert(v, name, elementSchema));
        }
        return listRes;
      case MAP:
        Schema valueSchema = schema.getValueType();
        Map<String, Object> mapRes = new HashMap<String, Object>();
        for (Map.Entry<String, Object> v : ((Map<String, Object>) value).entrySet()) {
          mapRes.put(v.getKey(), typeConvert(v.getValue(), name, valueSchema));
        }
        return mapRes;
      default:
        throw new IllegalArgumentException(
            "JsonConverter cannot handle type: " + schema.getType());
    }
    // Value's Java type did not match what the schema demanded
    throw new JsonConversionException(value, name, schema);
  }

  /** @return true if the schema is a union of exactly [null, T] or [T, null]. */
  private boolean isOptional(Schema schema) {
    return schema.getType().equals(Schema.Type.UNION) &&
        schema.getTypes().size() == 2 &&
        (schema.getTypes().get(0).getType().equals(Schema.Type.NULL) ||
            schema.getTypes().get(1).getType().equals(Schema.Type.NULL));
  }

  /** @return the non-null branch of an optional (nullable) union schema. */
  private Schema getNonNull(Schema schema) {
    List<Schema> types = schema.getTypes();
    return types.get(0).getType().equals(Schema.Type.NULL) ? types.get(1) : types.get(0);
  }

  /** Thrown when a JSON value cannot be converted to the target Avro schema/field. */
  public static class JsonConversionException extends RuntimeException {
    private final Object value;
    private final String fieldName;
    private final Schema schema;

    public JsonConversionException(Object value, String fieldName, Schema schema) {
      this.value = value;
      this.fieldName = fieldName;
      this.schema = schema;
    }

    @Override
    public String toString() {
      return String.format("Type conversion error for field %s, %s for %s",
          fieldName, value, schema);
    }
  }
}

View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common;
import com.uber.hoodie.exception.HoodieIndexException;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.util.bloom.Key;
import org.apache.hadoop.util.hash.Hash;
import javax.xml.bind.DatatypeConverter;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
 * A Bloom filter implementation built on top of {@link org.apache.hadoop.util.bloom.BloomFilter}.
 */
public class BloomFilter {

  /**
   * Used in computing the optimal Bloom filter size. This approximately equals 0.480453.
   */
  public static final double LOG2_SQUARED = Math.log(2) * Math.log(2);

  private org.apache.hadoop.util.bloom.BloomFilter filter = null;

  public BloomFilter(int numEntries, double errorRate) {
    this(numEntries, errorRate, Hash.MURMUR_HASH);
  }

  /**
   * Create a new Bloom filter with the given configurations.
   *
   * @param numEntries expected number of entries to be stored
   * @param errorRate desired false-positive probability
   * @param hashType one of the hash types defined in {@link Hash}
   */
  public BloomFilter(int numEntries, double errorRate, int hashType) {
    // Optimal bit size for the requested capacity and error rate
    int bitSize = (int) Math.ceil(numEntries * (-Math.log(errorRate) / LOG2_SQUARED));
    // Optimal number of hash functions for that bit size
    int numHashes = (int) Math.ceil(Math.log(2) * bitSize / numEntries);
    this.filter = new org.apache.hadoop.util.bloom.BloomFilter(bitSize, numHashes, hashType);
  }

  /**
   * Create the bloom filter from a serialized string, as produced by
   * {@link #serializeToString()}.
   */
  public BloomFilter(String filterStr) {
    this.filter = new org.apache.hadoop.util.bloom.BloomFilter();
    byte[] bytes = DatatypeConverter.parseBase64Binary(filterStr);
    // try-with-resources: the original code leaked the stream when readFields threw
    try (DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes))) {
      this.filter.readFields(dis);
    } catch (IOException e) {
      throw new HoodieIndexException("Could not deserialize BloomFilter instance", e);
    }
  }

  /** Adds the key to the filter. */
  public void add(String key) {
    if (key == null) {
      throw new NullPointerException("Key cannot be null");
    }
    filter.add(new Key(key.getBytes(StandardCharsets.UTF_8)));
  }

  /**
   * @return true if the key may have been added before; false if it definitely was not
   */
  public boolean mightContain(String key) {
    if (key == null) {
      throw new NullPointerException("Key cannot be null");
    }
    return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
  }

  /**
   * Serialize the bloom filter as a Base64 string.
   */
  public String serializeToString() {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // try-with-resources: the original code leaked the stream when write threw
    try (DataOutputStream dos = new DataOutputStream(baos)) {
      filter.write(dos);
      return DatatypeConverter.printBase64Binary(baos.toByteArray());
    } catch (IOException e) {
      throw new HoodieIndexException("Could not serialize BloomFilter instance", e);
    }
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,190 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.codehaus.jackson.annotate.JsonAutoDetect;
import org.codehaus.jackson.annotate.JsonMethod;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * All the metadata that gets stored along with a commit.
 */
public class HoodieCommitMetadata implements Serializable {
  private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
  private HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;

  public HoodieCommitMetadata() {
    partitionToWriteStats = new HashMap<>();
  }

  /** Records a write stat under the given partition, creating the bucket on first use. */
  public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
    if (!partitionToWriteStats.containsKey(partitionPath)) {
      partitionToWriteStats.put(partitionPath, new ArrayList<HoodieWriteStat>());
    }
    partitionToWriteStats.get(partitionPath).add(stat);
  }

  /** @return the write stats for the partition, or null if none were recorded */
  public List<HoodieWriteStat> getWriteStats(String partitionPath) {
    return partitionToWriteStats.get(partitionPath);
  }

  public HashMap<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
    return partitionToWriteStats;
  }

  /** @return a map from file id to the full path written in this commit */
  public HashMap<String, String> getFileIdAndFullPaths() {
    HashMap<String, String> fileIdToPath = new HashMap<>();
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        fileIdToPath.put(writeStat.getFileId(), writeStat.getFullPath());
      }
    }
    return fileIdToPath;
  }

  /**
   * Serializes this metadata to pretty-printed JSON. Note: drops any stats
   * recorded under a null partition path before serializing.
   */
  public String toJsonString() throws IOException {
    if (partitionToWriteStats.containsKey(null)) {
      log.info("partition path is null for " + partitionToWriteStats.get(null));
      partitionToWriteStats.remove(null);
    }
    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
    return jsonMapper.defaultPrettyPrintingWriter().writeValueAsString(this);
  }

  /** Deserializes metadata from JSON; a null/empty string yields empty metadata. */
  public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
    if (jsonStr == null || jsonStr.isEmpty()) {
      // For empty commit file (no data or somethings bad happen).
      return new HoodieCommitMetadata();
    }
    ObjectMapper jsonMapper = new ObjectMapper();
    jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
    return jsonMapper.readValue(jsonStr, HoodieCommitMetadata.class);
  }

  // The aggregation methods below are named "fetch" instead of "get" so that the
  // JSON serializer does not treat them as bean properties.

  public long fetchTotalPartitionsWritten() {
    return partitionToWriteStats.size();
  }

  /** Counts stats whose previous commit is the literal string "null" (i.e. inserts). */
  public long fetchTotalFilesInsert() {
    long filesInserted = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        String prev = writeStat.getPrevCommit();
        if (prev != null && prev.equals("null")) {
          filesInserted++;
        }
      }
    }
    return filesInserted;
  }

  /** Counts stats with a real previous commit (i.e. updates to existing files). */
  public long fetchTotalFilesUpdated() {
    long filesUpdated = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        String prev = writeStat.getPrevCommit();
        if (prev != null && !prev.equals("null")) {
          filesUpdated++;
        }
      }
    }
    return filesUpdated;
  }

  public long fetchTotalUpdateRecordsWritten() {
    long updateRecords = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        updateRecords += writeStat.getNumUpdateWrites();
      }
    }
    return updateRecords;
  }

  public long fetchTotalInsertRecordsWritten() {
    long insertRecords = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        String prev = writeStat.getPrevCommit();
        if (prev != null && prev.equals("null")) {
          insertRecords += writeStat.getNumWrites();
        }
      }
    }
    return insertRecords;
  }

  public long fetchTotalRecordsWritten() {
    long recordsWritten = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        recordsWritten += writeStat.getNumWrites();
      }
    }
    return recordsWritten;
  }

  public long fetchTotalBytesWritten() {
    long bytesWritten = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        bytesWritten += writeStat.getTotalWriteBytes();
      }
    }
    return bytesWritten;
  }

  public long fetchTotalWriteErrors() {
    long writeErrors = 0;
    for (List<HoodieWriteStat> statsForPartition : partitionToWriteStats.values()) {
      for (HoodieWriteStat writeStat : statsForPartition) {
        writeErrors += writeStat.getTotalWriteErrors();
      }
    }
    return writeErrors;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    HoodieCommitMetadata other = (HoodieCommitMetadata) o;
    if (partitionToWriteStats == null) {
      return other.partitionToWriteStats == null;
    }
    return partitionToWriteStats.equals(other.partitionToWriteStats);
  }

  @Override
  public int hashCode() {
    return (partitionToWriteStats == null) ? 0 : partitionToWriteStats.hashCode();
  }
}

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Manages the commit meta and provides operations on the commit timeline.
 * Commit timestamps are compared lexicographically, which matches their
 * chronological order. The internal list is kept sorted and unmodifiable.
 */
public class HoodieCommits implements Serializable {
  private List<String> commitList;

  public HoodieCommits(List<String> commitList) {
    this.commitList = new ArrayList<>(commitList);
    Collections.sort(this.commitList);
    this.commitList = Collections.unmodifiableList(this.commitList);
  }

  /**
   * Returns the commits which are in the range (startTs, endTs].
   *
   * @param startTs - exclusive start commit ts (null means from the first commit)
   * @param endTs - inclusive end commit ts
   * @throws IllegalArgumentException if startTs sorts after endTs
   */
  public List<String> findCommitsInRange(String startTs, String endTs) {
    if (commitList.isEmpty()) {
      return Collections.emptyList();
    }
    int startIndex = 0;
    if (startTs != null) {
      int found = Collections.binarySearch(commitList, startTs);
      // Exclusive of startTs: if present, skip past it; otherwise start at the insertion point
      startIndex = (found >= 0) ? found + 1 : -(found + 1);
    }
    int found = Collections.binarySearch(commitList, endTs);
    // Inclusive of endTs: if present, include it; otherwise stop at the insertion point.
    // (The previous implementation unconditionally appended the commit at the
    // insertion point, which could include a commit strictly greater than endTs.)
    int endIndex = (found >= 0) ? found + 1 : -(found + 1);
    if (endIndex < startIndex) {
      throw new IllegalArgumentException(
          "Start Commit Ts " + startTs + " cannot be greater than end commit ts " + endTs);
    }
    return Collections.unmodifiableList(new ArrayList<>(commitList.subList(startIndex, endIndex)));
  }

  /**
   * Returns up to numCommits commits strictly after the given commit timestamp.
   *
   * @return at most numCommits commits after commitTimeStamp, or null when the
   *         timeline is empty (NOTE(review): consider an empty list instead;
   *         null kept for caller compatibility)
   */
  public List<String> findCommitsAfter(String commitTimeStamp, int numCommits) {
    if (commitList.isEmpty()) {
      return null;
    }
    int startIndex = Collections.binarySearch(commitList, commitTimeStamp);
    if (startIndex < 0) {
      startIndex = -(startIndex + 1);
    } else {
      // commitTimeStamp itself is present; exclude it
      startIndex++;
    }
    List<String> commits = new ArrayList<>();
    while (numCommits > 0 && startIndex < commitList.size()) {
      commits.add(commitList.get(startIndex));
      startIndex++;
      numCommits--;
    }
    return Collections.unmodifiableList(commits);
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("HoodieCommits{");
    sb.append("commitList=").append(commitList);
    sb.append('}');
    return sb.toString();
  }

  public boolean isEmpty() {
    return commitList.isEmpty();
  }

  public int getNumCommits() {
    return commitList.size();
  }

  /** @return the earliest commit, or null if the timeline is empty */
  public String firstCommit() {
    return commitList.isEmpty() ? null : commitList.get(0);
  }

  /** @return the n-th (0-based) earliest commit, or null if out of range */
  public String nthCommit(int n) {
    return commitList.isEmpty() || n >= commitList.size() ? null : commitList.get(n);
  }

  /** @return the latest commit, or null if the timeline is empty */
  public String lastCommit() {
    return commitList.isEmpty() ? null : commitList.get(commitList.size() - 1);
  }

  /**
   * Returns the nth commit from the latest commit such that lastCommit(0) => lastCommit()
   */
  public String lastCommit(int n) {
    if (commitList.size() < n + 1) {
      return null;
    }
    return commitList.get(commitList.size() - 1 - n);
  }

  public boolean contains(String commitTs) {
    return commitList.contains(commitTs);
  }

  /** @return the later of the two commits, treating null as "absent" */
  public String max(String commit1, String commit2) {
    if (commit1 == null && commit2 == null) {
      return null;
    }
    if (commit1 == null) {
      return commit2;
    }
    if (commit2 == null) {
      return commit1;
    }
    return (isCommit1BeforeOrOn(commit1, commit2) ? commit2 : commit1);
  }

  public static boolean isCommit1BeforeOrOn(String commit1, String commit2) {
    return commit1.compareTo(commit2) <= 0;
  }

  public static boolean isCommit1After(String commit1, String commit2) {
    return commit1.compareTo(commit2) > 0;
  }

  public List<String> getCommitList() {
    return commitList;
  }

  // NOTE(review): throws NullPointerException when the timeline is empty
  // (firstCommit() returns null) — confirm callers guard against that.
  public boolean isCommitBeforeEarliestCommit(String commitTs) {
    return isCommit1BeforeOrOn(commitTs, firstCommit());
  }

  @Override
  public boolean equals(Object o) {
    if (this == o)
      return true;
    if (o == null || getClass() != o.getClass())
      return false;
    HoodieCommits that = (HoodieCommits) o;
    return commitList != null ? commitList.equals(that.commitList) : that.commitList == null;
  }

  @Override
  public int hashCode() {
    return commitList != null ? commitList.hashCode() : 0;
  }
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileStatus;
/**
 * Wraps a {@link FileStatus} for a hoodie data file, exposing the file id and
 * the commit timestamp parsed out of the file name via {@link FSUtils}.
 */
public class HoodieFile {
  private final FileStatus fileStatus;
  private String fileNameWithoutCommitTs;
  private String commitTs;

  public HoodieFile(FileStatus fileStatus) {
    this.fileStatus = fileStatus;
    // Both the file id and commit time are encoded in the file name
    final String name = fileStatus.getPath().getName();
    this.fileNameWithoutCommitTs = FSUtils.getFileId(name);
    this.commitTs = FSUtils.getCommitTime(name);
  }

  public FileStatus getFileStatus() {
    return fileStatus;
  }

  public String getFileNameWithoutCommitTs() {
    return fileNameWithoutCommitTs;
  }

  public String getCommitTs() {
    return commitTs;
  }

  @Override
  public String toString() {
    StringBuilder out = new StringBuilder("HoodieFile{");
    out.append("fileStatus=").append(fileStatus);
    out.append(", fileNameWithoutCommitTs='").append(fileNameWithoutCommitTs).append('\'');
    out.append(", commitTs='").append(commitTs).append('\'');
    out.append('}');
    return out.toString();
  }
}

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
 * HoodieKey uniquely identifies a record within a Hoodie dataset. It consists of
 *
 * - recordKey : a recordKey that acts as primary key for a record
 * - partitionPath : path to the partition that contains the record
 */
public class HoodieKey implements Serializable {
  private final String recordKey;
  private final String partitionPath;

  public HoodieKey(String recordKey, String partitionPath) {
    this.recordKey = recordKey;
    this.partitionPath = partitionPath;
  }

  public String getRecordKey() {
    return recordKey;
  }

  public String getPartitionPath() {
    return partitionPath;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o)
      return true;
    if (o == null || getClass() != o.getClass())
      return false;
    HoodieKey otherKey = (HoodieKey) o;
    // java.util.Objects (JDK 7+) replaces the Guava dependency; semantics and
    // produced values are identical to Guava's Objects.equal
    return java.util.Objects.equals(recordKey, otherKey.recordKey) &&
        java.util.Objects.equals(partitionPath, otherKey.partitionPath);
  }

  @Override
  public int hashCode() {
    // Identical to Guava's Objects.hashCode(a, b): both delegate to Arrays.hashCode
    return java.util.Objects.hash(recordKey, partitionPath);
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("HoodieKey {");
    sb.append(" recordKey=").append(recordKey);
    sb.append(" partitionPath=").append(partitionPath);
    sb.append('}');
    return sb.toString();
  }
}

View File

@@ -0,0 +1,153 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
 * A Single Record managed by Hoodie TODO - Make this generic
 */
public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable {

  // Names of the metadata fields hoodie adds to every written record.
  // Declared final: these are constants and must never be reassigned.
  public static final String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
  public static final String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
  public static final String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
  public static final String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
  public static final String FILENAME_METADATA_FIELD = "_hoodie_file_name";

  /**
   * Identifies the record across the table
   */
  private HoodieKey key;

  /**
   * Actual payload of the record
   */
  private T data;

  /**
   * Current location of record on storage. Filled in by looking up index
   */
  private HoodieRecordLocation currentLocation;

  /**
   * New location of record on storage, after written
   */
  private HoodieRecordLocation newLocation;

  public HoodieRecord(HoodieKey key, T data) {
    this.key = key;
    this.data = data;
    this.currentLocation = null;
    this.newLocation = null;
  }

  public HoodieKey getKey() {
    return key;
  }

  /**
   * @return the payload
   * @throws IllegalStateException if the payload was already deflated
   */
  public T getData() {
    if (data == null) {
      throw new IllegalStateException("Payload already deflated for record.");
    }
    return data;
  }

  /**
   * Release the actual payload, to ease memory pressure. To be called after the record
   * has been written to storage. Once deflated, cannot be inflated.
   */
  public void deflate() {
    this.data = null;
  }

  /**
   * Sets the current currentLocation of the record. This should happen exactly-once
   */
  public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
    assert currentLocation == null;
    this.currentLocation = location;
    return this;
  }

  public HoodieRecordLocation getCurrentLocation() {
    return currentLocation;
  }

  /**
   * Sets the new currentLocation of the record, after being written. This again should happen
   * exactly-once.
   */
  public HoodieRecord setNewLocation(HoodieRecordLocation location) {
    assert newLocation == null;
    this.newLocation = location;
    return this;
  }

  public HoodieRecordLocation getNewLocation() {
    return this.newLocation;
  }

  public boolean isCurrentLocationKnown() {
    return this.currentLocation != null;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o)
      return true;
    if (o == null || getClass() != o.getClass())
      return false;
    HoodieRecord that = (HoodieRecord) o;
    // java.util.Objects (JDK 7+) replaces the Guava Objects dependency;
    // semantics are identical to Guava's Objects.equal
    return java.util.Objects.equals(key, that.key) &&
        java.util.Objects.equals(data, that.data) &&
        java.util.Objects.equals(currentLocation, that.currentLocation) &&
        java.util.Objects.equals(newLocation, that.newLocation);
  }

  @Override
  public int hashCode() {
    // Identical values to Guava's Objects.hashCode(...): both use Arrays.hashCode
    return java.util.Objects.hash(key, data, currentLocation, newLocation);
  }

  @Override
  public String toString() {
    final StringBuilder sb = new StringBuilder("HoodieRecord{");
    sb.append("key=").append(key);
    sb.append(", currentLocation='").append(currentLocation).append('\'');
    sb.append(", newLocation='").append(newLocation).append('\'');
    sb.append('}');
    return sb.toString();
  }

  /** Builds a globally unique sequence id for a record written in a commit. */
  public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
    return commitTime + "_" + partitionId + "_" + recordIndex;
  }

  public String getPartitionPath() {
    assert key != null;
    return key.getPartitionPath();
  }

  public String getRecordKey() {
    assert key != null;
    return key.getRecordKey();
  }
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
* Location of a HoodieRecord within the parition it belongs to. Ultimately, this points to an
* actual file on disk
*/
public class HoodieRecordLocation implements Serializable {
private final String commitTime;
private final String fileId;
public HoodieRecordLocation(String commitTime, String fileId) {
this.commitTime = commitTime;
this.fileId = fileId;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
return Objects.equal(commitTime, otherLoc.commitTime) &&
Objects.equal(fileId, otherLoc.fileId);
}
@Override
public int hashCode() {
return Objects.hashCode(commitTime, fileId);
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
sb.append("commitTime=").append(commitTime).append(", ");
sb.append("fileId=").append(fileId);
sb.append('}');
return sb.toString();
}
public String getCommitTime() {
return commitTime;
}
public String getFileId() {
return fileId;
}
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
import java.io.Serializable;
/**
 * Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code>.
 * This abstracts out callbacks which depend on record-specific logic.
 */
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
/**
 * When more than one HoodieRecord have the same HoodieKey, this function combines them
 * before attempting to insert/upsert (if combining is turned on in HoodieClientConfig).
 *
 * @param another the other payload sharing the same HoodieKey
 * @return the payload that should survive the combination
 */
T preCombine(T another);
/**
 *
 * This method lets you write custom merging/combining logic to produce new values
 * as a function of current value on storage and whats contained in this object.
 *
 * eg:
 * 1) You are updating counters, you may want to add counts to currentValue and write back updated counts
 * 2) You may be reading DB redo logs, and merge them with current image for a database row on storage
 *
 * @param currentValue Current value in storage, to merge/combine this payload with
 * @param schema Schema used for record
 * @return new combined/merged value to be written back to storage
 * @throws IOException if the merge fails
 */
IndexedRecord combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
/**
 * Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
 * Called when writing a new value for the given HoodieKey, wherein there is no existing record in
 * storage to be combined against. (i.e insert)
 *
 * @param schema Schema used for record
 * @throws IOException if the record cannot be generated
 */
IndexedRecord getInsertValue(Schema schema) throws IOException;
}

View File

@@ -0,0 +1,480 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.DatasetNotFoundException;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.InvalidDatasetException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;
/**
 * Manages all file system level interactions for the Hoodie tables.
 * <p>
 * Loads (and optionally initializes) the metadata kept under the dataset's
 * <code>.hoodie</code> folder: the <code>hoodie.properties</code> file plus the
 * list of completed and inflight commits.
 */
public class HoodieTableMetadata implements Serializable {
    // Sentinel commit time that sorts after any real commit; used as an open
    // upper bound when asking for "all commits since X".
    public static final String MAX_COMMIT_TS = String.valueOf(Long.MAX_VALUE);
    public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
    public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
    public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
    public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
    private static final String HOODIE_HDRONE_PROFILE_DEFAULT_VALUE = "HOODIE";
    private static final java.lang.String HOODIE_HDRONE_PROFILE_PROP_NAME =
            "hoodie.hdrone.dataset.profile";
    private static Logger log = LogManager.getLogger(HoodieTableMetadata.class);
    // FileSystem/Path are not serializable, hence transient; instances that cross
    // a serialization boundary lose these handles.
    private transient final FileSystem fs;
    private transient final Path metadataFolder;
    // Contents of the hoodie.properties file (table name, table type, ...).
    private final Properties properties;
    // Completed commits, ordered by commit time.
    private HoodieCommits commits;
    // Commit times that were started but have not completed yet.
    private List<String> inflightCommits;
    private String basePath;
    public static final String METAFOLDER_NAME = ".hoodie";
    public static final String COMMIT_FILE_SUFFIX = ".commit";
    public static final String INFLIGHT_FILE_SUFFIX = ".inflight";

    /**
     * Constructor which initializes the hoodie table metadata. It will initialize the meta-data if not already present.
     *
     * @param fs filesystem the dataset lives on
     * @param basePath base path of the dataset
     * @param tableName table name recorded in hoodie.properties when initializing
     * @throws HoodieIOException wrapping any IOException hit while reading/creating metadata
     */
    public HoodieTableMetadata(FileSystem fs, String basePath, String tableName) {
        this(fs, basePath, tableName, true);
    }

    /**
     * Constructor which loads the hoodie table metadata, It requires the meta-data to be present already
     *
     * @param fs filesystem the dataset lives on
     * @param basePath base path of the dataset
     * @throws DatasetNotFoundException if the base path is missing or not a directory
     */
    public HoodieTableMetadata(FileSystem fs, String basePath) {
        this(fs, basePath, null, false);
    }

    /**
     * Shared constructor body. When {@code initOnMissing} is true, the base path and
     * the .hoodie/hoodie.properties metadata are created on demand; otherwise their
     * absence is an error.
     */
    private HoodieTableMetadata(FileSystem fs, String basePath, String tableName,
            boolean initOnMissing) {
        this.fs = fs;
        this.basePath = basePath;
        try {
            Path basePathDir = new Path(this.basePath);
            if (!fs.exists(basePathDir)) {
                if (initOnMissing) {
                    fs.mkdirs(basePathDir);
                } else {
                    throw new DatasetNotFoundException(this.basePath);
                }
            }
            if (!fs.isDirectory(new Path(basePath))) {
                throw new DatasetNotFoundException(this.basePath);
            }
            // create .hoodie folder if it does not exist.
            this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
            Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
            if (!fs.exists(propertyPath)) {
                if (initOnMissing) {
                    createHoodieProperties(metadataFolder, tableName);
                } else {
                    throw new InvalidDatasetException(this.basePath);
                }
            }
            // Load meta data
            this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
            this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
            this.properties = readHoodieProperties();
            log.info("All commits :" + commits);
        } catch (IOException e) {
            throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
        }
    }

    /**
     * Returns all the commit metadata for this table. Reads all the commit files from HDFS.
     * Expensive operation, use with caution.
     *
     * @return SortedMap of CommitTime,<class>HoodieCommitMetadata</class>
     * @throws HoodieIOException if any commit file cannot be read
     */
    public SortedMap<String, HoodieCommitMetadata> getAllCommitMetadata() {
        try {
            TreeMap<String, HoodieCommitMetadata> metadataMap = new TreeMap<>();
            for (String commitTs : commits.getCommitList()) {
                metadataMap.put(commitTs, getCommitMetadata(commitTs));
            }
            return Collections.unmodifiableSortedMap(metadataMap);
        } catch (IOException e) {
            throw new HoodieIOException("Could not load all commits for table " + getTableName(),
                    e);
        }
    }

    /**
     * Reads and deserializes the JSON commit metadata file for a single commit time.
     */
    public HoodieCommitMetadata getCommitMetadata(String commitTime) throws IOException {
        FSDataInputStream is = fs.open(new Path(metadataFolder, FSUtils.makeCommitFileName(commitTime)));
        try {
            String jsonStr = IOUtils.toString(is);
            return HoodieCommitMetadata.fromJsonString(jsonStr);
        } finally {
            is.close();
        }
    }

    /**
     * Table type as recorded in hoodie.properties (currently always COPY_ON_WRITE).
     */
    public HoodieTableType getTableType() {
        return HoodieTableType.valueOf(properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
    }

    /**
     * Lookup the file name for specified <code>HoodieRecord</code>
     * <p/>
     * TODO(vc): This metadata needs to be cached in each executor, statically, and used across, if
     * we need to be nicer to the NameNode
     */
    public String getFilenameForRecord(FileSystem fs, final HoodieRecord record) {
        String fileId = record.getCurrentLocation().getFileId();
        return getFilenameForRecord(fs, record, fileId);
    }

    /**
     * Lookup the latest file name for the given fileId within the record's partition.
     * Relies on groupFilesByFileId returning versions sorted newest-first.
     */
    public String getFilenameForRecord(FileSystem fs, final HoodieRecord record, String fileId) {
        try {
            FileStatus[] files = fs.listStatus(new Path(basePath, record.getPartitionPath()));
            Map<String, List<FileStatus>> fileIdToVersions =
                    groupFilesByFileId(files, commits.lastCommit());
            // If the record is not found
            if(!fileIdToVersions.containsKey(fileId)) {
                throw new FileNotFoundException("Cannot find valid versions for fileId " + fileId);
            }
            List<FileStatus> statuses = fileIdToVersions.get(fileId);
            // first entry is the latest version (list is sorted by reverse commit time)
            return statuses.get(0).getPath().getName();
        } catch (IOException e) {
            throw new HoodieIOException(
                    "Could not get Filename for record " + record, e);
        }
    }

    /**
     * Get only the latest file in the partition with precondition commitTime(file) < maxCommitTime
     *
     * @param fs filesystem to list the partition on
     * @param partitionPathStr partition path relative to the base path
     * @param maxCommitTime latest commit time (inclusive) a returned file may belong to
     * @return one FileStatus per fileId, or an empty array if the partition does not exist
     */
    public FileStatus[] getLatestVersionInPartition(FileSystem fs, String partitionPathStr,
            String maxCommitTime) {
        try {
            Path partitionPath = new Path(basePath, partitionPathStr);
            if(!fs.exists(partitionPath)) {
                return new FileStatus[0];
            }
            FileStatus[] files = fs.listStatus(partitionPath);
            Map<String, List<FileStatus>> fileIdToVersions =
                    groupFilesByFileId(files, commits.lastCommit());
            HashMap<String, FileStatus> validFiles = new HashMap<>();
            for (String fileId : fileIdToVersions.keySet()) {
                List<FileStatus> versions = fileIdToVersions.get(fileId);
                for (FileStatus file : versions) {
                    String filename = file.getPath().getName();
                    String commitTime = FSUtils.getCommitTime(filename);
                    // versions are newest-first, so the first one at/before
                    // maxCommitTime is the latest eligible version
                    if (HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
                        validFiles.put(fileId, file);
                        break;
                    }
                }
            }
            return validFiles.values().toArray(new FileStatus[validFiles.size()]);
        } catch (IOException e) {
            throw new HoodieIOException(
                    "Could not get latest versions in Partition " + partitionPathStr, e);
        }
    }

    /**
     * Get ALL the data files in partition grouped by fileId and sorted by the commitTime
     * Given a partition path, provide all the files with a list of their commits, sorted by commit time.
     */
    public Map<String, List<FileStatus>> getAllVersionsInPartition(FileSystem fs, String partitionPath) {
        try {
            FileStatus[] files = fs.listStatus(new Path(basePath, partitionPath));
            return groupFilesByFileId(files, commits.lastCommit());
        } catch (IOException e) {
            throw new HoodieIOException(
                    "Could not load all file versions in partition " + partitionPath, e);
        }
    }

    /**
     * Get all the versions of files, within the commit range provided.
     *
     * @param commitsToReturn - commits to include
     */
    public FileStatus[] getLatestVersionInRange(FileStatus[] fileStatuses, List<String> commitsToReturn) {
        if (commitsToReturn.isEmpty()) {
            return new FileStatus[0];
        }
        try {
            Map<String, List<FileStatus>> fileIdToVersions =
                    groupFilesByFileId(fileStatuses, commits.lastCommit());
            List<FileStatus> statuses = new ArrayList<>();
            for (List<FileStatus> entry : fileIdToVersions.values()) {
                for (FileStatus status : entry) {
                    String commitTime = FSUtils.getCommitTime(status.getPath().getName());
                    // versions are newest-first; take the newest one inside the range
                    if (commitsToReturn.contains(commitTime)) {
                        statuses.add(status);
                        break;
                    }
                }
            }
            return statuses.toArray(new FileStatus[statuses.size()]);
        } catch (IOException e) {
            throw new HoodieIOException("Could not filter files from commits " + commitsToReturn, e);
        }
    }

    /**
     *
     * Get the latest versions of all the files.
     *
     * @param fileStatuses candidate files (any mix of fileIds/versions)
     * @return one FileStatus per fileId - its newest valid version
     */
    public FileStatus[] getLatestVersions(FileStatus[] fileStatuses) {
        try {
            Map<String, List<FileStatus>> fileIdToVersions =
                    groupFilesByFileId(fileStatuses, commits.lastCommit());
            List<FileStatus> statuses = new ArrayList<>();
            for(List<FileStatus> entry:fileIdToVersions.values()) {
                // first file is the latest one
                statuses.add(entry.get(0));
            }
            return statuses.toArray(new FileStatus[statuses.size()]);
        } catch (IOException e) {
            throw new HoodieIOException("Could not filter files for latest version ", e);
        }
    }

    /**
     * Get the base path for the Hoodie Table
     *
     * @return base path this metadata was loaded from
     */
    public String getBasePath() {
        return basePath;
    }

    public boolean isCommitsEmpty() {
        return commits.isEmpty();
    }

    /**
     * A commit time is "safe" if it is a known commit, or predates the earliest
     * commit we track (i.e. it cannot conflict with anything we know about).
     */
    public boolean isCommitTsSafe(String commitTs) {
        return !isCommitsEmpty() && (commits.isCommitBeforeEarliestCommit(commitTs) || commits
                .contains(commitTs));
    }

    public List<String> findCommitsSinceTs(String startTs) {
        return commits.findCommitsInRange(startTs, MAX_COMMIT_TS);
    }

    public List<String> findCommitsInRange(String startTs, String endTs) {
        return commits.findCommitsInRange(startTs, endTs);
    }

    public List<String> findCommitsAfter(String startTs, Integer maxCommits) {
        return commits.findCommitsAfter(startTs, maxCommits);
    }

    public HoodieCommits getAllCommits() {
        return commits;
    }

    public List<String> getAllInflightCommits() {
        return inflightCommits;
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("HoodieTableMetadata{");
        sb.append("commits=").append(commits);
        sb.append('}');
        return sb.toString();
    }

    public String getTableName() {
        return properties.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
    }

    public String getHDroneDatasetProfile() {
        return properties.getProperty(HOODIE_HDRONE_PROFILE_PROP_NAME, HOODIE_HDRONE_PROFILE_DEFAULT_VALUE);
    }

    /**
     * Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
     *
     * @param metadataFolder the .hoodie folder to create the properties file under
     * @param tableName table name to record in the properties
     * @throws IOException if the folder or file cannot be created
     */
    private void createHoodieProperties(Path metadataFolder, String tableName) throws IOException {
        if (!fs.exists(metadataFolder)) {
            fs.mkdirs(metadataFolder);
        }
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        FSDataOutputStream outputStream = fs.create(propertyPath);
        try {
            Properties props = new Properties();
            props.setProperty(HOODIE_TABLE_NAME_PROP_NAME, tableName);
            props.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
            props
                    .store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
        } finally {
            outputStream.close();
        }
    }

    /**
     * Loads the hoodie table properties from the hoodie.properties file under the .hoodie path
     */
    private Properties readHoodieProperties() throws IOException {
        Properties props = new Properties();
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        FSDataInputStream inputStream = fs.open(propertyPath);
        try {
            props.load(inputStream);
        } finally {
            inputStream.close();
        }
        return props;
    }

    /**
     * Scan the commit times (only choosing commit file with the given suffix)
     */
    private List<String> scanCommits(final String commitFileSuffix) throws IOException {
        log.info("Attempting to load the commits under " + metadataFolder + " with suffix " + commitFileSuffix);
        final List<String> commitFiles = new ArrayList<>();
        // NOTE: the PathFilter is used for its side effect - it accumulates the
        // commit time (file name up to the first '.') of every matching file.
        fs.listStatus(metadataFolder, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                if (path.getName().endsWith(commitFileSuffix)) {
                    commitFiles.add(path.getName().split("\\.")[0]);
                    return true;
                }
                return false;
            }
        });
        return commitFiles;
    }

    /**
     * Takes a bunch of file versions, and returns a map keyed by fileId, with the necessary
     * version safety checking. Returns a map of commitTime and Sorted list of FileStats
     * ( by reverse commit time )
     *
     * @param maxCommitTime maximum permissible commit time
     *
     * @return map of fileId to its versions, each list sorted newest commit first
     */
    private Map<String, List<FileStatus>> groupFilesByFileId(FileStatus[] files,
            String maxCommitTime) throws IOException {
        HashMap<String, List<FileStatus>> fileIdtoVersions = new HashMap<>();
        for (FileStatus file : files) {
            String filename = file.getPath().getName();
            String fileId = FSUtils.getFileId(filename);
            String commitTime = FSUtils.getCommitTime(filename);
            // skip files from unknown/unsafe commits or after the cutoff
            if (isCommitTsSafe(commitTime) && HoodieCommits
                    .isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
                if (!fileIdtoVersions.containsKey(fileId)) {
                    fileIdtoVersions.put(fileId, new ArrayList<FileStatus>());
                }
                fileIdtoVersions.get(fileId).add(file);
            }
        }
        for (Map.Entry<String, List<FileStatus>> entry : fileIdtoVersions.entrySet()) {
            Collections.sort(fileIdtoVersions.get(entry.getKey()), new Comparator<FileStatus>() {
                @Override
                public int compare(FileStatus o1, FileStatus o2) {
                    String o1CommitTime = FSUtils.getCommitTime(o1.getPath().getName());
                    String o2CommitTime = FSUtils.getCommitTime(o2.getPath().getName());
                    // Reverse the order
                    return o2CommitTime.compareTo(o1CommitTime);
                }
            });
        }
        return fileIdtoVersions;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        HoodieTableMetadata metadata = (HoodieTableMetadata) o;
        if (commits != null ? !commits.equals(metadata.commits) : metadata.commits != null)
            return false;
        return basePath != null ? basePath.equals(metadata.basePath) : metadata.basePath == null;
    }

    @Override
    public int hashCode() {
        int result = commits != null ? commits.hashCode() : 0;
        result = 31 * result + (basePath != null ? basePath.hashCode() : 0);
        return result;
    }
}

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
/**
 * Type of the Hoodie Table.
 *
 * Currently, 1 type is supported
 *
 * COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing newer
 * value of a record.
 *
 * In the future, following might be added.
 *
 * MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
 *
 * SIMPLE_LSM - A simple 2 level LSM tree.
 */
public enum HoodieTableType {
    // Each write produces a complete new version of every touched file; readers
    // never need to merge, they just pick the latest file version.
    COPY_ON_WRITE
}

View File

@@ -0,0 +1,158 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
/**
 * Statistics about a single Hoodie write operation.
 * <p>
 * Plain serializable bean, populated either by the writer or deserialized from
 * commit metadata JSON by Jackson (hence the public no-arg constructor and the
 * tolerance for unknown properties).
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieWriteStat implements Serializable {
    public static final String NULL_COMMIT = "null";
    /**
     * Id of the file being written
     */
    private String fileId;
    /**
     * Full path to the file on underlying file system
     */
    private String fullPath;
    /**
     * The previous version of the file. (null if this is the first version. i.e insert)
     */
    private String prevCommit;
    /**
     * Total number of records written for this file.
     * - for updates, its the entire number of records in the file
     * - for inserts, its the actual number of records inserted.
     */
    private long numWrites;
    /**
     * Total number of records actually changed. (0 for inserts)
     */
    private long numUpdateWrites;
    /**
     * Total size of file written
     */
    private long totalWriteBytes;
    /**
     * Total number of records that weren't able to be written due to errors.
     */
    private long totalWriteErrors;

    public HoodieWriteStat() {
        // called by jackson json lib
    }

    public void setFileId(String fileId) {
        this.fileId = fileId;
    }

    public void setFullPath(String fullFilePath) {
        this.fullPath = fullFilePath;
    }

    public void setPrevCommit(String prevCommit) {
        this.prevCommit = prevCommit;
    }

    public void setNumWrites(long numWrites) {
        this.numWrites = numWrites;
    }

    public void setNumUpdateWrites(long numUpdateWrites) {
        this.numUpdateWrites = numUpdateWrites;
    }

    public long getTotalWriteBytes() {
        return totalWriteBytes;
    }

    public void setTotalWriteBytes(long totalWriteBytes) {
        this.totalWriteBytes = totalWriteBytes;
    }

    public long getTotalWriteErrors() {
        return totalWriteErrors;
    }

    public void setTotalWriteErrors(long totalWriteErrors) {
        this.totalWriteErrors = totalWriteErrors;
    }

    public String getPrevCommit() {
        return prevCommit;
    }

    public long getNumWrites() {
        return numWrites;
    }

    public long getNumUpdateWrites() {
        return numUpdateWrites;
    }

    public String getFileId() {
        return fileId;
    }

    public String getFullPath() {
        return fullPath;
    }

    @Override
    public String toString() {
        return new StringBuilder()
                .append("HoodieWriteStat {")
                .append("fullPath='" + fullPath + '\'')
                .append(", prevCommit='" + prevCommit + '\'')
                .append(", numWrites=" + numWrites)
                .append(", numUpdateWrites=" + numUpdateWrites)
                .append(", numWriteBytes=" + totalWriteBytes)
                .append('}')
                .toString();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;
        HoodieWriteStat that = (HoodieWriteStat) o;
        // fullPath/prevCommit may be null on a bean that Jackson default-constructed
        // but never populated; compare null-safely instead of risking an NPE.
        if (fullPath != null ? !fullPath.equals(that.fullPath) : that.fullPath != null)
            return false;
        return prevCommit != null ? prevCommit.equals(that.prevCommit) : that.prevCommit == null;
    }

    @Override
    public int hashCode() {
        // Must stay consistent with equals(): same two fields, null-safe.
        int result = fullPath != null ? fullPath.hashCode() : 0;
        result = 31 * result + (prevCommit != null ? prevCommit.hashCode() : 0);
        return result;
    }
}

View File

@@ -0,0 +1,117 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.common.model.HoodieTableMetadata;
import com.uber.hoodie.exception.HoodieIOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Utility functions related to accessing the file storage
 */
public class FSUtils {

    private static final Logger LOG = LogManager.getLogger(FSUtils.class);

    /**
     * Builds a FileSystem from a fresh Hadoop Configuration, pinning the HDFS and
     * local implementations explicitly so resolution does not depend on
     * core-site.xml service loading.
     */
    public static FileSystem getFs() {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
        final FileSystem fileSystem;
        try {
            fileSystem = FileSystem.get(conf);
        } catch (IOException ioe) {
            throw new HoodieIOException("Failed to get instance of " + FileSystem.class.getName(),
                    ioe);
        }
        LOG.info(String.format("Hadoop Configuration: fs.defaultFS: [%s], Config:[%s], FileSystem: [%s]",
                conf.getRaw("fs.defaultFS"), conf.toString(), fileSystem.toString()));
        return fileSystem;
    }

    /** Data file name: {@code <fileId>_<taskPartitionId>_<commitTime>.parquet}. */
    public static String makeDataFileName(String commitTime, int taskPartitionId, String fileId) {
        return String.format("%s_%d_%s.parquet", fileId, taskPartitionId, commitTime);
    }

    /** Glob pattern matching any fileId for the given task partition and commit. */
    public static String maskWithoutFileId(String commitTime, int taskPartitionId) {
        return String.format("*_%s_%s.parquet", taskPartitionId, commitTime);
    }

    /** Glob pattern matching any task partition for the given fileId and commit. */
    public static String maskWithoutTaskPartitionId(String commitTime, String fileId) {
        return String.format("%s_*_%s.parquet", fileId, commitTime);
    }

    /** Glob pattern matching every data file written by the given commit. */
    public static String maskWithOnlyCommitTime(String commitTime) {
        return String.format("*_*_%s.parquet", commitTime);
    }

    public static String makeInflightCommitFileName(String commitTime) {
        return commitTime + HoodieTableMetadata.INFLIGHT_FILE_SUFFIX;
    }

    public static String makeCommitFileName(String commitTime) {
        return commitTime + HoodieTableMetadata.COMMIT_FILE_SUFFIX;
    }

    /** Strips the suffix off a commit file name, leaving the commit time. */
    public static String getCommitFromCommitFile(String commitFileName) {
        return commitFileName.split("\\.")[0];
    }

    /** Extracts the commit time (third '_'-separated token, minus extension) from a data file name. */
    public static String getCommitTime(String fullFileName) {
        String[] nameParts = fullFileName.split("_");
        return nameParts[2].split("\\.")[0];
    }

    public static long getFileSize(FileSystem fs, Path path) throws IOException {
        FileStatus[] statuses = fs.listStatus(path);
        return statuses[0].getLen();
    }

    /** Glob covering every data file assuming three partition levels under the base path. */
    public static String globAllFiles(String basePath) {
        return String.format("%s/*/*/*/*", basePath);
    }

    // TODO (weiy): rename the function for better readability
    /** Extracts the fileId (first '_'-separated token) from a data file name. */
    public static String getFileId(String fullFileName) {
        return fullFileName.split("_")[0];
    }

    /**
     * Obtain all the partition paths, that are present in this table.
     */
    public static List<String> getAllPartitionPaths(FileSystem fs, String basePath) throws IOException {
        // TODO(vc): For now, assume partitions are two levels down from base path.
        FileStatus[] partitionFolders = fs.globStatus(new Path(basePath + "/*/*/*"));
        List<String> partitionPaths = new ArrayList<>();
        for (FileStatus folder : partitionFolders) {
            Path leaf = folder.getPath();
            Path mid = leaf.getParent();
            Path top = mid.getParent();
            partitionPaths.add(String.format("%s/%s/%s",
                    top.getName(),
                    mid.getName(),
                    leaf.getName()));
        }
        return partitionPaths;
    }
}

View File

@@ -0,0 +1,140 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.common.model.HoodieRecord;
import org.apache.avro.Schema;
import org.apache.avro.generic.*;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Helper class to do common stuff across Avro.
 */
public class HoodieAvroUtils {

    // Every Hoodie metadata field is an optional (nullable) string.
    private final static Schema METADATA_FIELD_SCHEMA = Schema.createUnion(Arrays.asList(
            Schema.create(Schema.Type.NULL),
            Schema.create(Schema.Type.STRING)));
    private final static Schema RECORD_KEY_SCHEMA = initRecordKeySchema();

    /**
     * Convert a given avro record to bytes
     */
    public static byte[] avroToBytes(GenericRecord record) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(baos, null);
        GenericDatumWriter<GenericRecord> datumWriter =
                new GenericDatumWriter<>(record.getSchema());
        datumWriter.write(record, binaryEncoder);
        binaryEncoder.flush();
        baos.close();
        return baos.toByteArray();
    }

    /**
     * Convert serialized bytes back into avro record
     */
    public static GenericRecord bytesToAvro(byte[] bytes, Schema schema) throws IOException {
        GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, null);
        return datumReader.read(null, binaryDecoder);
    }

    /**
     * Adds the Hoodie metadata fields to the given schema.
     * The five metadata columns are placed first, followed by the data columns
     * in their original order.
     */
    public static Schema addMetadataFields(Schema schema) {
        List<Schema.Field> allFields = new ArrayList<>();
        allFields.add(new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null));
        allFields.add(new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null));
        allFields.add(new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null));
        allFields.add(new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null));
        allFields.add(new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null));
        // Schema.Field instances cannot be shared between schemas, so re-wrap each data field.
        for (Schema.Field dataField : schema.getFields()) {
            allFields.add(new Schema.Field(dataField.name(), dataField.schema(), dataField.doc(), null));
        }
        Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
        mergedSchema.setFields(allFields);
        return mergedSchema;
    }

    /** Builds the single-field "HoodieRecordKey" projection schema used for key reads. */
    private static Schema initRecordKeySchema() {
        Schema keySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
        keySchema.setFields(Arrays.asList(
                new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", null)));
        return keySchema;
    }

    public static Schema getRecordKeySchema() {
        return RECORD_KEY_SCHEMA;
    }

    /** Stamps the record-key related metadata fields onto the record, in place. */
    public static GenericRecord addHoodieKeyToRecord(GenericRecord record, String recordKey, String partitionPath, String fileName) {
        record.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recordKey);
        record.put(HoodieRecord.PARTITION_PATH_METADATA_FIELD, partitionPath);
        record.put(HoodieRecord.FILENAME_METADATA_FIELD, fileName);
        return record;
    }

    /**
     * Adds the Hoodie commit metadata into the provided Generic Record.
     */
    public static GenericRecord addCommitMetadataToRecord(GenericRecord record, String commitTime, String commitSeqno) {
        record.put(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, commitSeqno);
        record.put(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitTime);
        return record;
    }

    /**
     * Given a avro record with a given schema, rewrites it into the new schema
     */
    public static GenericRecord rewriteRecord(GenericRecord record, Schema newSchema)
            throws Exception {
        GenericRecord rewritten = new GenericData.Record(newSchema);
        for (Schema.Field sourceField : record.getSchema().getFields()) {
            rewritten.put(sourceField.name(), record.get(sourceField.name()));
        }
        boolean valid = new GenericData().validate(newSchema, rewritten);
        if (!valid) {
            throw new Exception(
                    "Unable to validate the rewritten record " + record + " against schema "
                            + newSchema);
        }
        return rewritten;
    }
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
public class NumericUtils {

    // Binary (1024-based) unit prefixes, Kilo through Exa.
    private static final String UNIT_PREFIXES = "KMGTPE";

    /**
     * Formats a byte count as a human readable string using binary (1024-based)
     * units, e.g. {@code 1536 -> "1.5 KB"}. Values below 1024 (including zero
     * and negatives) are rendered in plain bytes.
     *
     * @param bytes the byte count to format
     * @return human readable representation such as "1.5 KB"
     */
    public static String humanReadableByteCount(double bytes) {
        if (bytes < 1024) return String.format("%.1f B", bytes);
        // Clamp to the largest known prefix; previously inputs >= 1024^7 indexed
        // past the end of "KMGTPE" and threw StringIndexOutOfBoundsException.
        int exp = Math.min((int) (Math.log(bytes) / Math.log(1024)), UNIT_PREFIXES.length());
        String pre = UNIT_PREFIXES.charAt(exp - 1) + "";
        return String.format("%.1f %sB", bytes / Math.pow(1024, exp), pre);
    }
}

View File

@@ -0,0 +1,138 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.avro.HoodieAvroWriteSupport;
import com.uber.hoodie.common.BloomFilter;
import com.uber.hoodie.common.model.HoodieRecord;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.HoodieIndexException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import java.io.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Utility functions involving with parquet.
 */
public class ParquetUtils {

    /**
     * Read the rowKey list from the given parquet file.
     *
     * @param filePath The parquet file path.
     */
    public static Set<String> readRowKeysFromParquet(Path filePath) {
        Configuration conf = new Configuration();
        // Project only the record-key column to keep the scan cheap.
        Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
        AvroReadSupport.setAvroReadSchema(conf, readSchema);
        AvroReadSupport.setRequestedProjection(conf, readSchema);
        Set<String> rowKeys = new HashSet<>();
        ParquetReader parquetReader = null;
        try {
            parquetReader = AvroParquetReader.builder(filePath).withConf(conf).build();
            for (Object next = parquetReader.read(); next != null; next = parquetReader.read()) {
                if (next instanceof GenericRecord) {
                    rowKeys.add(((GenericRecord) next).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString());
                }
            }
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read row keys from Parquet " + filePath, e);
        } finally {
            if (parquetReader != null) {
                try {
                    parquetReader.close();
                } catch (IOException ignored) {
                    // best-effort close; the keys were already collected
                }
            }
        }
        return rowKeys;
    }

    /**
     * Read out the bloom filter from the parquet file meta data.
     */
    public static BloomFilter readBloomFilterFromParquetMetadata(Path parquetFilePath) {
        final ParquetMetadata footer;
        try {
            footer = ParquetFileReader.readFooter(new Configuration(), parquetFilePath);
        } catch (IOException e) {
            throw new HoodieIndexException("Failed to read footer for parquet " + parquetFilePath,
                    e);
        }
        Map<String, String> footerMetadata = footer.getFileMetaData().getKeyValueMetaData();
        String serializedFilter = footerMetadata.get(HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY);
        if (serializedFilter == null) {
            throw new HoodieIndexException("Could not find index in Parquet footer. Looked for key "
                    + HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY + " in "
                    + parquetFilePath);
        }
        return new BloomFilter(serializedFilter);
    }

    /**
     *
     * NOTE: This literally reads the entire file contents, thus should be used with caution.
     *
     * @param filePath path of the parquet file to read fully
     * @return every record in the file, in file order
     */
    public static List<GenericRecord> readAvroRecords(Path filePath) {
        List<GenericRecord> records = new ArrayList<>();
        ParquetReader parquetReader = null;
        try {
            parquetReader = AvroParquetReader.builder(filePath).build();
            for (Object next = parquetReader.read(); next != null; next = parquetReader.read()) {
                if (next instanceof GenericRecord) {
                    records.add(((GenericRecord) next));
                }
            }
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e);
        } finally {
            if (parquetReader != null) {
                try {
                    parquetReader.close();
                } catch (IOException ignored) {
                    // best-effort close; records already collected
                }
            }
        }
        return records;
    }
}

View File

@@ -0,0 +1,43 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.util;
import com.uber.hoodie.common.model.HoodieRecordPayload;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class ReflectionUtils {
private static Map<String, Class<?>> clazzCache = new HashMap<>();
public static <T extends HoodieRecordPayload> T loadPayload(String recordPayloadClass) throws IOException {
try {
if(clazzCache.get(recordPayloadClass) == null) {
Class<?> clazz = Class.<HoodieRecordPayload>forName(recordPayloadClass);
clazzCache.put(recordPayloadClass, clazz);
}
return (T) clazzCache.get(recordPayloadClass).newInstance();
} catch (ClassNotFoundException e) {
throw new IOException("Could not load payload class " + recordPayloadClass, e);
} catch (InstantiationException e) {
throw new IOException("Could not load payload class " + recordPayloadClass, e);
} catch (IllegalAccessException e) {
throw new IOException("Could not load payload class " + recordPayloadClass, e);
}
}
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
/**
 * <p>
 * Exception thrown to indicate that a hoodie dataset was not found on the path provided
 * <p>
 */
public class DatasetNotFoundException extends HoodieException {

    /**
     * @param basePath the path that was checked for a hoodie dataset
     */
    public DatasetNotFoundException(String basePath) {
        // Message built inline; the separate static helper added no value.
        super("Hoodie dataset not found in path " + basePath);
    }
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
/**
 * <p>
 * Exception thrown for Hoodie failures. The root of
 * the exception hierarchy.
 * </p>
 * <p>
 * Hoodie Write/Read clients will throw this exception if
 * any of its operations fail. This is a runtime (unchecked) exception.
 * </p>
 *
 */
public class HoodieException extends RuntimeException {

    public HoodieException() {
    }

    public HoodieException(String message) {
        super(message);
    }

    public HoodieException(String message, Throwable t) {
        super(message, t);
    }

    public HoodieException(Throwable t) {
        super(t);
    }

    /**
     * Formats {@code message} with {@link String#format}, first rendering every
     * argument through {@link String#valueOf} so null and arbitrary objects are
     * safe to pass.
     */
    protected static String format(String message, Object... args) {
        final String[] rendered = new String[args.length];
        int idx = 0;
        for (Object arg : args) {
            rendered[idx++] = String.valueOf(arg);
        }
        return String.format(String.valueOf(message), (Object[]) rendered);
    }
}

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
import java.io.IOException;
/**
 * <p>
 * Exception thrown for dataset IO-related failures.
 * </p>
 */
public class HoodieIOException extends HoodieException {

    // Retained separately from getCause() so callers get the precise
    // IOException type without a cast.
    private final IOException ioCause;

    public HoodieIOException(String msg, IOException t) {
        super(msg, t);
        ioCause = t;
    }

    /** @return the underlying IOException that triggered this failure */
    public IOException getIOException() {
        return ioCause;
    }
}

View File

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
import java.io.IOException;
/**
 * <p>
 * Exception thrown for HoodieIndex related errors.
 * </p>
 */
public class HoodieIndexException extends HoodieException {

    /** @param message description of the indexing failure */
    public HoodieIndexException(String message) {
        super(message);
    }

    /**
     * @param message description of the indexing failure
     * @param cause   underlying error being wrapped
     */
    public HoodieIndexException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
import com.uber.hoodie.common.model.HoodieRecord;
/**
 * <p>
 * Exception throws when indexing fails to locate the hoodie record.
 * HoodieRecord current location and partition path does not match.
 * This is an unrecoverable error
 * </p>
 */
public class HoodieRecordMissingException extends HoodieException {

    /** @param record the record that could not be located in its partition */
    public HoodieRecordMissingException(HoodieRecord record) {
        // StringBuilder produces the identical message the old concatenation
        // chain did (append(Object) also goes through String.valueOf).
        super(new StringBuilder("Record ").append(record.getRecordKey())
                .append(" with partition path ").append(record.getPartitionPath())
                .append(" in current location ").append(record.getCurrentLocation())
                .append(" is not found in the partition").toString());
    }
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.exception;
/**
 * <p>
 * Exception thrown to indicate that a hoodie dataset is invalid
 * <p>
 */
public class InvalidDatasetException extends HoodieException {

    /** @param basePath path of the dataset that failed validation */
    public InvalidDatasetException(String basePath) {
        // Message built inline; the separate static helper added no value.
        super("Invalid Hoodie Dataset. " + basePath);
    }
}