Import from Hoodie private repo: Part 1
This commit is contained in:
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.codehaus.jackson.annotate.JsonAutoDetect;
|
||||
import org.codehaus.jackson.annotate.JsonMethod;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* All the metadata that gets stored along with a commit.
|
||||
*/
|
||||
public class HoodieCommitMetadata implements Serializable {
|
||||
private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
|
||||
private HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;
|
||||
|
||||
public HoodieCommitMetadata() {
|
||||
partitionToWriteStats = new HashMap<>();
|
||||
}
|
||||
|
||||
public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
|
||||
if (!partitionToWriteStats.containsKey(partitionPath)) {
|
||||
partitionToWriteStats.put(partitionPath, new ArrayList<HoodieWriteStat>());
|
||||
}
|
||||
partitionToWriteStats.get(partitionPath).add(stat);
|
||||
}
|
||||
|
||||
public List<HoodieWriteStat> getWriteStats(String partitionPath) {
|
||||
return partitionToWriteStats.get(partitionPath);
|
||||
}
|
||||
|
||||
public HashMap<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
|
||||
return partitionToWriteStats;
|
||||
}
|
||||
|
||||
public HashMap<String, String> getFileIdAndFullPaths() {
|
||||
HashMap<String, String> filePaths = new HashMap<>();
|
||||
// list all partitions paths
|
||||
for (Map.Entry<String, List<HoodieWriteStat>> entry: getPartitionToWriteStats().entrySet()) {
|
||||
for (HoodieWriteStat stat: entry.getValue()) {
|
||||
filePaths.put(stat.getFileId(), stat.getFullPath());
|
||||
}
|
||||
}
|
||||
return filePaths;
|
||||
}
|
||||
|
||||
|
||||
public String toJsonString() throws IOException {
|
||||
if(partitionToWriteStats.containsKey(null)) {
|
||||
log.info("partition path is null for " + partitionToWriteStats.get(null));
|
||||
partitionToWriteStats.remove(null);
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.defaultPrettyPrintingWriter().writeValueAsString(this);
|
||||
}
|
||||
|
||||
public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
|
||||
if (jsonStr == null || jsonStr.isEmpty()) {
|
||||
// For empty commit file (no data or somethings bad happen).
|
||||
return new HoodieCommitMetadata();
|
||||
}
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
|
||||
return mapper.readValue(jsonStr, HoodieCommitMetadata.class);
|
||||
}
|
||||
|
||||
// Here the functions are named "fetch" instead of "get", to get avoid of the json conversion.
|
||||
public long fetchTotalPartitionsWritten() {
|
||||
return partitionToWriteStats.size();
|
||||
}
|
||||
|
||||
public long fetchTotalFilesInsert() {
|
||||
long totalFilesInsert = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalFilesInsert ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalFilesInsert;
|
||||
}
|
||||
|
||||
public long fetchTotalFilesUpdated() {
|
||||
long totalFilesUpdated = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
|
||||
totalFilesUpdated ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalFilesUpdated;
|
||||
}
|
||||
|
||||
public long fetchTotalUpdateRecordsWritten() {
|
||||
long totalUpdateRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalUpdateRecordsWritten += stat.getNumUpdateWrites();
|
||||
}
|
||||
}
|
||||
return totalUpdateRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalInsertRecordsWritten() {
|
||||
long totalInsertRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
|
||||
totalInsertRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
}
|
||||
}
|
||||
return totalInsertRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalRecordsWritten() {
|
||||
long totalRecordsWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalRecordsWritten += stat.getNumWrites();
|
||||
}
|
||||
}
|
||||
return totalRecordsWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalBytesWritten() {
|
||||
long totalBytesWritten = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalBytesWritten += stat.getTotalWriteBytes();
|
||||
}
|
||||
}
|
||||
return totalBytesWritten;
|
||||
}
|
||||
|
||||
public long fetchTotalWriteErrors() {
|
||||
long totalWriteErrors = 0;
|
||||
for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
|
||||
for (HoodieWriteStat stat : stats) {
|
||||
totalWriteErrors += stat.getTotalWriteErrors();
|
||||
}
|
||||
}
|
||||
return totalWriteErrors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
|
||||
HoodieCommitMetadata that = (HoodieCommitMetadata) o;
|
||||
|
||||
return partitionToWriteStats != null ?
|
||||
partitionToWriteStats.equals(that.partitionToWriteStats) :
|
||||
that.partitionToWriteStats == null;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return partitionToWriteStats != null ? partitionToWriteStats.hashCode() : 0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Manages the commit meta and provides operations on the commit timeline.
 */
public class HoodieCommits implements Serializable {

    // Commit timestamps, sorted ascending; wrapped unmodifiable so it cannot be mutated.
    private List<String> commitList;

    public HoodieCommits(List<String> commitList) {
        List<String> sorted = new ArrayList<>(commitList);
        Collections.sort(sorted);
        this.commitList = Collections.unmodifiableList(sorted);
    }

    /**
     * Returns the commits which are in the range (startTs, endTs].
     *
     * @param startTs - exclusive start commit ts (null means start from the first commit)
     * @param endTs   - inclusive end commit ts
     * @throws IllegalArgumentException if startTs is after endTs
     */
    public List<String> findCommitsInRange(String startTs, String endTs) {
        if (commitList.isEmpty()) {
            return Collections.emptyList();
        }
        int startIndex = 0;
        if (startTs != null) {
            startIndex = Collections.binarySearch(commitList, startTs);
            // Exclude startTs itself when present; otherwise start at its insertion point.
            startIndex = (startIndex >= 0) ? startIndex + 1 : -(startIndex + 1);
        }

        int endIndex = Collections.binarySearch(commitList, endTs);
        // Include endTs itself when present; otherwise stop at its insertion point, which
        // never lets a commit greater than endTs leak into the result.
        endIndex = (endIndex >= 0) ? endIndex + 1 : -(endIndex + 1);

        if (endIndex < startIndex) {
            throw new IllegalArgumentException(
                "Start Commit Ts " + startTs + " cannot be greater than end commit ts " + endTs);
        }
        return Collections.unmodifiableList(
            new ArrayList<>(commitList.subList(startIndex, endIndex)));
    }

    /**
     * Returns up to numCommits commits strictly after the given commit timestamp,
     * in ascending order.
     *
     * @return the matching commits, or null if there are no commits at all
     *         (null kept for backward compatibility with existing callers)
     */
    public List<String> findCommitsAfter(String commitTimeStamp, int numCommits) {
        if (commitList.isEmpty()) {
            return null;
        }

        int startIndex = Collections.binarySearch(commitList, commitTimeStamp);
        if (startIndex < 0) {
            startIndex = -(startIndex + 1);
        } else {
            // commitTimeStamp itself is present; the range is exclusive of it.
            startIndex++;
        }

        List<String> commits = new ArrayList<>();
        while (numCommits > 0 && startIndex < commitList.size()) {
            commits.add(commitList.get(startIndex));
            startIndex++;
            numCommits--;
        }
        return Collections.unmodifiableList(commits);
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("HoodieCommits{");
        sb.append("commitList=").append(commitList);
        sb.append('}');
        return sb.toString();
    }

    public boolean isEmpty() {
        return commitList.isEmpty();
    }

    public int getNumCommits() {
        return commitList.size();
    }

    /** @return the earliest commit, or null if there are none */
    public String firstCommit() {
        return commitList.isEmpty() ? null : commitList.get(0);
    }

    /** @return the n-th (0-based) commit from the start, or null if out of range */
    public String nthCommit(int n) {
        return commitList.isEmpty() || n >= commitList.size() ? null : commitList.get(n);
    }

    /** @return the latest commit, or null if there are none */
    public String lastCommit() {
        return commitList.isEmpty() ? null : commitList.get(commitList.size() - 1);
    }

    /**
     * Returns the nth commit from the latest commit such that lastCommit(0) => lastCommit()
     */
    public String lastCommit(int n) {
        if (commitList.size() < n + 1) {
            return null;
        }
        return commitList.get(commitList.size() - 1 - n);
    }

    public boolean contains(String commitTs) {
        return commitList.contains(commitTs);
    }

    /**
     * Returns the later of the two commit timestamps; a null argument is treated as absent.
     */
    public String max(String commit1, String commit2) {
        if (commit1 == null && commit2 == null) {
            return null;
        }
        if (commit1 == null) {
            return commit2;
        }
        if (commit2 == null) {
            return commit1;
        }
        return (isCommit1BeforeOrOn(commit1, commit2) ? commit2 : commit1);
    }

    public static boolean isCommit1BeforeOrOn(String commit1, String commit2) {
        return commit1.compareTo(commit2) <= 0;
    }

    public static boolean isCommit1After(String commit1, String commit2) {
        return commit1.compareTo(commit2) > 0;
    }

    public List<String> getCommitList() {
        return commitList;
    }

    /**
     * NOTE(review): throws NullPointerException when the timeline is empty, since
     * firstCommit() returns null -- confirm callers guard against that.
     */
    public boolean isCommitBeforeEarliestCommit(String commitTs) {
        return isCommit1BeforeOrOn(commitTs, firstCommit());
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieCommits that = (HoodieCommits) o;
        return commitList != null ? commitList.equals(that.commitList) : that.commitList == null;
    }

    @Override
    public int hashCode() {
        return commitList != null ? commitList.hashCode() : 0;
    }

}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
|
||||
public class HoodieFile {
|
||||
|
||||
private final FileStatus fileStatus;
|
||||
private String fileNameWithoutCommitTs;
|
||||
private String commitTs;
|
||||
|
||||
public HoodieFile(FileStatus fileStatus) {
|
||||
this.fileStatus = fileStatus;
|
||||
String fileName = fileStatus.getPath().getName();
|
||||
this.fileNameWithoutCommitTs = FSUtils.getFileId(fileName);
|
||||
this.commitTs = FSUtils.getCommitTime(fileName);
|
||||
}
|
||||
|
||||
public String getFileNameWithoutCommitTs() {
|
||||
return fileNameWithoutCommitTs;
|
||||
}
|
||||
|
||||
public String getCommitTs() {
|
||||
return commitTs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieFile{");
|
||||
sb.append("fileStatus=").append(fileStatus);
|
||||
sb.append(", fileNameWithoutCommitTs='").append(fileNameWithoutCommitTs).append('\'');
|
||||
sb.append(", commitTs='").append(commitTs).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public FileStatus getFileStatus() {
|
||||
return fileStatus;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* HoodieKey consists of
|
||||
*
|
||||
* - recordKey : a recordKey that acts as primary key for a record - partitionPath : path to the
|
||||
* partition that contains the record
|
||||
*/
|
||||
public class HoodieKey implements Serializable {
|
||||
|
||||
|
||||
private final String recordKey;
|
||||
|
||||
private final String partitionPath;
|
||||
|
||||
public HoodieKey(String recordKey, String partitionPath) {
|
||||
this.recordKey = recordKey;
|
||||
this.partitionPath = partitionPath;
|
||||
}
|
||||
|
||||
public String getRecordKey() {
|
||||
return recordKey;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
return partitionPath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieKey otherKey = (HoodieKey) o;
|
||||
return Objects.equal(recordKey, otherKey.recordKey) &&
|
||||
Objects.equal(partitionPath, otherKey.partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(recordKey, partitionPath);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieKey {");
|
||||
sb.append(" recordKey=").append(recordKey);
|
||||
sb.append(" partitionPath=").append(partitionPath);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* A Single Record managed by Hoodie TODO - Make this generic
|
||||
*/
|
||||
public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable {
|
||||
|
||||
public static String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
|
||||
public static String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
|
||||
public static String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
|
||||
public static String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
|
||||
public static String FILENAME_METADATA_FIELD = "_hoodie_file_name";
|
||||
|
||||
/**
|
||||
* Identifies the record across the table
|
||||
*/
|
||||
private HoodieKey key;
|
||||
|
||||
/**
|
||||
* Actual payload of the record
|
||||
*/
|
||||
private T data;
|
||||
|
||||
/**
|
||||
* Current location of record on storage. Filled in by looking up index
|
||||
*/
|
||||
private HoodieRecordLocation currentLocation;
|
||||
|
||||
/**
|
||||
* New location of record on storage, after written
|
||||
*/
|
||||
private HoodieRecordLocation newLocation;
|
||||
|
||||
public HoodieRecord(HoodieKey key, T data) {
|
||||
this.key = key;
|
||||
this.data = data;
|
||||
this.currentLocation = null;
|
||||
this.newLocation = null;
|
||||
}
|
||||
|
||||
public HoodieKey getKey() {
|
||||
return key;
|
||||
}
|
||||
|
||||
public T getData() {
|
||||
if (data == null) {
|
||||
throw new IllegalStateException("Payload already deflated for record.");
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the actual payload, to ease memory pressure. To be called after the record
|
||||
* has been written to storage. Once deflated, cannot be inflated.
|
||||
*/
|
||||
public void deflate() {
|
||||
this.data = null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the current currentLocation of the record. This should happen exactly-once
|
||||
*/
|
||||
public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
|
||||
assert currentLocation == null;
|
||||
this.currentLocation = location;
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieRecordLocation getCurrentLocation() {
|
||||
return currentLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the new currentLocation of the record, after being written. This again should happen
|
||||
* exactly-once.
|
||||
*/
|
||||
public HoodieRecord setNewLocation(HoodieRecordLocation location) {
|
||||
assert newLocation == null;
|
||||
this.newLocation = location;
|
||||
return this;
|
||||
}
|
||||
|
||||
public HoodieRecordLocation getNewLocation() {
|
||||
return this.newLocation;
|
||||
}
|
||||
|
||||
public boolean isCurrentLocationKnown() {
|
||||
return this.currentLocation != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieRecord that = (HoodieRecord) o;
|
||||
return Objects.equal(key, that.key) &&
|
||||
Objects.equal(data, that.data) &&
|
||||
Objects.equal(currentLocation, that.currentLocation) &&
|
||||
Objects.equal(newLocation, that.newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(key, data, currentLocation, newLocation);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecord{");
|
||||
sb.append("key=").append(key);
|
||||
sb.append(", currentLocation='").append(currentLocation).append('\'');
|
||||
sb.append(", newLocation='").append(newLocation).append('\'');
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
|
||||
return commitTime + "_" + partitionId + "_" + recordIndex;
|
||||
}
|
||||
|
||||
public String getPartitionPath() {
|
||||
assert key != null;
|
||||
return key.getPartitionPath();
|
||||
}
|
||||
|
||||
public String getRecordKey() {
|
||||
assert key != null;
|
||||
return key.getRecordKey();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Location of a HoodieRecord within the parition it belongs to. Ultimately, this points to an
|
||||
* actual file on disk
|
||||
*/
|
||||
public class HoodieRecordLocation implements Serializable {
|
||||
|
||||
private final String commitTime;
|
||||
private final String fileId;
|
||||
|
||||
public HoodieRecordLocation(String commitTime, String fileId) {
|
||||
this.commitTime = commitTime;
|
||||
this.fileId = fileId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (o == null || getClass() != o.getClass())
|
||||
return false;
|
||||
HoodieRecordLocation otherLoc = (HoodieRecordLocation) o;
|
||||
return Objects.equal(commitTime, otherLoc.commitTime) &&
|
||||
Objects.equal(fileId, otherLoc.fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(commitTime, fileId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieRecordLocation {");
|
||||
sb.append("commitTime=").append(commitTime).append(", ");
|
||||
sb.append("fileId=").append(fileId);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getCommitTime() {
|
||||
return commitTime;
|
||||
}
|
||||
|
||||
public String getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import org.apache.avro.Schema;
|
||||
import org.apache.avro.generic.IndexedRecord;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code>.
 * This abstracts out callbacks which depend on record-specific logic.
 *
 * @param <T> the concrete payload type, so that preCombine combines like payloads
 */
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
    /**
     * When more than one HoodieRecord have the same HoodieKey, this function combines them
     * before attempting to insert/upsert (if combining is turned on in HoodieClientConfig).
     *
     * @param another the other payload sharing this payload's HoodieKey
     * @return the combined payload to use in place of the two inputs
     */
    T preCombine(T another);

    /**
     * This method lets you write custom merging/combining logic to produce a new value
     * as a function of the current value on storage and what is contained in this object.
     *
     * eg:
     * 1) You are updating counters; you may want to add counts to currentValue and write back updated counts
     * 2) You may be reading DB redo logs, and merge them with the current image for a database row on storage
     *
     * @param currentValue Current value in storage, to merge/combine this payload with
     * @param schema       Schema used for the record
     * @return new combined/merged value to be written back to storage
     * @throws IOException if producing the merged record fails
     */
    IndexedRecord combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;

    /**
     * Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
     * Called when writing a new value for the given HoodieKey, wherein there is no existing record in
     * storage to be combined against. (i.e. insert)
     *
     * @param schema Schema used for the record
     * @return the avro record to persist
     * @throws IOException if producing the record fails
     */
    IndexedRecord getInsertValue(Schema schema) throws IOException;
}
|
||||
@@ -0,0 +1,480 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import com.uber.hoodie.exception.DatasetNotFoundException;
|
||||
import com.uber.hoodie.exception.HoodieIOException;
|
||||
import com.uber.hoodie.exception.InvalidDatasetException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.PathFilter;
|
||||
import org.apache.log4j.LogManager;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Manages all file system level interactions for the Hoodie tables.
|
||||
*/
|
||||
public class HoodieTableMetadata implements Serializable {
|
||||
public static final String MAX_COMMIT_TS = String.valueOf(Long.MAX_VALUE);
|
||||
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
|
||||
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
|
||||
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
|
||||
|
||||
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
|
||||
private static final String HOODIE_HDRONE_PROFILE_DEFAULT_VALUE = "HOODIE";
|
||||
private static final java.lang.String HOODIE_HDRONE_PROFILE_PROP_NAME =
|
||||
"hoodie.hdrone.dataset.profile";
|
||||
|
||||
private static Logger log = LogManager.getLogger(HoodieTableMetadata.class);
|
||||
private transient final FileSystem fs;
|
||||
private transient final Path metadataFolder;
|
||||
private final Properties properties;
|
||||
private HoodieCommits commits;
|
||||
private List<String> inflightCommits;
|
||||
private String basePath;
|
||||
|
||||
public static final String METAFOLDER_NAME = ".hoodie";
|
||||
public static final String COMMIT_FILE_SUFFIX = ".commit";
|
||||
public static final String INFLIGHT_FILE_SUFFIX = ".inflight";
|
||||
|
||||
/**
 * Constructor which initializes the hoodie table metadata. It will initialize the meta-data
 * (base path and hoodie.properties) if not already present.
 *
 * @param fs        filesystem the dataset lives on
 * @param basePath  base path of the hoodie dataset
 * @param tableName table name recorded when initializing hoodie.properties
 * @throws HoodieIOException if the metadata cannot be loaded or initialized
 */
public HoodieTableMetadata(FileSystem fs, String basePath, String tableName) {
    // initOnMissing = true: create the dataset structure when it does not exist yet.
    this(fs, basePath, tableName, true);
}
|
||||
|
||||
/**
 * Constructor which loads the hoodie table metadata. It requires the meta-data to be
 * present already; otherwise loading fails (see the private constructor).
 *
 * @param fs       filesystem the dataset lives on
 * @param basePath base path of the hoodie dataset
 * @throws HoodieIOException if the metadata cannot be loaded
 */
public HoodieTableMetadata(FileSystem fs, String basePath) {
    // initOnMissing = false: fail fast when the dataset has not been initialized.
    this(fs, basePath, null, false);
}
|
||||
|
||||
/**
 * Shared constructor logic: validates (and optionally creates) the dataset structure on the
 * filesystem, then loads the commit timeline and hoodie properties from the metadata folder.
 *
 * @param fs            filesystem the dataset lives on
 * @param basePath      base path of the hoodie dataset
 * @param tableName     table name, used only when initializing hoodie.properties
 * @param initOnMissing when true, create the base path / hoodie.properties if absent;
 *                      when false, throw DatasetNotFoundException / InvalidDatasetException
 */
private HoodieTableMetadata(FileSystem fs, String basePath, String tableName,
    boolean initOnMissing) {
    this.fs = fs;
    this.basePath = basePath;

    try {
        // Ensure the base path exists (creating it only in init mode).
        Path basePathDir = new Path(this.basePath);
        if (!fs.exists(basePathDir)) {
            if (initOnMissing) {
                fs.mkdirs(basePathDir);
            } else {
                throw new DatasetNotFoundException(this.basePath);
            }
        }

        // NOTE(review): if basePath exists but is a regular file, this reports
        // DatasetNotFoundException even in init mode -- confirm that is intended.
        if (!fs.isDirectory(new Path(basePath))) {
            throw new DatasetNotFoundException(this.basePath);
        }

        // create .hoodie folder if it does not exist.
        this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
        Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
        if (!fs.exists(propertyPath)) {
            if (initOnMissing) {
                createHoodieProperties(metadataFolder, tableName);
            } else {
                throw new InvalidDatasetException(this.basePath);
            }
        }

        // Load meta data: completed commits, in-flight commits, and table properties.
        this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
        this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
        this.properties = readHoodieProperties();
        log.info("All commits :" + commits);
    } catch (IOException e) {
        throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
    }
}
|
||||
|
||||
/**
|
||||
* Returns all the commit metadata for this table. Reads all the commit files from HDFS.
|
||||
* Expensive operation, use with caution.
|
||||
*
|
||||
* @return SortedMap of CommitTime,<class>HoodieCommitMetadata</class>
|
||||
* @throws IOException
|
||||
*/
|
||||
public SortedMap<String, HoodieCommitMetadata> getAllCommitMetadata() {
|
||||
try {
|
||||
TreeMap<String, HoodieCommitMetadata> metadataMap = new TreeMap<>();
|
||||
for (String commitTs : commits.getCommitList()) {
|
||||
metadataMap.put(commitTs, getCommitMetadata(commitTs));
|
||||
}
|
||||
return Collections.unmodifiableSortedMap(metadataMap);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not load all commits for table " + getTableName(),
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieCommitMetadata getCommitMetadata(String commitTime) throws IOException {
|
||||
FSDataInputStream is = fs.open(new Path(metadataFolder, FSUtils.makeCommitFileName(commitTime)));
|
||||
try {
|
||||
String jsonStr = IOUtils.toString(is);
|
||||
return HoodieCommitMetadata.fromJsonString(jsonStr);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
public HoodieTableType getTableType() {
|
||||
return HoodieTableType.valueOf(properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME));
|
||||
}
|
||||
|
||||
/**
|
||||
* Lookup the file name for specified <code>HoodieRecord</code>
|
||||
* <p/>
|
||||
* TODO(vc): This metadata needs to be cached in each executor, statically, and used across, if
|
||||
* we need to be nicer to the NameNode
|
||||
*/
|
||||
public String getFilenameForRecord(FileSystem fs, final HoodieRecord record) {
|
||||
String fileId = record.getCurrentLocation().getFileId();
|
||||
return getFilenameForRecord(fs, record, fileId);
|
||||
}
|
||||
|
||||
|
||||
public String getFilenameForRecord(FileSystem fs, final HoodieRecord record, String fileId) {
|
||||
try {
|
||||
FileStatus[] files = fs.listStatus(new Path(basePath, record.getPartitionPath()));
|
||||
Map<String, List<FileStatus>> fileIdToVersions =
|
||||
groupFilesByFileId(files, commits.lastCommit());
|
||||
// If the record is not found
|
||||
if(!fileIdToVersions.containsKey(fileId)) {
|
||||
throw new FileNotFoundException("Cannot find valid versions for fileId " + fileId);
|
||||
}
|
||||
|
||||
List<FileStatus> statuses = fileIdToVersions.get(fileId);
|
||||
return statuses.get(0).getPath().getName();
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not get Filename for record " + record, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Get only the latest file in the partition with precondition commitTime(file) < maxCommitTime
|
||||
*
|
||||
* @param fs
|
||||
* @param partitionPathStr
|
||||
* @param maxCommitTime
|
||||
* @return
|
||||
*/
|
||||
public FileStatus[] getLatestVersionInPartition(FileSystem fs, String partitionPathStr,
|
||||
String maxCommitTime) {
|
||||
try {
|
||||
Path partitionPath = new Path(basePath, partitionPathStr);
|
||||
if(!fs.exists(partitionPath)) {
|
||||
return new FileStatus[0];
|
||||
}
|
||||
FileStatus[] files = fs.listStatus(partitionPath);
|
||||
Map<String, List<FileStatus>> fileIdToVersions =
|
||||
groupFilesByFileId(files, commits.lastCommit());
|
||||
HashMap<String, FileStatus> validFiles = new HashMap<>();
|
||||
for (String fileId : fileIdToVersions.keySet()) {
|
||||
List<FileStatus> versions = fileIdToVersions.get(fileId);
|
||||
for (FileStatus file : versions) {
|
||||
String filename = file.getPath().getName();
|
||||
String commitTime = FSUtils.getCommitTime(filename);
|
||||
if (HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
|
||||
validFiles.put(fileId, file);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return validFiles.values().toArray(new FileStatus[validFiles.size()]);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not get latest versions in Partition " + partitionPathStr, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get ALL the data files in partition grouped by fileId and sorted by the commitTime
|
||||
* Given a partition path, provide all the files with a list of their commits, sorted by commit time.
|
||||
*/
|
||||
public Map<String, List<FileStatus>> getAllVersionsInPartition(FileSystem fs, String partitionPath) {
|
||||
try {
|
||||
FileStatus[] files = fs.listStatus(new Path(basePath, partitionPath));
|
||||
return groupFilesByFileId(files, commits.lastCommit());
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException(
|
||||
"Could not load all file versions in partition " + partitionPath, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all the versions of files, within the commit range provided.
|
||||
*
|
||||
* @param commitsToReturn - commits to include
|
||||
*/
|
||||
public FileStatus[] getLatestVersionInRange(FileStatus[] fileStatuses, List<String> commitsToReturn) {
|
||||
if (commitsToReturn.isEmpty()) {
|
||||
return new FileStatus[0];
|
||||
}
|
||||
try {
|
||||
Map<String, List<FileStatus>> fileIdToVersions =
|
||||
groupFilesByFileId(fileStatuses, commits.lastCommit());
|
||||
|
||||
List<FileStatus> statuses = new ArrayList<>();
|
||||
for (List<FileStatus> entry : fileIdToVersions.values()) {
|
||||
for (FileStatus status : entry) {
|
||||
String commitTime = FSUtils.getCommitTime(status.getPath().getName());
|
||||
if (commitsToReturn.contains(commitTime)) {
|
||||
statuses.add(status);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return statuses.toArray(new FileStatus[statuses.size()]);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not filter files from commits " + commitsToReturn, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Get the latest versions of all the files.
|
||||
*
|
||||
* @param fileStatuses
|
||||
* @return
|
||||
*/
|
||||
public FileStatus[] getLatestVersions(FileStatus[] fileStatuses) {
|
||||
try {
|
||||
Map<String, List<FileStatus>> fileIdToVersions =
|
||||
groupFilesByFileId(fileStatuses, commits.lastCommit());
|
||||
|
||||
List<FileStatus> statuses = new ArrayList<>();
|
||||
for(List<FileStatus> entry:fileIdToVersions.values()) {
|
||||
// first file is the latest one
|
||||
statuses.add(entry.get(0));
|
||||
}
|
||||
return statuses.toArray(new FileStatus[statuses.size()]);
|
||||
} catch (IOException e) {
|
||||
throw new HoodieIOException("Could not filter files for latest version ", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Get the base path for the Hoodie Table.
 *
 * @return the dataset base path this metadata was loaded from
 */
public String getBasePath() {
    return basePath;
}
|
||||
|
||||
|
||||
/** @return true if no completed commits exist for this table. */
public boolean isCommitsEmpty() {
    return commits.isEmpty();
}
|
||||
|
||||
/**
 * Whether data written under the given commit timestamp can be trusted: there must be
 * at least one commit, and the ts must either predate the earliest known commit or be
 * a known commit itself.
 */
// NOTE(review): accepting timestamps older than the earliest commit presumably covers
// files that predate the retained commit history — confirm intent against HoodieCommits.
public boolean isCommitTsSafe(String commitTs) {
    return !isCommitsEmpty() && (commits.isCommitBeforeEarliestCommit(commitTs) || commits
        .contains(commitTs));
}
|
||||
|
||||
// Commits since startTs, using MAX_COMMIT_TS as the open upper bound.
// NOTE(review): inclusivity of startTs depends on HoodieCommits.findCommitsInRange — confirm.
public List<String> findCommitsSinceTs(String startTs) {
    return commits.findCommitsInRange(startTs, MAX_COMMIT_TS);
}
|
||||
|
||||
// Commits between startTs and endTs; bound semantics are those of
// HoodieCommits.findCommitsInRange.
public List<String> findCommitsInRange(String startTs, String endTs) {
    return commits.findCommitsInRange(startTs, endTs);
}
|
||||
|
||||
// Up to maxCommits commits after startTs; delegates to HoodieCommits.
public List<String> findCommitsAfter(String startTs, Integer maxCommits) {
    return commits.findCommitsAfter(startTs, maxCommits);
}
|
||||
|
||||
// NOTE(review): exposes the internal HoodieCommits instance directly — callers share
// state with this object; consider a defensive copy if mutation is possible.
public HoodieCommits getAllCommits() {
    return commits;
}
|
||||
|
||||
// NOTE(review): returns the internal inflight-commit list without copying — treat as read-only.
public List<String> getAllInflightCommits() {
    return inflightCommits;
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("HoodieTableMetadata{");
|
||||
sb.append("commits=").append(commits);
|
||||
sb.append('}');
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/** @return the table name recorded in hoodie.properties. */
public String getTableName() {
    return properties.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
}
|
||||
|
||||
/** @return the HDrone dataset profile from hoodie.properties, or "HOODIE" if unset. */
public String getHDroneDatasetProfile() {
    return properties.getProperty(HOODIE_HDRONE_PROFILE_PROP_NAME, HOODIE_HDRONE_PROFILE_DEFAULT_VALUE);
}
|
||||
|
||||
/**
|
||||
* Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties)
|
||||
*
|
||||
* @param metadataFolder
|
||||
* @param tableName
|
||||
* @throws IOException
|
||||
*/
|
||||
private void createHoodieProperties(Path metadataFolder, String tableName) throws IOException {
|
||||
if (!fs.exists(metadataFolder)) {
|
||||
fs.mkdirs(metadataFolder);
|
||||
}
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
FSDataOutputStream outputStream = fs.create(propertyPath);
|
||||
try {
|
||||
Properties props = new Properties();
|
||||
props.setProperty(HOODIE_TABLE_NAME_PROP_NAME, tableName);
|
||||
props.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());
|
||||
props
|
||||
.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
|
||||
} finally {
|
||||
outputStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the hoodie table properties from the hoodie.properties file under the .hoodie path
|
||||
*/
|
||||
private Properties readHoodieProperties() throws IOException {
|
||||
Properties props = new Properties();
|
||||
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
|
||||
FSDataInputStream inputStream = fs.open(propertyPath);
|
||||
try {
|
||||
props.load(inputStream);
|
||||
} finally {
|
||||
inputStream.close();
|
||||
}
|
||||
return props;
|
||||
}
|
||||
|
||||
/**
 * Scan the commit times under the metadata folder, choosing only commit files with
 * the given suffix.
 *
 * @param commitFileSuffix file suffix to match (e.g. ".commit" or ".inflight")
 * @return commit times parsed from the matching file names
 */
private List<String> scanCommits(final String commitFileSuffix) throws IOException {
    log.info("Attempting to load the commits under " + metadataFolder + " with suffix " + commitFileSuffix);
    final List<String> commitFiles = new ArrayList<>();
    // The PathFilter is used for its side effect: for every matching file it records
    // the commit time (file name up to the first '.') into commitFiles.
    fs.listStatus(metadataFolder, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            if (path.getName().endsWith(commitFileSuffix)) {
                commitFiles.add(path.getName().split("\\.")[0]);
                return true;
            }
            return false;
        }
    });
    return commitFiles;
}
|
||||
|
||||
/**
|
||||
* Takes a bunch of file versions, and returns a map keyed by fileId, with the necessary
|
||||
* version safety checking. Returns a map of commitTime and Sorted list of FileStats
|
||||
* ( by reverse commit time )
|
||||
*
|
||||
* @param maxCommitTime maximum permissible commit time
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Map<String, List<FileStatus>> groupFilesByFileId(FileStatus[] files,
|
||||
String maxCommitTime) throws IOException {
|
||||
HashMap<String, List<FileStatus>> fileIdtoVersions = new HashMap<>();
|
||||
for (FileStatus file : files) {
|
||||
String filename = file.getPath().getName();
|
||||
String fileId = FSUtils.getFileId(filename);
|
||||
String commitTime = FSUtils.getCommitTime(filename);
|
||||
if (isCommitTsSafe(commitTime) && HoodieCommits
|
||||
.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
|
||||
if (!fileIdtoVersions.containsKey(fileId)) {
|
||||
fileIdtoVersions.put(fileId, new ArrayList<FileStatus>());
|
||||
}
|
||||
fileIdtoVersions.get(fileId).add(file);
|
||||
}
|
||||
}
|
||||
for (Map.Entry<String, List<FileStatus>> entry : fileIdtoVersions.entrySet()) {
|
||||
Collections.sort(fileIdtoVersions.get(entry.getKey()), new Comparator<FileStatus>() {
|
||||
@Override
|
||||
public int compare(FileStatus o1, FileStatus o2) {
|
||||
String o1CommitTime = FSUtils.getCommitTime(o1.getPath().getName());
|
||||
String o2CommitTime = FSUtils.getCommitTime(o2.getPath().getName());
|
||||
// Reverse the order
|
||||
return o2CommitTime.compareTo(o1CommitTime);
|
||||
}
|
||||
});
|
||||
}
|
||||
return fileIdtoVersions;
|
||||
}
|
||||
|
||||
/**
 * Equality is based on commits and basePath only; the filesystem handle and loaded
 * properties are intentionally excluded.
 */
@Override
public boolean equals(Object o) {
    if (this == o)
        return true;
    if (o == null || getClass() != o.getClass())
        return false;

    HoodieTableMetadata metadata = (HoodieTableMetadata) o;

    if (commits != null ? !commits.equals(metadata.commits) : metadata.commits != null)
        return false;
    return basePath != null ? basePath.equals(metadata.basePath) : metadata.basePath == null;

}
|
||||
|
||||
/** Consistent with equals(): combines commits and basePath. */
@Override
public int hashCode() {
    int result = commits != null ? commits.hashCode() : 0;
    result = 31 * result + (basePath != null ? basePath.hashCode() : 0);
    return result;
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
/**
 * Type of the Hoodie Table.
 *
 * Currently, 1 type is supported:
 *
 * COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions
 * containing newer values of a record.
 *
 * In the future, the following might be added:
 *
 * MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
 *
 * SIMPLE_LSM - A simple 2 level LSM tree.
 */
public enum HoodieTableType {
    // Whole-file versioning: each write produces a new complete file version.
    COPY_ON_WRITE
}
|
||||
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.uber.hoodie.common.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.uber.hoodie.common.util.FSUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Statistics about a single Hoodie write operation.
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class HoodieWriteStat implements Serializable {

    public static final String NULL_COMMIT = "null";

    /** Id of the file being written. */
    private String fileId;

    /** Full path to the file on the underlying file system. */
    private String fullPath;

    /** The previous version of the file. (null if this is the first version, i.e. insert) */
    private String prevCommit;

    /**
     * Total number of records written for this file.
     * - for updates, it is the entire number of records in the file
     * - for inserts, it is the actual number of records inserted.
     */
    private long numWrites;

    /** Total number of records actually changed. (0 for inserts) */
    private long numUpdateWrites;

    /** Total size of file written. */
    private long totalWriteBytes;

    /** Total number of records that weren't able to be written due to errors. */
    private long totalWriteErrors;

    public HoodieWriteStat() {
        // called by jackson json lib
    }

    public void setFileId(String fileId) {
        this.fileId = fileId;
    }

    public void setFullPath(String fullFilePath) {
        this.fullPath = fullFilePath;
    }

    public void setPrevCommit(String prevCommit) {
        this.prevCommit = prevCommit;
    }

    public void setNumWrites(long numWrites) {
        this.numWrites = numWrites;
    }

    public void setNumUpdateWrites(long numUpdateWrites) {
        this.numUpdateWrites = numUpdateWrites;
    }

    public long getTotalWriteBytes() {
        return totalWriteBytes;
    }

    public void setTotalWriteBytes(long totalWriteBytes) {
        this.totalWriteBytes = totalWriteBytes;
    }

    public long getTotalWriteErrors() {
        return totalWriteErrors;
    }

    public void setTotalWriteErrors(long totalWriteErrors) {
        this.totalWriteErrors = totalWriteErrors;
    }

    public String getPrevCommit() {
        return prevCommit;
    }

    public long getNumWrites() {
        return numWrites;
    }

    public long getNumUpdateWrites() {
        return numUpdateWrites;
    }

    public String getFileId() {
        return fileId;
    }

    public String getFullPath() {
        return fullPath;
    }

    @Override
    public String toString() {
        return new StringBuilder()
            .append("HoodieWriteStat {")
            .append("fullPath='" + fullPath + '\'')
            .append(", prevCommit='" + prevCommit + '\'')
            .append(", numWrites=" + numWrites)
            .append(", numUpdateWrites=" + numUpdateWrites)
            .append(", numWriteBytes=" + totalWriteBytes)
            .append('}')
            .toString();
    }

    /**
     * Identity is (fullPath, prevCommit). Null-safe: a freshly constructed (e.g. Jackson
     * deserialized) instance has null fields, and the previous implementation threw
     * NullPointerException when comparing such instances.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;

        HoodieWriteStat that = (HoodieWriteStat) o;

        if (fullPath != null ? !fullPath.equals(that.fullPath) : that.fullPath != null)
            return false;
        return prevCommit != null ? prevCommit.equals(that.prevCommit) : that.prevCommit == null;
    }

    /** Consistent with equals(): combines fullPath and prevCommit, null-safe. */
    @Override
    public int hashCode() {
        int result = fullPath != null ? fullPath.hashCode() : 0;
        result = 31 * result + (prevCommit != null ? prevCommit.hashCode() : 0);
        return result;
    }
}
|
||||
Reference in New Issue
Block a user