1
0

Import from Hoodie private repo: Part 1

This commit is contained in:
Prasanna Rajaperumal
2016-12-16 14:03:59 -08:00
commit 0512da094b
56 changed files with 8868 additions and 0 deletions

View File

@@ -0,0 +1,190 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.codehaus.jackson.annotate.JsonAutoDetect;
import org.codehaus.jackson.annotate.JsonMethod;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Metadata persisted alongside a commit: for every partition path, the list of
 * {@link HoodieWriteStat} entries describing the files written by that commit.
 */
public class HoodieCommitMetadata implements Serializable {
    private static volatile Logger log = LogManager.getLogger(HoodieCommitMetadata.class);
    private HashMap<String, List<HoodieWriteStat>> partitionToWriteStats;

    /** Creates metadata with no write stats recorded. */
    public HoodieCommitMetadata() {
        partitionToWriteStats = new HashMap<>();
    }

    /**
     * Records one write stat under the given partition path, creating the
     * per-partition list on first use.
     */
    public void addWriteStat(String partitionPath, HoodieWriteStat stat) {
        boolean firstStatForPartition = !partitionToWriteStats.containsKey(partitionPath);
        if (firstStatForPartition) {
            partitionToWriteStats.put(partitionPath, new ArrayList<HoodieWriteStat>());
        }
        List<HoodieWriteStat> statsForPartition = partitionToWriteStats.get(partitionPath);
        statsForPartition.add(stat);
    }

    /** Returns the stats recorded for a partition, or null when the partition is unknown. */
    public List<HoodieWriteStat> getWriteStats(String partitionPath) {
        return partitionToWriteStats.get(partitionPath);
    }

    /** Returns the live (mutable) partition-to-stats map backing this object. */
    public HashMap<String, List<HoodieWriteStat>> getPartitionToWriteStats() {
        return partitionToWriteStats;
    }

    /** Builds a fileId to full-path lookup across all partitions. */
    public HashMap<String, String> getFileIdAndFullPaths() {
        HashMap<String, String> fileIdToFullPath = new HashMap<>();
        for (List<HoodieWriteStat> statsInPartition : getPartitionToWriteStats().values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                fileIdToFullPath.put(writeStat.getFileId(), writeStat.getFullPath());
            }
        }
        return fileIdToFullPath;
    }

    /**
     * Serializes this object to pretty-printed JSON using field-level visibility.
     * Note that stats filed under a null partition path are logged and then
     * removed from this object before serialization.
     */
    public String toJsonString() throws IOException {
        boolean hasNullPartition = partitionToWriteStats.containsKey(null);
        if (hasNullPartition) {
            log.info("partition path is null for " + partitionToWriteStats.get(null));
            partitionToWriteStats.remove(null);
        }
        ObjectMapper jsonMapper = new ObjectMapper();
        jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
        return jsonMapper.defaultPrettyPrintingWriter().writeValueAsString(this);
    }

    /**
     * Parses metadata from JSON. A null or empty string (an empty commit file)
     * yields an empty metadata object rather than an error.
     */
    public static HoodieCommitMetadata fromJsonString(String jsonStr) throws IOException {
        boolean hasContent = jsonStr != null && !jsonStr.isEmpty();
        if (!hasContent) {
            return new HoodieCommitMetadata();
        }
        ObjectMapper jsonMapper = new ObjectMapper();
        jsonMapper.setVisibility(JsonMethod.FIELD, JsonAutoDetect.Visibility.ANY);
        return jsonMapper.readValue(jsonStr, HoodieCommitMetadata.class);
    }

    // The aggregate accessors below are deliberately named "fetch..." rather than "get..."
    // so that they are kept out of the JSON conversion.

    /** Number of distinct partition paths with recorded stats. */
    public long fetchTotalPartitionsWritten() {
        return partitionToWriteStats.size();
    }

    /** Number of stats whose previous commit is the literal string "null". */
    public long fetchTotalFilesInsert() {
        long insertedFiles = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                if ("null".equals(writeStat.getPrevCommit())) {
                    insertedFiles++;
                }
            }
        }
        return insertedFiles;
    }

    /** Number of stats whose previous commit is set and is not the literal string "null". */
    public long fetchTotalFilesUpdated() {
        long updatedFiles = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                String prevCommit = writeStat.getPrevCommit();
                if (prevCommit == null || "null".equals(prevCommit)) {
                    continue;
                }
                updatedFiles++;
            }
        }
        return updatedFiles;
    }

    /** Sum of {@code getNumUpdateWrites()} over all stats. */
    public long fetchTotalUpdateRecordsWritten() {
        long updateRecords = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                updateRecords += writeStat.getNumUpdateWrites();
            }
        }
        return updateRecords;
    }

    /** Sum of {@code getNumWrites()} over stats whose previous commit is the string "null". */
    public long fetchTotalInsertRecordsWritten() {
        long insertRecords = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                if ("null".equals(writeStat.getPrevCommit())) {
                    insertRecords += writeStat.getNumWrites();
                }
            }
        }
        return insertRecords;
    }

    /** Sum of {@code getNumWrites()} over all stats. */
    public long fetchTotalRecordsWritten() {
        long records = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                records += writeStat.getNumWrites();
            }
        }
        return records;
    }

    /** Sum of {@code getTotalWriteBytes()} over all stats. */
    public long fetchTotalBytesWritten() {
        long bytes = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                bytes += writeStat.getTotalWriteBytes();
            }
        }
        return bytes;
    }

    /** Sum of {@code getTotalWriteErrors()} over all stats. */
    public long fetchTotalWriteErrors() {
        long errors = 0;
        for (List<HoodieWriteStat> statsInPartition : partitionToWriteStats.values()) {
            for (HoodieWriteStat writeStat : statsInPartition) {
                errors += writeStat.getTotalWriteErrors();
            }
        }
        return errors;
    }

    /** Two instances are equal when their partition-to-stats maps are equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieCommitMetadata other = (HoodieCommitMetadata) o;
        if (partitionToWriteStats == null) {
            return other.partitionToWriteStats == null;
        }
        return partitionToWriteStats.equals(other.partitionToWriteStats);
    }

    @Override
    public int hashCode() {
        if (partitionToWriteStats == null) {
            return 0;
        }
        return partitionToWriteStats.hashCode();
    }
}

View File

@@ -0,0 +1,191 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Immutable, lexicographically sorted view of the commit timestamps of a dataset,
 * with lookup operations over that timeline.
 *
 * <p>The range lookups use binary search and therefore assume each commit timestamp
 * appears at most once in the list.
 */
public class HoodieCommits implements Serializable {
    private final List<String> commitList;

    /**
     * @param commitList commit timestamps in any order; they are copied and sorted, so
     *                   later changes to the argument do not affect this object
     */
    public HoodieCommits(List<String> commitList) {
        List<String> sorted = new ArrayList<>(commitList);
        Collections.sort(sorted);
        this.commitList = Collections.unmodifiableList(sorted);
    }

    /**
     * Returns the commits which are in the range (startTs, endTs].
     *
     * @param startTs exclusive start commit ts; null means "from the first commit"
     * @param endTs   inclusive end commit ts; must not be null unless the timeline is empty
     * @return unmodifiable list of matching commits in ascending order (possibly empty)
     * @throws IllegalArgumentException if startTs is greater than endTs
     */
    public List<String> findCommitsInRange(String startTs, String endTs) {
        if (commitList.isEmpty()) {
            return Collections.emptyList();
        }
        if (startTs != null && startTs.compareTo(endTs) > 0) {
            throw new IllegalArgumentException(
                "Start commit ts " + startTs + " cannot be greater than end commit ts " + endTs);
        }
        // Both bounds are "first index strictly greater than ts": that excludes startTs
        // itself on the left and never lets a commit newer than endTs in on the right.
        int fromIndex = (startTs == null) ? 0 : indexOfFirstCommitAfter(startTs);
        int toIndex = indexOfFirstCommitAfter(endTs);
        return Collections.unmodifiableList(
            new ArrayList<>(commitList.subList(fromIndex, toIndex)));
    }

    /**
     * Finds up to numCommits commits strictly after commitTimeStamp, in ascending order.
     *
     * @return unmodifiable list of commits, or null when the timeline has no commits at all
     *         (kept for compatibility with existing callers)
     */
    public List<String> findCommitsAfter(String commitTimeStamp, int numCommits) {
        if (commitList.isEmpty()) {
            return null;
        }
        int fromIndex = indexOfFirstCommitAfter(commitTimeStamp);
        // Clamp without computing fromIndex + numCommits, which could overflow.
        int count = Math.max(0, Math.min(numCommits, commitList.size() - fromIndex));
        return Collections.unmodifiableList(
            new ArrayList<>(commitList.subList(fromIndex, fromIndex + count)));
    }

    /** Index of the first commit strictly greater than commitTs (list size if there is none). */
    private int indexOfFirstCommitAfter(String commitTs) {
        int index = Collections.binarySearch(commitList, commitTs);
        // Not found: binarySearch returns -(insertionPoint) - 1. Found: skip the match itself.
        return index < 0 ? -(index + 1) : index + 1;
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("HoodieCommits{");
        sb.append("commitList=").append(commitList);
        sb.append('}');
        return sb.toString();
    }

    public boolean isEmpty() {
        return commitList.isEmpty();
    }

    public int getNumCommits() {
        return commitList.size();
    }

    /** Returns the earliest commit, or null when there are no commits. */
    public String firstCommit() {
        return commitList.isEmpty() ? null : commitList.get(0);
    }

    /** Returns the commit at zero-based position n from the earliest, or null when n is past the end. */
    public String nthCommit(int n) {
        return commitList.isEmpty() || n >= commitList.size() ? null : commitList.get(n);
    }

    /** Returns the latest commit, or null when there are no commits. */
    public String lastCommit() {
        return commitList.isEmpty() ? null : commitList.get(commitList.size() - 1);
    }

    /**
     * Returns the nth commit from the latest commit such that lastCommit(0) => lastCommit().
     * Returns null when fewer than n + 1 commits exist.
     */
    public String lastCommit(int n) {
        if (commitList.size() < n + 1) {
            return null;
        }
        return commitList.get(commitList.size() - 1 - n);
    }

    public boolean contains(String commitTs) {
        return commitList.contains(commitTs);
    }

    /** Returns the later of two commit timestamps, treating null as "absent". */
    public String max(String commit1, String commit2) {
        if (commit1 == null && commit2 == null) {
            return null;
        }
        if (commit1 == null) {
            return commit2;
        }
        if (commit2 == null) {
            return commit1;
        }
        return (isCommit1BeforeOrOn(commit1, commit2) ? commit2 : commit1);
    }

    public static boolean isCommit1BeforeOrOn(String commit1, String commit2) {
        return commit1.compareTo(commit2) <= 0;
    }

    public static boolean isCommit1After(String commit1, String commit2) {
        return commit1.compareTo(commit2) > 0;
    }

    /** Returns the sorted, unmodifiable commit list. */
    public List<String> getCommitList() {
        return commitList;
    }

    /**
     * Returns true when commitTs is before or equal to the earliest commit.
     * An empty timeline has no earliest commit, so the answer is false.
     */
    public boolean isCommitBeforeEarliestCommit(String commitTs) {
        if (commitList.isEmpty()) {
            return false;
        }
        return isCommit1BeforeOrOn(commitTs, firstCommit());
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieCommits that = (HoodieCommits) o;
        return commitList.equals(that.commitList);
    }

    @Override
    public int hashCode() {
        return commitList.hashCode();
    }
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import org.apache.hadoop.fs.FileStatus;
/**
 * Pairs a Hadoop {@link FileStatus} with the file id and commit time that
 * {@link FSUtils} extracts from the file's name.
 */
public class HoodieFile {
    private final FileStatus fileStatus;
    private String fileNameWithoutCommitTs;
    private String commitTs;

    /**
     * @param fileStatus status of the data file; its path name is parsed with
     *                   {@code FSUtils.getFileId} and {@code FSUtils.getCommitTime}
     */
    public HoodieFile(FileStatus fileStatus) {
        this.fileStatus = fileStatus;
        final String name = fileStatus.getPath().getName();
        this.fileNameWithoutCommitTs = FSUtils.getFileId(name);
        this.commitTs = FSUtils.getCommitTime(name);
    }

    /** Returns the value of {@code FSUtils.getFileId} for this file's name. */
    public String getFileNameWithoutCommitTs() {
        return fileNameWithoutCommitTs;
    }

    /** Returns the value of {@code FSUtils.getCommitTime} for this file's name. */
    public String getCommitTs() {
        return commitTs;
    }

    @Override
    public String toString() {
        return "HoodieFile{"
            + "fileStatus=" + fileStatus
            + ", fileNameWithoutCommitTs='" + fileNameWithoutCommitTs + "'"
            + ", commitTs='" + commitTs + "'"
            + "}";
    }

    /** Returns the underlying file status passed to the constructor. */
    public FileStatus getFileStatus() {
        return fileStatus;
    }
}

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
 * Identity of a record. A HoodieKey consists of
 * <ul>
 *   <li>recordKey: acts as the primary key for a record</li>
 *   <li>partitionPath: path to the partition that contains the record</li>
 * </ul>
 */
public class HoodieKey implements Serializable {
    private final String recordKey;
    private final String partitionPath;

    public HoodieKey(String recordKey, String partitionPath) {
        this.recordKey = recordKey;
        this.partitionPath = partitionPath;
    }

    public String getRecordKey() {
        return recordKey;
    }

    public String getPartitionPath() {
        return partitionPath;
    }

    /** Keys are equal when both record key and partition path are equal (nulls allowed). */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieKey other = (HoodieKey) o;
        if (!Objects.equal(recordKey, other.recordKey)) {
            return false;
        }
        return Objects.equal(partitionPath, other.partitionPath);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(recordKey, partitionPath);
    }

    @Override
    public String toString() {
        return "HoodieKey {"
            + " recordKey=" + recordKey
            + " partitionPath=" + partitionPath
            + "}";
    }
}

View File

@@ -0,0 +1,153 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
 * A single record managed by Hoodie: a {@link HoodieKey}, a payload of type {@code T},
 * and the record's current and new locations on storage.
 */
public class HoodieRecord<T extends HoodieRecordPayload> implements Serializable {
    // Names of the metadata fields. Declared final so that these shared constants
    // cannot be reassigned at runtime.
    public static final String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time";
    public static final String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno";
    public static final String RECORD_KEY_METADATA_FIELD = "_hoodie_record_key";
    public static final String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path";
    public static final String FILENAME_METADATA_FIELD = "_hoodie_file_name";

    /**
     * Identifies the record across the table
     */
    private HoodieKey key;

    /**
     * Actual payload of the record; null once {@link #deflate()} has been called
     */
    private T data;

    /**
     * Current location of record on storage. Filled in by looking up index
     */
    private HoodieRecordLocation currentLocation;

    /**
     * New location of record on storage, after written
     */
    private HoodieRecordLocation newLocation;

    /** Creates a record with no known current or new location. */
    public HoodieRecord(HoodieKey key, T data) {
        this.key = key;
        this.data = data;
        this.currentLocation = null;
        this.newLocation = null;
    }

    public HoodieKey getKey() {
        return key;
    }

    /**
     * Returns the payload.
     *
     * @throws IllegalStateException if the payload is null, e.g. after {@link #deflate()}
     */
    public T getData() {
        if (data == null) {
            throw new IllegalStateException("Payload already deflated for record.");
        }
        return data;
    }

    /**
     * Release the actual payload, to ease memory pressure. To be called after the record
     * has been written to storage. Once deflated, cannot be inflated.
     */
    public void deflate() {
        this.data = null;
    }

    /**
     * Sets the current location of the record. This should happen exactly once; the
     * check is an {@code assert}, so it is only enforced when JVM assertions are enabled.
     *
     * @return this record, to allow chaining
     */
    public HoodieRecord setCurrentLocation(HoodieRecordLocation location) {
        assert currentLocation == null;
        this.currentLocation = location;
        return this;
    }

    public HoodieRecordLocation getCurrentLocation() {
        return currentLocation;
    }

    /**
     * Sets the new location of the record, after being written. This again should happen
     * exactly once; the check is an {@code assert}, so it is only enforced when JVM
     * assertions are enabled.
     *
     * @return this record, to allow chaining
     */
    public HoodieRecord setNewLocation(HoodieRecordLocation location) {
        assert newLocation == null;
        this.newLocation = location;
        return this;
    }

    public HoodieRecordLocation getNewLocation() {
        return this.newLocation;
    }

    /** Returns true once a non-null current location has been set. */
    public boolean isCurrentLocationKnown() {
        return this.currentLocation != null;
    }

    /** Records are equal when key, payload and both locations are equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieRecord that = (HoodieRecord) o;
        return Objects.equal(key, that.key) &&
            Objects.equal(data, that.data) &&
            Objects.equal(currentLocation, that.currentLocation) &&
            Objects.equal(newLocation, that.newLocation);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(key, data, currentLocation, newLocation);
    }

    /** The payload is intentionally left out of the string form. */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("HoodieRecord{");
        sb.append("key=").append(key);
        sb.append(", currentLocation='").append(currentLocation).append('\'');
        sb.append(", newLocation='").append(newLocation).append('\'');
        sb.append('}');
        return sb.toString();
    }

    /** Builds a sequence id of the form {@code <commitTime>_<partitionId>_<recordIndex>}. */
    public static String generateSequenceId(String commitTime, int partitionId, long recordIndex) {
        return commitTime + "_" + partitionId + "_" + recordIndex;
    }

    /** Returns the partition path of this record's key. */
    public String getPartitionPath() {
        assert key != null;
        return key.getPartitionPath();
    }

    /** Returns the record key of this record's key. */
    public String getRecordKey() {
        assert key != null;
        return key.getRecordKey();
    }
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.google.common.base.Objects;
import java.io.Serializable;
/**
 * Location of a HoodieRecord within the partition it belongs to, expressed as a
 * commit time plus a file id. Ultimately, this points to an actual file on disk.
 */
public class HoodieRecordLocation implements Serializable {
    private final String commitTime;
    private final String fileId;

    public HoodieRecordLocation(String commitTime, String fileId) {
        this.commitTime = commitTime;
        this.fileId = fileId;
    }

    /** Locations are equal when both commit time and file id are equal (nulls allowed). */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HoodieRecordLocation other = (HoodieRecordLocation) o;
        if (!Objects.equal(commitTime, other.commitTime)) {
            return false;
        }
        return Objects.equal(fileId, other.fileId);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(commitTime, fileId);
    }

    @Override
    public String toString() {
        return "HoodieRecordLocation {"
            + "commitTime=" + commitTime + ", "
            + "fileId=" + fileId
            + "}";
    }

    public String getCommitTime() {
        return commitTime;
    }

    public String getFileId() {
        return fileId;
    }
}

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import java.io.IOException;
import java.io.Serializable;
/**
* Every Hoodie dataset has an implementation of the <code>HoodieRecordPayload</code>.
* This abstracts out callbacks which depend on record-specific logic.
*
* @param <T> the concrete payload type, so that {@link #preCombine} accepts and returns
* the implementing type
*/
public interface HoodieRecordPayload<T extends HoodieRecordPayload> extends Serializable {
/**
* When more than one HoodieRecord has the same HoodieKey, this function combines them
* before attempting to insert/upsert (if combining is turned on in HoodieClientConfig).
*
* @param another the other payload sharing the same key
* @return the payload to keep for that key
*/
T preCombine(T another);
/**
* This method lets you write custom merging/combining logic to produce new values
* as a function of the current value on storage and what is contained in this object.
*
* eg:
* 1) You are updating counters; you may want to add counts to currentValue and write back updated counts
* 2) You may be reading DB redo logs, and merge them with the current image for a database row on storage
*
* @param currentValue Current value in storage, to merge/combine this payload with
* @param schema Schema used for record
* @return new combined/merged value to be written back to storage
* @throws IOException declared for implementations that may fail while producing the record
*/
IndexedRecord combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException;
/**
* Generates an avro record out of the given HoodieRecordPayload, to be written out to storage.
* Called when writing a new value for the given HoodieKey, wherein there is no existing record in
* storage to be combined against (i.e. insert).
*
* @param schema Schema used for record
* @return the avro record to write
* @throws IOException declared for implementations that may fail while producing the record
*/
IndexedRecord getInsertValue(Schema schema) throws IOException;
}

View File

@@ -0,0 +1,480 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.uber.hoodie.common.util.FSUtils;
import com.uber.hoodie.exception.DatasetNotFoundException;
import com.uber.hoodie.exception.HoodieIOException;
import com.uber.hoodie.exception.InvalidDatasetException;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;
/**
* Manages all file system level interactions for the Hoodie tables.
*/
public class HoodieTableMetadata implements Serializable {
// Largest commit timestamp used as an upper bound: Long.MAX_VALUE rendered as a string.
// findCommitsSinceTs passes it as the end of the range.
public static final String MAX_COMMIT_TS = String.valueOf(Long.MAX_VALUE);
// Keys stored in the hoodie.properties file.
public static final String HOODIE_TABLE_NAME_PROP_NAME = "hoodie.table.name";
public static final String HOODIE_TABLE_TYPE_PROP_NAME = "hoodie.table.type";
// Table type written into hoodie.properties when the file is first created.
public static final HoodieTableType DEFAULT_TABLE_TYPE = HoodieTableType.COPY_ON_WRITE;
// Name of the properties file kept inside the metadata folder.
public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties";
// Property key and fallback value read by getHDroneDatasetProfile().
private static final String HOODIE_HDRONE_PROFILE_DEFAULT_VALUE = "HOODIE";
private static final java.lang.String HOODIE_HDRONE_PROFILE_PROP_NAME =
"hoodie.hdrone.dataset.profile";
private static Logger log = LogManager.getLogger(HoodieTableMetadata.class);
// Marked transient: these two are not part of the serialized form of this object.
private transient final FileSystem fs;
private transient final Path metadataFolder;
// Contents of hoodie.properties, loaded in the constructor.
private final Properties properties;
// Commits found by scanning for COMMIT_FILE_SUFFIX files.
private HoodieCommits commits;
// Commits found by scanning for INFLIGHT_FILE_SUFFIX files.
private List<String> inflightCommits;
// Root directory of the dataset.
private String basePath;
// Name of the metadata folder created under basePath.
public static final String METAFOLDER_NAME = ".hoodie";
// File-name suffixes passed to scanCommits for completed and in-flight commits.
public static final String COMMIT_FILE_SUFFIX = ".commit";
public static final String INFLIGHT_FILE_SUFFIX = ".inflight";
/**
* Constructor which initializes the hoodie table metadata. It will create the base directory
* and the hoodie.properties file if they are not already present (delegates to the private
* constructor with initOnMissing = true).
*
* @param fs file system holding the dataset
* @param basePath root directory of the dataset
* @param tableName table name written to hoodie.properties when that file has to be created
* @throws HoodieIOException if the underlying file system calls fail with an IOException
*/
public HoodieTableMetadata(FileSystem fs, String basePath, String tableName) {
this(fs, basePath, tableName, true);
}
/**
* Constructor which loads the hoodie table metadata. It requires the metadata to be present
* already: a missing base directory raises DatasetNotFoundException and a missing
* hoodie.properties file raises InvalidDatasetException (delegates to the private constructor
* with initOnMissing = false and no table name).
*
* @param fs file system holding the dataset
* @param basePath root directory of the dataset
* @throws HoodieIOException if the underlying file system calls fail with an IOException
*/
public HoodieTableMetadata(FileSystem fs, String basePath) {
this(fs, basePath, null, false);
}
/**
* Shared constructor logic: validates (or, when allowed, creates) the dataset layout under
* basePath, then loads the commits, the inflight commits and the table properties.
*
* @param fs file system used for all checks and reads
* @param basePath root directory of the dataset
* @param tableName name passed to createHoodieProperties when hoodie.properties is missing
* @param initOnMissing when true, a missing base directory or properties file is created;
* when false, DatasetNotFoundException / InvalidDatasetException is thrown instead
* @throws HoodieIOException wrapping any IOException raised inside the try block
*/
private HoodieTableMetadata(FileSystem fs, String basePath, String tableName,
boolean initOnMissing) {
this.fs = fs;
this.basePath = basePath;
try {
Path basePathDir = new Path(this.basePath);
// A missing base path is only tolerated when we are allowed to initialize the dataset.
if (!fs.exists(basePathDir)) {
if (initOnMissing) {
fs.mkdirs(basePathDir);
} else {
throw new DatasetNotFoundException(this.basePath);
}
}
// The base path must be a directory, whether it pre-existed or was just created.
if (!fs.isDirectory(new Path(basePath))) {
throw new DatasetNotFoundException(this.basePath);
}
// Resolve the .hoodie folder; it is created (if needed) inside createHoodieProperties.
this.metadataFolder = new Path(this.basePath, METAFOLDER_NAME);
Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
if (!fs.exists(propertyPath)) {
if (initOnMissing) {
createHoodieProperties(metadataFolder, tableName);
} else {
throw new InvalidDatasetException(this.basePath);
}
}
// Load meta data
this.commits = new HoodieCommits(scanCommits(COMMIT_FILE_SUFFIX));
this.inflightCommits = scanCommits(INFLIGHT_FILE_SUFFIX);
this.properties = readHoodieProperties();
log.info("All commits :" + commits);
} catch (IOException e) {
throw new HoodieIOException("Could not load HoodieMetadata from path " + basePath, e);
}
}
/**
 * Loads the metadata of every commit of this table by reading each commit file.
 * Expensive operation (one file read per commit), use with caution.
 *
 * @return unmodifiable map from commit time to its {@link HoodieCommitMetadata}, sorted by
 *         commit time
 * @throws HoodieIOException if any commit file cannot be read
 */
public SortedMap<String, HoodieCommitMetadata> getAllCommitMetadata() {
    SortedMap<String, HoodieCommitMetadata> byCommitTime = new TreeMap<>();
    try {
        for (String commitTime : commits.getCommitList()) {
            byCommitTime.put(commitTime, getCommitMetadata(commitTime));
        }
    } catch (IOException e) {
        throw new HoodieIOException("Could not load all commits for table " + getTableName(),
            e);
    }
    return Collections.unmodifiableSortedMap(byCommitTime);
}
/**
 * Reads and parses the commit file of a single commit from the metadata folder.
 *
 * @param commitTime commit whose file (named by {@code FSUtils.makeCommitFileName}) is read
 * @return the parsed metadata
 * @throws IOException if the file cannot be opened, read or parsed
 */
public HoodieCommitMetadata getCommitMetadata(String commitTime) throws IOException {
    Path commitFile = new Path(metadataFolder, FSUtils.makeCommitFileName(commitTime));
    // try-with-resources closes the stream on every path and, unlike a manual finally,
    // keeps the read/parse failure as the primary exception if close() also fails.
    try (FSDataInputStream is = fs.open(commitFile)) {
        return HoodieCommitMetadata.fromJsonString(IOUtils.toString(is));
    }
}
/**
 * Returns the table type recorded in hoodie.properties.
 *
 * @return the configured type, or {@code DEFAULT_TABLE_TYPE} when the properties file has
 *         no table type entry (instead of failing with a NullPointerException)
 * @throws IllegalArgumentException if the stored value is not a valid HoodieTableType name
 */
public HoodieTableType getTableType() {
    String tableType = properties.getProperty(HOODIE_TABLE_TYPE_PROP_NAME);
    if (tableType == null) {
        return DEFAULT_TABLE_TYPE;
    }
    return HoodieTableType.valueOf(tableType);
}
/**
 * Looks up the file name for the specified <code>HoodieRecord</code>, using the file id of
 * the record's current location. The record's current location must already be set.
 * <p/>
 * TODO(vc): This metadata needs to be cached in each executor, statically, and used across, if
 * we need to be nicer to the NameNode
 */
public String getFilenameForRecord(FileSystem fs, final HoodieRecord record) {
    return getFilenameForRecord(fs, record, record.getCurrentLocation().getFileId());
}
/**
 * Looks up the file name for the given file id inside the record's partition. The partition
 * directory is listed, the files are grouped by file id, and the name of the first file in
 * the group is returned.
 *
 * @throws HoodieIOException if listing fails or no file group exists for fileId
 */
public String getFilenameForRecord(FileSystem fs, final HoodieRecord record, String fileId) {
    try {
        Path partitionDir = new Path(basePath, record.getPartitionPath());
        Map<String, List<FileStatus>> versionsByFileId =
            groupFilesByFileId(fs.listStatus(partitionDir), commits.lastCommit());
        if (versionsByFileId.containsKey(fileId)) {
            return versionsByFileId.get(fileId).get(0).getPath().getName();
        }
        // Raised as an IOException on purpose so it is wrapped by the handler below.
        throw new FileNotFoundException("Cannot find valid versions for fileId " + fileId);
    } catch (IOException e) {
        throw new HoodieIOException(
            "Could not get Filename for record " + record, e);
    }
}
/**
 * For every file id in the partition, picks the first file in its version list whose commit
 * time is before or equal to maxCommitTime.
 *
 * @param fs file system to list the partition with
 * @param partitionPathStr partition path relative to the base path
 * @param maxCommitTime inclusive upper bound on the commit time of the returned files
 * @return one file per file id that has a qualifying version; an empty array when the
 *         partition directory does not exist
 * @throws HoodieIOException if the file system calls fail
 */
public FileStatus[] getLatestVersionInPartition(FileSystem fs, String partitionPathStr,
    String maxCommitTime) {
    try {
        Path partitionDir = new Path(basePath, partitionPathStr);
        if (!fs.exists(partitionDir)) {
            return new FileStatus[0];
        }
        Map<String, List<FileStatus>> versionsByFileId =
            groupFilesByFileId(fs.listStatus(partitionDir), commits.lastCommit());
        HashMap<String, FileStatus> chosen = new HashMap<>();
        for (Map.Entry<String, List<FileStatus>> group : versionsByFileId.entrySet()) {
            for (FileStatus candidate : group.getValue()) {
                String commitTime = FSUtils.getCommitTime(candidate.getPath().getName());
                if (HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime)) {
                    // Keep only the first qualifying version of this file id.
                    chosen.put(group.getKey(), candidate);
                    break;
                }
            }
        }
        return chosen.values().toArray(new FileStatus[chosen.size()]);
    } catch (IOException e) {
        throw new HoodieIOException(
            "Could not get latest versions in Partition " + partitionPathStr, e);
    }
}
/**
 * Lists ALL the data files in a partition and groups them by file id, as computed by
 * groupFilesByFileId against the last commit.
 *
 * @param fs file system to list the partition with
 * @param partitionPath partition path relative to the base path
 * @return map from file id to the versions of that file
 * @throws HoodieIOException if listing or grouping fails
 */
public Map<String, List<FileStatus>> getAllVersionsInPartition(FileSystem fs, String partitionPath) {
    try {
        Path partitionDir = new Path(basePath, partitionPath);
        FileStatus[] listing = fs.listStatus(partitionDir);
        return groupFilesByFileId(listing, commits.lastCommit());
    } catch (IOException e) {
        throw new HoodieIOException(
            "Could not load all file versions in partition " + partitionPath, e);
    }
}
/**
 * For every file id among the given files, returns the first version in its version list
 * whose commit time is one of the given commits. File ids without such a version are
 * omitted.
 *
 * @param fileStatuses files to group and filter
 * @param commitsToReturn commits to include; an empty list yields an empty array
 * @throws HoodieIOException if grouping the files fails
 */
public FileStatus[] getLatestVersionInRange(FileStatus[] fileStatuses, List<String> commitsToReturn) {
    if (commitsToReturn.isEmpty()) {
        return new FileStatus[0];
    }
    try {
        List<FileStatus> selected = new ArrayList<>();
        Map<String, List<FileStatus>> versionsByFileId =
            groupFilesByFileId(fileStatuses, commits.lastCommit());
        for (List<FileStatus> versions : versionsByFileId.values()) {
            for (FileStatus candidate : versions) {
                String commitTime = FSUtils.getCommitTime(candidate.getPath().getName());
                if (!commitsToReturn.contains(commitTime)) {
                    continue;
                }
                // First matching version wins for this file id.
                selected.add(candidate);
                break;
            }
        }
        return selected.toArray(new FileStatus[selected.size()]);
    } catch (IOException e) {
        throw new HoodieIOException("Could not filter files from commits " + commitsToReturn, e);
    }
}
/**
 * Returns, for every file id among the given files, the first entry of its version list
 * (the grouping is expected to put the latest version first).
 *
 * @param fileStatuses files to group
 * @return one file per file id
 * @throws HoodieIOException if grouping the files fails
 */
public FileStatus[] getLatestVersions(FileStatus[] fileStatuses) {
    Map<String, List<FileStatus>> versionsByFileId;
    try {
        versionsByFileId = groupFilesByFileId(fileStatuses, commits.lastCommit());
    } catch (IOException e) {
        throw new HoodieIOException("Could not filter files for latest version ", e);
    }
    FileStatus[] latest = new FileStatus[versionsByFileId.size()];
    int next = 0;
    for (List<FileStatus> versions : versionsByFileId.values()) {
        // first file is the latest one
        latest[next++] = versions.get(0);
    }
    return latest;
}
/**
* Get the base path for the Hoodie Table
*
* @return the base path string this object was constructed with
*/
public String getBasePath() {
return basePath;
}
/**
* Tells whether the commits held by this metadata instance are empty.
*
* @return the result of {@code commits.isEmpty()}
*/
public boolean isCommitsEmpty() {
return commits.isEmpty();
}
/**
 * Checks whether a commit time is acceptable for this table.
 *
 * A commit time is accepted only when the table has at least one commit and {@code commits}
 * either reports it as before the earliest commit or contains it.
 *
 * @param commitTs commit time to check
 * @return true if the commit time passes the checks above
 */
public boolean isCommitTsSafe(String commitTs) {
    if (isCommitsEmpty()) {
        return false;
    }
    if (commits.isCommitBeforeEarliestCommit(commitTs)) {
        return true;
    }
    return commits.contains(commitTs);
}
/**
* Finds the commits from the given start time up to {@code MAX_COMMIT_TS}.
*
* Delegates to {@code commits.findCommitsInRange(startTs, MAX_COMMIT_TS)}.
* NOTE(review): whether the bounds are inclusive is decided by HoodieCommits - confirm there.
*
* @param startTs start of the range
* @return the commit times reported by {@code commits}
*/
public List<String> findCommitsSinceTs(String startTs) {
return commits.findCommitsInRange(startTs, MAX_COMMIT_TS);
}
/**
* Finds the commits between the two given commit times.
*
* Delegates to {@code commits.findCommitsInRange(startTs, endTs)}.
* NOTE(review): whether the bounds are inclusive is decided by HoodieCommits - confirm there.
*
* @param startTs start of the range
* @param endTs end of the range
* @return the commit times reported by {@code commits}
*/
public List<String> findCommitsInRange(String startTs, String endTs) {
return commits.findCommitsInRange(startTs, endTs);
}
/**
* Finds commits after the given commit time.
*
* Delegates to {@code commits.findCommitsAfter(startTs, maxCommits)}.
* NOTE(review): presumably maxCommits caps the number of commits returned - confirm in HoodieCommits.
*
* @param startTs commit time to start after
* @param maxCommits passed through unchanged to {@code commits}
* @return the commit times reported by {@code commits}
*/
public List<String> findCommitsAfter(String startTs, Integer maxCommits) {
return commits.findCommitsAfter(startTs, maxCommits);
}
/**
* Get the commits held by this metadata instance.
*
* @return the {@code commits} field itself, not a copy
*/
public HoodieCommits getAllCommits() {
return commits;
}
/**
* Get the inflight commits held by this metadata instance.
*
* @return the {@code inflightCommits} list itself, not a copy
*/
public List<String> getAllInflightCommits() {
return inflightCommits;
}
/**
 * Renders this metadata as {@code HoodieTableMetadata{commits=...}}; only the commits are
 * included.
 */
@Override
public String toString() {
    return "HoodieTableMetadata{" + "commits=" + commits + '}';
}
/**
* Get the table name stored in the table properties.
*
* @return the value stored under {@code HOODIE_TABLE_NAME_PROP_NAME} in {@code properties}
*/
public String getTableName() {
return properties.getProperty(HOODIE_TABLE_NAME_PROP_NAME);
}
/**
* Get the HDrone dataset profile stored in the table properties.
*
* @return the value stored under {@code HOODIE_HDRONE_PROFILE_PROP_NAME}, with
* {@code HOODIE_HDRONE_PROFILE_DEFAULT_VALUE} passed as the default
*/
public String getHDroneDatasetProfile() {
return properties.getProperty(HOODIE_HDRONE_PROFILE_PROP_NAME, HOODIE_HDRONE_PROFILE_DEFAULT_VALUE);
}
/**
 * Initialize the hoodie meta directory and any necessary files inside the meta (including the
 * hoodie.properties).
 *
 * The folder is created if it does not exist, then a properties file holding the table name
 * and the default table type is written into it.
 *
 * @param metadataFolder meta directory to create (if missing) and write the properties file into
 * @param tableName      value stored under {@code HOODIE_TABLE_NAME_PROP_NAME}
 * @throws IOException if the folder or the properties file cannot be created or written
 */
private void createHoodieProperties(Path metadataFolder, String tableName) throws IOException {
    if (!fs.exists(metadataFolder)) {
        fs.mkdirs(metadataFolder);
    }

    // Build the properties before opening the stream, so a failure here (e.g. a null table
    // name) does not leave an empty properties file behind.
    Properties props = new Properties();
    props.setProperty(HOODIE_TABLE_NAME_PROP_NAME, tableName);
    props.setProperty(HOODIE_TABLE_TYPE_PROP_NAME, DEFAULT_TABLE_TYPE.name());

    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    // try-with-resources: a failure in close() no longer masks a failure in store(); the
    // close() failure is attached as a suppressed exception instead.
    try (FSDataOutputStream outputStream = fs.create(propertyPath)) {
        props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis()));
    }
}
/**
 * Loads the hoodie table properties from the hoodie.properties file under the .hoodie path.
 *
 * @return the properties read from {@code HOODIE_PROPERTIES_FILE} inside {@code metadataFolder}
 * @throws IOException if the properties file cannot be opened or read
 */
private Properties readHoodieProperties() throws IOException {
    Properties props = new Properties();
    Path propertyPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE);
    // try-with-resources: a failure in close() no longer masks a failure in load(); the
    // close() failure is attached as a suppressed exception instead.
    try (FSDataInputStream inputStream = fs.open(propertyPath)) {
        props.load(inputStream);
    }
    return props;
}
/**
 * Scan the commit times under the metadata folder, only choosing files whose name ends with
 * the given suffix.
 *
 * @param commitFileSuffix suffix a file name must end with to be picked up
 * @return for each matching file, the part of its name before the first '.'
 * @throws IOException if listing the metadata folder fails
 */
private List<String> scanCommits(final String commitFileSuffix) throws IOException {
    log.info("Attempting to load the commits under " + metadataFolder + " with suffix " + commitFileSuffix);
    final List<String> commitTimes = new ArrayList<>();

    // The filter collects commit times as a side effect while the listing is produced; the
    // statuses returned by listStatus are not needed.
    final PathFilter suffixFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            final String fileName = path.getName();
            final boolean matches = fileName.endsWith(commitFileSuffix);
            if (matches) {
                commitTimes.add(fileName.split("\\.")[0]);
            }
            return matches;
        }
    };
    fs.listStatus(metadataFolder, suffixFilter);
    return commitTimes;
}
/**
 * Groups file versions by file id, keeping only versions that pass the commit safety checks.
 *
 * A file is kept when {@link #isCommitTsSafe(String)} accepts its commit time and that commit
 * time is before or on {@code maxCommitTime}. Each resulting list is sorted by commit time in
 * reverse order, so the newest version comes first.
 *
 * @param files         file versions to group
 * @param maxCommitTime maximum permissible commit time
 * @return map from file id to that file's versions, newest first
 */
private Map<String, List<FileStatus>> groupFilesByFileId(FileStatus[] files,
                                                         String maxCommitTime) throws IOException {
    Map<String, List<FileStatus>> versionsByFileId = new HashMap<>();
    for (FileStatus candidate : files) {
        String name = candidate.getPath().getName();
        String fileId = FSUtils.getFileId(name);
        String commitTime = FSUtils.getCommitTime(name);

        boolean permitted = isCommitTsSafe(commitTime)
            && HoodieCommits.isCommit1BeforeOrOn(commitTime, maxCommitTime);
        if (!permitted) {
            continue;
        }

        List<FileStatus> versions = versionsByFileId.get(fileId);
        if (versions == null) {
            versions = new ArrayList<FileStatus>();
            versionsByFileId.put(fileId, versions);
        }
        versions.add(candidate);
    }

    // Comparing right against left reverses the natural order of the commit time strings.
    Comparator<FileStatus> newestFirst = new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus left, FileStatus right) {
            String leftCommitTime = FSUtils.getCommitTime(left.getPath().getName());
            String rightCommitTime = FSUtils.getCommitTime(right.getPath().getName());
            return rightCommitTime.compareTo(leftCommitTime);
        }
    };
    for (List<FileStatus> versions : versionsByFileId.values()) {
        Collections.sort(versions, newestFirst);
    }
    return versionsByFileId;
}
/**
 * Two table metadata instances are equal when they are of the same class and have equal
 * commits and equal base paths; null fields only equal null fields.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null || o.getClass() != getClass()) {
        return false;
    }
    HoodieTableMetadata other = (HoodieTableMetadata) o;

    boolean sameCommits =
        (commits == null) ? other.commits == null : commits.equals(other.commits);
    if (!sameCommits) {
        return false;
    }
    if (basePath == null) {
        return other.basePath == null;
    }
    return basePath.equals(other.basePath);
}
/**
 * Hash built from the commits and the base path, the same fields that equals compares;
 * a null field contributes 0.
 */
@Override
public int hashCode() {
    int commitsHash = (commits == null) ? 0 : commits.hashCode();
    int basePathHash = (basePath == null) ? 0 : basePath.hashCode();
    return 31 * commitsHash + basePathHash;
}
}

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
/**
* Type of the Hoodie Table.
*
* Currently, only one type is supported:
*
* COPY_ON_WRITE - Performs upserts by versioning entire files, with later versions containing
* the newer value of a record.
*
* In the future, the following might be added:
*
* MERGE_ON_READ - Speeds up upserts, by delaying merge until enough work piles up.
*
* SIMPLE_LSM - A simple 2 level LSM tree.
*
* NOTE(review): the constant's name() appears to be persisted in hoodie.properties, so renaming
* a constant would presumably affect existing tables - confirm before renaming.
*/
public enum HoodieTableType {
COPY_ON_WRITE
}

View File

@@ -0,0 +1,158 @@
/*
* Copyright (c) 2016 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.uber.hoodie.common.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.uber.hoodie.common.util.FSUtils;
import java.io.Serializable;
/**
 * Statistics about a single Hoodie write operation.
 *
 * Instances are plain mutable beans: Jackson creates them through the no-arg constructor and
 * fills them through the setters, so every field may still hold its default value (null or 0).
 * Equality and hashing are based on {@code fullPath} and {@code prevCommit} only.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class HoodieWriteStat implements Serializable {

    // NOTE(review): presumably the placeholder stored in prevCommit when there is no previous
    // commit - confirm against the callers that set prevCommit.
    public static final String NULL_COMMIT = "null";

    /**
     * Id of the file being written
     */
    private String fileId;

    /**
     * Full path to the file on underlying file system
     */
    private String fullPath;

    /**
     * The previous version of the file. (null if this is the first version. i.e insert)
     */
    private String prevCommit;

    /**
     * Total number of records written for this file.
     * - for updates, its the entire number of records in the file
     * - for inserts, its the actual number of records inserted.
     */
    private long numWrites;

    /**
     * Total number of records actually changed. (0 for inserts)
     */
    private long numUpdateWrites;

    /**
     * Total size of file written
     */
    private long totalWriteBytes;

    /**
     * Total number of records that could not be written due to errors.
     */
    private long totalWriteErrors;

    public HoodieWriteStat() {
        // called by jackson json lib
    }

    public void setFileId(String fileId) {
        this.fileId = fileId;
    }

    public void setFullPath(String fullFilePath) {
        this.fullPath = fullFilePath;
    }

    public void setPrevCommit(String prevCommit) {
        this.prevCommit = prevCommit;
    }

    public void setNumWrites(long numWrites) {
        this.numWrites = numWrites;
    }

    public void setNumUpdateWrites(long numUpdateWrites) {
        this.numUpdateWrites = numUpdateWrites;
    }

    public long getTotalWriteBytes() {
        return totalWriteBytes;
    }

    public void setTotalWriteBytes(long totalWriteBytes) {
        this.totalWriteBytes = totalWriteBytes;
    }

    public long getTotalWriteErrors() {
        return totalWriteErrors;
    }

    public void setTotalWriteErrors(long totalWriteErrors) {
        this.totalWriteErrors = totalWriteErrors;
    }

    public String getPrevCommit() {
        return prevCommit;
    }

    public long getNumWrites() {
        return numWrites;
    }

    public long getNumUpdateWrites() {
        return numUpdateWrites;
    }

    public String getFileId() {
        return fileId;
    }

    public String getFullPath() {
        return fullPath;
    }

    /**
     * Renders the path, previous commit and write counters; fileId and totalWriteErrors are
     * not part of the output.
     */
    @Override
    public String toString() {
        return new StringBuilder()
            .append("HoodieWriteStat {")
            .append("fullPath='" + fullPath + '\'')
            .append(", prevCommit='" + prevCommit + '\'')
            .append(", numWrites=" + numWrites)
            .append(", numUpdateWrites=" + numUpdateWrites)
            .append(", numWriteBytes=" + totalWriteBytes)
            .append('}')
            .toString();
    }

    /**
     * Two stats are equal when they have the same class, the same {@code fullPath} and the same
     * {@code prevCommit}. Either field may be null (prevCommit is null for a first version, and
     * both are null on a freshly constructed instance); null only equals null.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        HoodieWriteStat that = (HoodieWriteStat) o;
        // Null-safe comparisons: calling equals() on a null field would throw.
        if (fullPath != null ? !fullPath.equals(that.fullPath) : that.fullPath != null) {
            return false;
        }
        return prevCommit != null ? prevCommit.equals(that.prevCommit) : that.prevCommit == null;
    }

    /**
     * Hash over the same fields as {@link #equals(Object)}; a null field contributes 0, and the
     * value for non-null fields is {@code 31 * fullPath.hashCode() + prevCommit.hashCode()}.
     */
    @Override
    public int hashCode() {
        int result = fullPath != null ? fullPath.hashCode() : 0;
        result = 31 * result + (prevCommit != null ? prevCommit.hashCode() : 0);
        return result;
    }
}