diff --git a/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java b/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java index 94acbff3c..928979c24 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/WriteStatus.java @@ -119,6 +119,8 @@ public class WriteStatus implements Serializable { return totalRecords; } + public long getTotalErrorRecords() { return totalErrorRecords; } + @Override public String toString() { final StringBuilder sb = new StringBuilder("WriteStatus {"); diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java index b3f260a44..91b23bc0c 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/HoodieIndex.java @@ -88,6 +88,35 @@ public abstract class HoodieIndex implements Seri */ public abstract boolean rollbackCommit(String commitTime); + /** + * An index is `global` if the {@link HoodieKey} to fileID mapping does not depend on the `partitionPath`. + * Such an implementation is able to obtain the same mapping for two hoodie keys with the same `recordKey` + * but different `partitionPath`. + * + * @return whether or not the index implementation is global in nature + */ + public abstract boolean isGlobal(); + + /** + * This is used by storage to determine if it's safe to send inserts straight to the log, + * i.e. having a {@link com.uber.hoodie.common.model.FileSlice} with no data file. + * + * @return true/false depending on whether the impl has this capability + */ + public abstract boolean canIndexLogFiles(); + + + /** + * + * An index is "implicit" with respect to storage if just writing new data to a file slice + * updates the index as well. This is used by storage to save memory footprint in + * certain cases. 
+ * + * @return true if writing new data to storage also updates the index, false otherwise + */ + public abstract boolean isImplicitWithStorage(); + + public static HoodieIndex createIndex( HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException { switch (config.getIndexType()) { diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java index 0b271b77f..7f202f662 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/InMemoryHashIndex.java @@ -110,7 +110,36 @@ public class InMemoryHashIndex extends HoodieInde @Override public boolean rollbackCommit(String commitTime) { - // TODO (weiy) return true; } + + /** + * Only looks up by recordKey. + * + * @return always {@code true} + */ + @Override + public boolean isGlobal() { + return true; + } + + /** + * Mapping is presumably held by this in-memory index already (not HBase) - TODO confirm. + * + * @return always {@code true} + */ + @Override + public boolean canIndexLogFiles() { + return true; + } + + /** + * Index needs to be explicitly updated after storage write. + * + * @return always {@code false} + */ + @Override + public boolean isImplicitWithStorage() { + return false; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java index 9b6eae768..37e0bc719 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bloom/HoodieBloomIndex.java @@ -264,6 +264,36 @@ public class HoodieBloomIndex extends HoodieIndex return true; } + /** + * This is not global, since we depend on the partitionPath to do the lookup. + * + * @return always {@code false} + */ + @Override + public boolean isGlobal() { + return false; + } + + /** + * No indexes into log files yet. + * + * @return always {@code false} + */ + @Override + public boolean canIndexLogFiles() { + return false; + } + + /** + * Bloom filters are stored in the same data files. 
+ * + * @return always {@code true} + */ + @Override + public boolean isImplicitWithStorage() { + return true; + } + /** * if we dont have key ranges, then also we need to compare against the file. no other choice * if we do, then only compare the file if the record key falls in range. diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java index 5dc697fa1..ba45bc666 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/bucketed/BucketedIndex.java @@ -50,6 +50,8 @@ import scala.Tuple2; * - Could increase write amplification on copy-on-write storage since inserts always rewrite files * - Not global. * + * + * */ public class BucketedIndex extends HoodieIndex { @@ -88,4 +90,35 @@ public class BucketedIndex extends HoodieIndex // nothing to rollback in the index. return true; } + + /** + * Bucketing is still done within each partition. + * + * @return always {@code false} + */ + @Override + public boolean isGlobal() { + return false; + } + + /** + * Since indexing is just a deterministic hash, we can identify file group correctly even without an index + * on the actual log file. + * + * @return always {@code true} + */ + @Override + public boolean canIndexLogFiles() { + return true; + } + + /** + * Indexing is just a hash function. 
+ * + * @return always {@code true} + */ + @Override + public boolean isImplicitWithStorage() { + return true; + } } diff --git a/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java b/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java index 03ac1438f..39929876f 100644 --- a/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java +++ b/hoodie-client/src/main/java/com/uber/hoodie/index/hbase/HBaseIndex.java @@ -241,4 +241,34 @@ public class HBaseIndex extends HoodieIndex { // not the other way around return true; } + + /** + * Only looks up by recordKey. + * + * @return always {@code true} + */ + @Override + public boolean isGlobal() { + return true; + } + + /** + * Mapping is available in HBase already. + * + * @return always {@code true} + */ + @Override + public boolean canIndexLogFiles() { + return true; + } + + /** + * Index needs to be explicitly updated after storage write. + * + * @return always {@code false} + */ + @Override + public boolean isImplicitWithStorage() { + return false; + } }