1
0

Adding canIndexLogFiles(), isImplicitWithStorage(), isGlobal() to HoodieIndex

This commit is contained in:
Vinoth Chandar
2017-09-28 09:57:28 -07:00
committed by vinoth chandar
parent 6230e15191
commit 9f98ae643b
6 changed files with 154 additions and 1 deletions

View File

@@ -119,6 +119,8 @@ public class WriteStatus implements Serializable {
return totalRecords;
}
/**
* Returns the current value of the {@code totalErrorRecords} counter.
*
* @return the value of {@code totalErrorRecords}
*/
public long getTotalErrorRecords() {
return totalErrorRecords;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("WriteStatus {");

View File

@@ -88,6 +88,35 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
*/
public abstract boolean rollbackCommit(String commitTime);
/**
* An index is "global" if the mapping from {@link HoodieKey} to fileID does not depend on the
* {@code partitionPath}. Such an implementation is able to obtain the same mapping for two
* hoodie keys with the same {@code recordKey} but different {@code partitionPath}.
*
* @return {@code true} if the index implementation is global in nature, {@code false} otherwise
*/
public abstract boolean isGlobal();
/**
* This is used by storage to determine whether it is safe to send inserts straight to the log,
* i.e. having a {@link com.uber.hoodie.common.model.FileSlice} with no data file.
*
* @return {@code true} if the implementation has this capability, {@code false} otherwise
*/
public abstract boolean canIndexLogFiles();
/**
* An index is "implicit" with respect to storage if just writing new data to a file slice
* updates the index as well. This is used by storage to save memory footprint in
* certain cases.
*
* @return {@code true} if writing new data to a file slice also updates the index,
* {@code false} otherwise
*/
public abstract boolean isImplicitWithStorage();
public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
switch (config.getIndexType()) {

View File

@@ -110,7 +110,36 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
@Override
public boolean rollbackCommit(String commitTime) {
// TODO (weiy)
// Until the TODO is addressed nothing is rolled back here: commitTime is not used
// and success is reported unconditionally.
return true;
}
/**
* Only looks up by recordKey, so this index reports itself as global.
*
* @return {@code true}, always
*/
@Override
public boolean isGlobal() {
return true;
}
/**
* Reports that this index is able to index log files.
*
* NOTE(review): this comment previously read "Mapping is available in HBase already", which
* looks copied from HBaseIndex. Presumably the mapping is held by this in-memory index
* itself; confirm and reword accordingly.
*
* @return {@code true}, always
*/
@Override
public boolean canIndexLogFiles() {
return true;
}
/**
* Index needs to be explicitly updated after storage write, so it is not implicit with storage.
*
* @return {@code false}, always
*/
@Override
public boolean isImplicitWithStorage() {
return false;
}
}

View File

@@ -264,6 +264,36 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
return true;
}
/**
* This index is not global, since it depends on the partitionPath to do the lookup.
*
* @return {@code false}, always
*/
@Override
public boolean isGlobal() {
return false;
}
/**
* No indexes into log files yet, so this capability is reported as unavailable.
*
* @return {@code false}, always
*/
@Override
public boolean canIndexLogFiles() {
return false;
}
/**
* Bloom filters are stored in the same data files, so this index is implicit with storage.
*
* @return {@code true}, always
*/
@Override
public boolean isImplicitWithStorage() {
return true;
}
/**
* If we don't have key ranges, then we also need to compare against the file; there is no other choice.
* If we do, then only compare against the file if the record key falls in range.

View File

@@ -50,6 +50,8 @@ import scala.Tuple2;
* - Could increase write amplification on copy-on-write storage since inserts always rewrite files
* - Not global.
*
*
*
*/
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
@@ -88,4 +90,35 @@ public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T>
// nothing to rollback in the index.
return true;
}
/**
* Bucketing is still done within each partition, so this index is not global.
*
* @return {@code false}, always
*/
@Override
public boolean isGlobal() {
return false;
}
/**
* Since indexing is just a deterministic hash, we can identify the file group correctly even
* without an index on the actual log file.
*
* @return {@code true}, always
*/
@Override
public boolean canIndexLogFiles() {
return true;
}
/**
* Indexing is just a hash function, so this index is reported as implicit with storage.
*
* @return {@code true}, always
*/
@Override
public boolean isImplicitWithStorage() {
return true;
}
}

View File

@@ -241,4 +241,34 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
// not the other way around
return true;
}
/**
* Only looks up by recordKey, so this index reports itself as global.
*
* @return {@code true}, always
*/
@Override
public boolean isGlobal() {
return true;
}
/**
* Mapping is available in HBase already, so log files can be indexed.
*
* @return {@code true}, always
*/
@Override
public boolean canIndexLogFiles() {
return true;
}
/**
* Index needs to be explicitly updated after storage write, so it is not implicit with storage.
*
* @return {@code false}, always
*/
@Override
public boolean isImplicitWithStorage() {
return false;
}
}