Adding canIndexLogFiles(), isImplicitWithStorage(), isGlobal() to HoodieIndex
This commit is contained in:
committed by
vinoth chandar
parent
6230e15191
commit
9f98ae643b
@@ -119,6 +119,8 @@ public class WriteStatus implements Serializable {
|
||||
return totalRecords;
|
||||
}
|
||||
|
||||
public long getTotalErrorRecords() { return totalErrorRecords; }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
final StringBuilder sb = new StringBuilder("WriteStatus {");
|
||||
|
||||
@@ -88,6 +88,35 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
|
||||
*/
|
||||
public abstract boolean rollbackCommit(String commitTime);
|
||||
|
||||
/**
|
||||
* An index is `global` if the {@link HoodieKey} to fileID mapping does not depend on the `partitionPath`.
|
||||
* Such an implementation is able to obtain the same mapping for two hoodie keys with the same `recordKey`
|
||||
* but different `partitionPath`
|
||||
*
|
||||
* @return whether or not the index implementation is global in nature
|
||||
*/
|
||||
public abstract boolean isGlobal();
|
||||
|
||||
/**
|
||||
* This is used by storage to determine whether it is safe to send inserts straight to the log,
|
||||
* i.e. having a {@link com.uber.hoodie.common.model.FileSlice} with no data file.
|
||||
*
|
||||
* @return true if the implementation is able to index log files, false otherwise
|
||||
*/
|
||||
public abstract boolean canIndexLogFiles();
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* An index is "implicit" with respect to storage if just writing new data to a file slice
|
||||
* updates the index as well. This is used by storage to reduce its memory footprint in
|
||||
* certain cases.
|
||||
*
|
||||
* @return true if writing new data to a file slice also updates the index, false otherwise
|
||||
*/
|
||||
public abstract boolean isImplicitWithStorage();
|
||||
|
||||
|
||||
public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
|
||||
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
|
||||
switch (config.getIndexType()) {
|
||||
|
||||
@@ -110,7 +110,36 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
|
||||
|
||||
@Override
|
||||
public boolean rollbackCommit(String commitTime) {
|
||||
// TODO (weiy)
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Only looks up by recordKey
|
||||
*
|
||||
* @return true, since lookups are done by recordKey only
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping is available in the in-memory index already.
|
||||
*
|
||||
* @return true, this index is able to index log files
|
||||
*/
|
||||
@Override
|
||||
public boolean canIndexLogFiles() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Index needs to be explicitly updated after storage write.
|
||||
*
|
||||
* @return false, since the index must be updated explicitly after the storage write
|
||||
*/
|
||||
@Override
|
||||
public boolean isImplicitWithStorage() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -264,6 +264,36 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is not global, since we depend on the partitionPath to do the lookup
|
||||
*
|
||||
* @return false, since the lookup depends on the partitionPath
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* No indexes into log files yet.
|
||||
*
|
||||
* @return false, since log files are not indexed yet
|
||||
*/
|
||||
@Override
|
||||
public boolean canIndexLogFiles() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bloom filters are stored in the same data files.
|
||||
*
|
||||
* @return true, since the bloom filters are stored in the data files themselves
|
||||
*/
|
||||
@Override
|
||||
public boolean isImplicitWithStorage() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* If we don't have key ranges, we have no choice but to compare against the file.
|
||||
* If we do, then only compare against the file if the record key falls in range.
|
||||
|
||||
@@ -50,6 +50,8 @@ import scala.Tuple2;
|
||||
* - Could increase write amplification on copy-on-write storage since inserts always rewrite files
|
||||
* - Not global.
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
|
||||
@@ -88,4 +90,35 @@ public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T>
|
||||
// nothing to rollback in the index.
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Bucketing is still done within each partition.
|
||||
*
|
||||
* @return false, since bucketing is done within each partition
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Since indexing is just a deterministic hash, we can identify file group correctly even without an index
|
||||
* on the actual log file.
|
||||
*
|
||||
* @return true, since the file group is identified by a deterministic hash
|
||||
*/
|
||||
@Override
|
||||
public boolean canIndexLogFiles() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indexing is just a hash function.
|
||||
*
|
||||
* @return true, since indexing is just a hash function
|
||||
*/
|
||||
@Override
|
||||
public boolean isImplicitWithStorage() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,4 +241,34 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||
// not the other way around
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Only looks up by recordKey
|
||||
*
|
||||
* @return true, since lookups are done by recordKey only
|
||||
*/
|
||||
@Override
|
||||
public boolean isGlobal() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping is available in HBase already.
|
||||
*
|
||||
* @return true, since the mapping is already available in HBase
|
||||
*/
|
||||
@Override
|
||||
public boolean canIndexLogFiles() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Index needs to be explicitly updated after storage write.
|
||||
*
|
||||
* @return false, since the index must be updated explicitly after the storage write
|
||||
*/
|
||||
@Override
|
||||
public boolean isImplicitWithStorage() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user