Adding canIndexLogFiles(), isImplicitWithStorage(), isGlobal() to HoodieIndex
This commit is contained in:
committed by
vinoth chandar
parent
6230e15191
commit
9f98ae643b
@@ -119,6 +119,8 @@ public class WriteStatus implements Serializable {
|
|||||||
return totalRecords;
|
return totalRecords;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getTotalErrorRecords() { return totalErrorRecords; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuilder sb = new StringBuilder("WriteStatus {");
|
final StringBuilder sb = new StringBuilder("WriteStatus {");
|
||||||
|
|||||||
@@ -88,6 +88,35 @@ public abstract class HoodieIndex<T extends HoodieRecordPayload> implements Seri
|
|||||||
*/
|
*/
|
||||||
public abstract boolean rollbackCommit(String commitTime);
|
public abstract boolean rollbackCommit(String commitTime);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An index is `global` if the {@link HoodieKey} to fileID mapping does not depend on the `partitionPath`.
|
||||||
|
* Such an implementation is able to obtain the same mapping for two hoodie keys with the same `recordKey`
|
||||||
|
* but different `partitionPath`
|
||||||
|
*
|
||||||
|
* @return whether or not the index implementation is global in nature
|
||||||
|
*/
|
||||||
|
public abstract boolean isGlobal();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is used by storage to determine whether it is safe to send inserts straight to the log,
|
||||||
|
* i.e. having a {@link com.uber.hoodie.common.model.FileSlice} with no data file.
|
||||||
|
*
|
||||||
|
* @return true if the implementation has this capability, false otherwise
|
||||||
|
*/
|
||||||
|
public abstract boolean canIndexLogFiles();
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* An index is "implicit" with respect to storage if just writing new data to a file slice
|
||||||
|
* updates the index as well. This is used by storage to reduce memory footprint in
|
||||||
|
* certain cases.
|
||||||
|
*
|
||||||
|
* @return whether writing new data to a file slice also updates the index
|
||||||
|
*/
|
||||||
|
public abstract boolean isImplicitWithStorage();
|
||||||
|
|
||||||
|
|
||||||
public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
|
public static <T extends HoodieRecordPayload> HoodieIndex<T> createIndex(
|
||||||
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
|
HoodieWriteConfig config, JavaSparkContext jsc) throws HoodieIndexException {
|
||||||
switch (config.getIndexType()) {
|
switch (config.getIndexType()) {
|
||||||
|
|||||||
@@ -110,7 +110,36 @@ public class InMemoryHashIndex<T extends HoodieRecordPayload> extends HoodieInde
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean rollbackCommit(String commitTime) {
|
public boolean rollbackCommit(String commitTime) {
|
||||||
// TODO (weiy)
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only looks up by recordKey
|
||||||
|
*
|
||||||
|
* @return true, since the lookup uses only the recordKey
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isGlobal() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mapping is already available in the in-memory index.
|
||||||
|
*
|
||||||
|
* @return always true for this implementation
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean canIndexLogFiles() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Index needs to be explicitly updated after storage write.
|
||||||
|
*
|
||||||
|
* @return false, since the index must be updated explicitly after a storage write
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isImplicitWithStorage() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -264,6 +264,36 @@ public class HoodieBloomIndex<T extends HoodieRecordPayload> extends HoodieIndex
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is not global, since we depend on the partitionPath to do the lookup
|
||||||
|
*
|
||||||
|
* @return false, since the lookup depends on the partitionPath
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isGlobal() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* No indexes into log files yet.
|
||||||
|
*
|
||||||
|
* @return false, since log files are not indexed yet
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean canIndexLogFiles() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bloom filters are stored in the same data files.
|
||||||
|
*
|
||||||
|
* @return true, since the bloom filters are stored in the data files themselves
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isImplicitWithStorage() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* if we don't have key ranges, then we still need to compare against the file; no other choice
|
* if we don't have key ranges, then we still need to compare against the file; no other choice
|
||||||
* if we do, then only compare the file if the record key falls in range.
|
* if we do, then only compare the file if the record key falls in range.
|
||||||
|
|||||||
@@ -50,6 +50,8 @@ import scala.Tuple2;
|
|||||||
* - Could increase write amplification on copy-on-write storage since inserts always rewrite files
|
* - Could increase write amplification on copy-on-write storage since inserts always rewrite files
|
||||||
* - Not global.
|
* - Not global.
|
||||||
*
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
||||||
|
|
||||||
@@ -88,4 +90,35 @@ public class BucketedIndex<T extends HoodieRecordPayload> extends HoodieIndex<T>
|
|||||||
// nothing to rollback in the index.
|
// nothing to rollback in the index.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bucketing is still done within each partition.
|
||||||
|
*
|
||||||
|
* @return false, since bucketing is done within each partition
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isGlobal() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Since indexing is just a deterministic hash, we can identify file group correctly even without an index
|
||||||
|
* on the actual log file.
|
||||||
|
*
|
||||||
|
* @return true, since the file group is identified by a deterministic hash
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean canIndexLogFiles() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indexing is just a hash function.
|
||||||
|
*
|
||||||
|
* @return true, since indexing is just a hash function
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isImplicitWithStorage() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -241,4 +241,34 @@ public class HBaseIndex<T extends HoodieRecordPayload> extends HoodieIndex<T> {
|
|||||||
// not the other way around
|
// not the other way around
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only looks up by recordKey
|
||||||
|
*
|
||||||
|
* @return true, since the lookup uses only the recordKey
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isGlobal() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mapping is available in HBase already.
|
||||||
|
*
|
||||||
|
* @return true, since the mapping is already available in HBase
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean canIndexLogFiles() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Index needs to be explicitly updated after storage write.
|
||||||
|
*
|
||||||
|
* @return false, since the index must be updated explicitly after a storage write
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean isImplicitWithStorage() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user