1
0

[HUDI-427] [HUDI-971] Implement CLI support for performing bootstrap (#1869)

* [HUDI-971] Clean partitions & fileIds returned by HFileBootstrapIndex
* [HUDI-427] Implement CLI support for performing bootstrap

Co-authored-by: Wenning Ding <wenningd@amazon.com>
Co-authored-by: Balaji Varadarajan <vbalaji@apache.org>
This commit is contained in:
wenningd
2020-08-08 12:37:29 -07:00
committed by GitHub
parent 5ee676e34f
commit 9fe2d2b14a
11 changed files with 448 additions and 28 deletions

View File

@@ -71,14 +71,15 @@ public abstract class BootstrapIndex implements Serializable {
}
/**
* Check if bootstrap Index is present and ensures readable.
* Check if bootstrap Index is physically present. It does not guarantee the validity of the index.
* To ensure an index is valid, use useIndex() API.
*/
protected abstract boolean isPresent();
/**
* Bootstrap Index Reader Interface.
*/
public abstract static class IndexReader implements Serializable, AutoCloseable {
public abstract static class IndexReader implements Serializable, AutoCloseable {
protected final HoodieTableMetaClient metaClient;
@@ -102,7 +103,7 @@ public abstract class BootstrapIndex implements Serializable {
* Return the list of indexed file group ids.
* @return
*/
public abstract List<String> getIndexedFileIds();
public abstract List<HoodieFileGroupId> getIndexedFileGroupIds();
/**
* Lookup bootstrap index by partition.

View File

@@ -29,6 +29,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
@@ -57,6 +58,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
@@ -79,6 +81,13 @@ public class HFileBootstrapIndex extends BootstrapIndex {
public static final String BOOTSTRAP_INDEX_FILE_ID = "00000000-0000-0000-0000-000000000000-0";
private static final String PARTITION_KEY_PREFIX = "part";
private static final String FILE_ID_KEY_PREFIX = "fileid";
private static final String KEY_VALUE_SEPARATOR = "=";
private static final String KEY_PARTS_SEPARATOR = ";";
// This is part of the suffix that HFile appends to every key
private static final String HFILE_CELL_KEY_SUFFIX_PART = "//LATEST_TIMESTAMP/Put/vlen";
// Additional Metadata written to HFiles.
public static final byte[] INDEX_INFO_KEY = Bytes.toBytes("INDEX_INFO");
@@ -96,12 +105,44 @@ public class HFileBootstrapIndex extends BootstrapIndex {
}
}
/**
* Returns partition-key to be used in HFile.
* @param partition Partition-Path
* @return
*/
private static String getPartitionKey(String partition) {
return "part=" + partition;
return getKeyValueString(PARTITION_KEY_PREFIX, partition);
}
/**
* Returns file group key to be used in HFile.
* @param fileGroupId File Group Id.
* @return
*/
private static String getFileGroupKey(HoodieFileGroupId fileGroupId) {
return "part=" + fileGroupId.getPartitionPath() + ";fileid=" + fileGroupId.getFileId();
return getPartitionKey(fileGroupId.getPartitionPath()) + KEY_PARTS_SEPARATOR
+ getKeyValueString(FILE_ID_KEY_PREFIX, fileGroupId.getFileId());
}
/**
 * Extracts the partition path from a partition key of the form
 * {@code PARTITION_KEY_PREFIX + KEY_VALUE_SEPARATOR + partition}.
 *
 * <p>Only the first separator is treated as the delimiter, so partition paths that themselves
 * contain the separator (for example hive-style "year=2020") are returned intact.
 *
 * @param key partition key
 * @return the value following the first key/value separator
 * @throws IllegalArgumentException if the key has no separator or does not start with the partition prefix
 */
private static String getPartitionFromKey(String key) {
  // Use the same separator constant that is used when the key is built, and locate it with
  // indexOf so the separator is never interpreted as a regular expression.
  int separatorIndex = key.indexOf(KEY_VALUE_SEPARATOR);
  ValidationUtils.checkArgument(separatorIndex >= 0,
      "Malformed partition key (missing '" + KEY_VALUE_SEPARATOR + "'): " + key);
  ValidationUtils.checkArgument(key.substring(0, separatorIndex).equals(PARTITION_KEY_PREFIX),
      "Partition key does not start with '" + PARTITION_KEY_PREFIX + "': " + key);
  return key.substring(separatorIndex + KEY_VALUE_SEPARATOR.length());
}
/**
 * Extracts the file id from a key part of the form
 * {@code FILE_ID_KEY_PREFIX + KEY_VALUE_SEPARATOR + fileId}.
 *
 * @param key file-id key part
 * @return the value following the first key/value separator
 * @throws IllegalArgumentException if the key has no separator or does not start with the file-id prefix
 */
private static String getFileIdFromKey(String key) {
  // Use the same separator constant that is used when the key is built, and locate it with
  // indexOf so the separator is never interpreted as a regular expression.
  int separatorIndex = key.indexOf(KEY_VALUE_SEPARATOR);
  ValidationUtils.checkArgument(separatorIndex >= 0,
      "Malformed file-id key (missing '" + KEY_VALUE_SEPARATOR + "'): " + key);
  ValidationUtils.checkArgument(key.substring(0, separatorIndex).equals(FILE_ID_KEY_PREFIX),
      "File-id key does not start with '" + FILE_ID_KEY_PREFIX + "': " + key);
  return key.substring(separatorIndex + KEY_VALUE_SEPARATOR.length());
}
/**
 * Parses a file group key back into a {@link HoodieFileGroupId}.
 *
 * <p>The key is split at the first {@code KEY_PARTS_SEPARATOR}: the part before it is parsed as
 * the partition key and the remainder as the file-id key.
 *
 * @param key file group key
 * @return file group id built from the partition path and file id found in the key
 * @throws IllegalArgumentException if the key does not contain the parts separator
 */
private static HoodieFileGroupId getFileGroupFromKey(String key) {
  int separatorIndex = key.indexOf(KEY_PARTS_SEPARATOR);
  // Without this check a key lacking the separator would surface as an index-out-of-bounds error.
  ValidationUtils.checkArgument(separatorIndex >= 0,
      "Malformed file group key (missing '" + KEY_PARTS_SEPARATOR + "'): " + key);
  String partitionPart = key.substring(0, separatorIndex);
  String fileIdPart = key.substring(separatorIndex + KEY_PARTS_SEPARATOR.length());
  return new HoodieFileGroupId(getPartitionFromKey(partitionPart), getFileIdFromKey(fileIdPart));
}
/**
 * Builds a single "key<separator>value" token using {@code KEY_VALUE_SEPARATOR}.
 *
 * @param key   token name
 * @param value token value
 * @return key and value joined by the key/value separator
 */
private static String getKeyValueString(String key, String value) {
  return String.join(KEY_VALUE_SEPARATOR, key, value);
}
private static Path partitionIndexPath(HoodieTableMetaClient metaClient) {
@@ -116,6 +157,17 @@ public class HFileBootstrapIndex extends BootstrapIndex {
HoodieFileFormat.HFILE.getFileExtension()));
}
/**
 * Extracts the user key from an HFile cell key.
 *
 * <p>HFile renders a cell key as the user key followed by a suffix, for example
 * "2020/03/18//LATEST_TIMESTAMP/Put/vlen=3692/seqid=0". Everything before the last occurrence of
 * {@code HFILE_CELL_KEY_SUFFIX_PART} is returned.
 *
 * @param cellKey HFile cell key
 * @return the portion of {@code cellKey} that precedes the HFile suffix
 * @throws IllegalArgumentException if {@code cellKey} does not contain the expected HFile suffix
 */
private static String getUserKeyFromCellKey(String cellKey) {
  int hfileSuffixBeginIndex = cellKey.lastIndexOf(HFILE_CELL_KEY_SUFFIX_PART);
  if (hfileSuffixBeginIndex < 0) {
    // lastIndexOf yields -1 when the suffix is absent; substring(0, -1) would otherwise fail
    // with an opaque StringIndexOutOfBoundsException that hides the offending key.
    throw new IllegalArgumentException("HFile cell key is missing the expected suffix '"
        + HFILE_CELL_KEY_SUFFIX_PART + "': " + cellKey);
  }
  return cellKey.substring(0, hfileSuffixBeginIndex);
}
/**
* Helper method to create HFile Reader.
*
@@ -160,7 +212,7 @@ public class HFileBootstrapIndex extends BootstrapIndex {
}
@Override
protected boolean isPresent() {
public boolean isPresent() {
return isPresent;
}
@@ -240,21 +292,21 @@ public class HFileBootstrapIndex extends BootstrapIndex {
@Override
public List<String> getIndexedPartitionPaths() {
HFileScanner scanner = partitionIndexReader().getScanner(true, true);
return getAllKeys(scanner);
return getAllKeys(scanner, HFileBootstrapIndex::getPartitionFromKey);
}
@Override
public List<String> getIndexedFileIds() {
public List<HoodieFileGroupId> getIndexedFileGroupIds() {
HFileScanner scanner = fileIdIndexReader().getScanner(true, true);
return getAllKeys(scanner);
return getAllKeys(scanner, HFileBootstrapIndex::getFileGroupFromKey);
}
private List<String> getAllKeys(HFileScanner scanner) {
List<String> keys = new ArrayList<>();
private <T> List<T> getAllKeys(HFileScanner scanner, Function<String, T> converter) {
List<T> keys = new ArrayList<>();
try {
boolean available = scanner.seekTo();
while (available) {
keys.add(CellUtil.getCellKeyAsString(scanner.getKeyValue()));
keys.add(converter.apply(getUserKeyFromCellKey(CellUtil.getCellKeyAsString(scanner.getKeyValue()))));
available = scanner.next();
}
} catch (IOException ioe) {