[HUDI-427] [HUDI-971] Implement CLI support for performing bootstrap (#1869)
* [HUDI-971] Clean partitions & fileIds returned by HFileBootstrapIndex * [HUDI-427] Implement CLI support for performing bootstrap Co-authored-by: Wenning Ding <wenningd@amazon.com> Co-authored-by: Balaji Varadarajan <vbalaji@apache.org>
This commit is contained in:
@@ -34,10 +34,12 @@ import org.apache.hadoop.fs.permission.FsAction;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
@@ -50,17 +52,18 @@ import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Unit Tests for Bootstrap Index.
|
||||
*/
|
||||
public class TestBootstrapIndex extends HoodieCommonTestHarness {
|
||||
|
||||
private static String[] PARTITIONS = {"2020/03/18", "2020/03/19", "2020/03/20", "2020/03/21"};
|
||||
private static String BOOTSTRAP_BASE_PATH = "/tmp/source/parquet_tables/table1";
|
||||
private static final String[] PARTITIONS = {"2020/03/18", "2020/03/19", "2020/03/20", "2020/03/21"};
|
||||
private static final Set<String> PARTITION_SET = Arrays.stream(PARTITIONS).collect(Collectors.toSet());
|
||||
private static final String BOOTSTRAP_BASE_PATH = "/tmp/source/parquet_tables/table1";
|
||||
|
||||
@BeforeEach
|
||||
|
||||
public void init() throws IOException {
|
||||
initMetaClient();
|
||||
}
|
||||
@@ -127,11 +130,14 @@ public class TestBootstrapIndex extends HoodieCommonTestHarness {
|
||||
private void validateBootstrapIndex(Map<String, List<BootstrapFileMapping>> bootstrapMapping) {
|
||||
BootstrapIndex index = new HFileBootstrapIndex(metaClient);
|
||||
try (BootstrapIndex.IndexReader reader = index.createReader()) {
|
||||
List<String> partitions = reader.getIndexedPartitionPaths();
|
||||
assertEquals(bootstrapMapping.size(), partitions.size());
|
||||
long expNumFileGroupKeys = bootstrapMapping.values().stream().flatMap(x -> x.stream()).count();
|
||||
long gotNumFileGroupKeys = reader.getIndexedFileIds().size();
|
||||
List<String> indexedPartitions = reader.getIndexedPartitionPaths();
|
||||
assertEquals(bootstrapMapping.size(), indexedPartitions.size());
|
||||
indexedPartitions.forEach(partition -> assertTrue(PARTITION_SET.contains(partition)));
|
||||
long expNumFileGroupKeys = bootstrapMapping.values().stream().flatMap(Collection::stream).count();
|
||||
List<HoodieFileGroupId> fileGroupIds = reader.getIndexedFileGroupIds();
|
||||
long gotNumFileGroupKeys = fileGroupIds.size();
|
||||
assertEquals(expNumFileGroupKeys, gotNumFileGroupKeys);
|
||||
fileGroupIds.forEach(fgId -> assertTrue(PARTITION_SET.contains(fgId.getPartitionPath())));
|
||||
|
||||
bootstrapMapping.entrySet().stream().forEach(e -> {
|
||||
List<BootstrapFileMapping> gotMapping = reader.getSourceFileMappingForPartition(e.getKey());
|
||||
|
||||
Reference in New Issue
Block a user