1
0

[HUDI-999] [RFC-12] Parallelize fetching of source data files/partitions (#1924)

This commit is contained in:
Udit Mehrotra
2020-08-06 23:44:57 -07:00
committed by GitHub
parent b51646dcc7
commit ab453f2623
4 changed files with 95 additions and 52 deletions

View File

@@ -63,20 +63,15 @@ public class TestBootstrapUtils extends HoodieClientTestBase {
}
});
-List<Pair<String, List<HoodieFileStatus>>> collected =
-BootstrapUtils.getAllLeafFoldersWithFiles(metaClient.getFs(), basePath, (status) -> {
-return true;
-});
+List<Pair<String, List<HoodieFileStatus>>> collected = BootstrapUtils.getAllLeafFoldersWithFiles(metaClient,
+metaClient.getFs(), basePath, jsc);
assertEquals(3, collected.size());
collected.stream().forEach(k -> {
assertEquals(2, k.getRight().size());
});
// Simulate reading from un-partitioned dataset
-collected =
-BootstrapUtils.getAllLeafFoldersWithFiles(metaClient.getFs(), basePath + "/" + folders.get(0), (status) -> {
-return true;
-});
+collected = BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), basePath + "/" + folders.get(0), jsc);
assertEquals(1, collected.size());
collected.stream().forEach(k -> {
assertEquals(2, k.getRight().size());