1
0

Remove redundant string from file comparison RDD

This commit is contained in:
David Muto (pseudomuto)
2019-05-10 10:10:51 -04:00
committed by vinoth chandar
parent a7e6cf5197
commit 3fd2fd6e9d
6 changed files with 74 additions and 71 deletions

View File

@@ -259,12 +259,12 @@ public class TestHoodieBloomIndex {
new Tuple2<>("2017/10/22", "003"), new Tuple2<>("2017/10/22", "002"), new Tuple2<>("2017/10/22", "005"),
new Tuple2<>("2017/10/22", "004"))).mapToPair(t -> t);
List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index.explodeRecordRDDWithFileComparisons(
List<Tuple2<String, HoodieKey>> comparisonKeyList = index.explodeRecordRDDWithFileComparisons(
partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
assertEquals(10, comparisonKeyList.size());
Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream().collect(Collectors.groupingBy(
t -> t._2()._2().getRecordKey(), Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));
t -> t._2.getRecordKey(), Collectors.mapping(t -> t._1, Collectors.toList())));
assertEquals(4, recordKeyToFileComps.size());
assertEquals(new HashSet<>(Arrays.asList("f1", "f3", "f4")), new HashSet<>(recordKeyToFileComps.get("002")));

View File

@@ -190,25 +190,25 @@ public class TestHoodieGlobalBloomIndex {
new Tuple2<>("2017/10/21", "003"), new Tuple2<>("2017/10/22", "002"), new Tuple2<>("2017/10/22", "005"),
new Tuple2<>("2017/10/23", "004"))).mapToPair(t -> t);
List<Tuple2<String, Tuple2<String, HoodieKey>>> comparisonKeyList = index.explodeRecordRDDWithFileComparisons(
partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
List<Tuple2<String, HoodieKey>> comparisonKeyList =
index.explodeRecordRDDWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyPairRDD).collect();
/* epecting:
f4#003, f4, HoodieKey { recordKey=003 partitionPath=2017/10/23}
f1#003, f1, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f3#003, f3, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f4#002, f4, HoodieKey { recordKey=002 partitionPath=2017/10/23}
f1#002, f1, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f3#002, f3, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f4#005, f4, HoodieKey { recordKey=005 partitionPath=2017/10/23}
f1#005, f1, HoodieKey { recordKey=005 partitionPath=2017/10/22}
f4#004, f4, HoodieKey { recordKey=004 partitionPath=2017/10/23}
f1#004, f1, HoodieKey { recordKey=004 partitionPath=2017/10/22}
/* expecting:
f4, HoodieKey { recordKey=003 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f3, HoodieKey { recordKey=003 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=002 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f3, HoodieKey { recordKey=002 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=005 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=005 partitionPath=2017/10/22}
f4, HoodieKey { recordKey=004 partitionPath=2017/10/23}
f1, HoodieKey { recordKey=004 partitionPath=2017/10/22}
*/
assertEquals(10, comparisonKeyList.size());
Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream().collect(Collectors.groupingBy(
t -> t._2()._2().getRecordKey(), Collectors.mapping(t -> t._2()._1().split("#")[0], Collectors.toList())));
Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
.collect(Collectors.groupingBy(t -> t._2.getRecordKey(), Collectors.mapping(Tuple2::_1, Collectors.toList())));
assertEquals(4, recordKeyToFileComps.size());
assertEquals(new HashSet<>(Arrays.asList("f4", "f1", "f3")), new HashSet<>(recordKeyToFileComps.get("002")));