[HUDI-2502] Refactor index in hudi-client module (#3778)
- Refactor Index to reduce lines of code and enable reuse across engines.
This commit is contained in:
@@ -21,6 +21,8 @@ package org.apache.hudi.index.bloom;
|
||||
import org.apache.hudi.common.bloom.BloomFilter;
|
||||
import org.apache.hudi.common.bloom.BloomFilterFactory;
|
||||
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
|
||||
import org.apache.hudi.common.data.HoodieList;
|
||||
import org.apache.hudi.common.data.HoodieMapPair;
|
||||
import org.apache.hudi.common.model.HoodieKey;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
@@ -100,7 +102,7 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
@MethodSource("configParams")
|
||||
public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) throws Exception {
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
FlinkHoodieBloomIndex index = new FlinkHoodieBloomIndex(config);
|
||||
HoodieBloomIndex index = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
HoodieTable hoodieTable = HoodieFlinkTable.create(config, context, metaClient, false);
|
||||
HoodieFlinkWriteableTestTable testTable = HoodieFlinkWriteableTestTable.of(hoodieTable, SCHEMA);
|
||||
|
||||
@@ -165,7 +167,7 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
@MethodSource("configParams")
|
||||
public void testRangePruning(boolean rangePruning, boolean treeFiltering, boolean bucketizedChecking) {
|
||||
HoodieWriteConfig config = makeConfig(rangePruning, treeFiltering, bucketizedChecking);
|
||||
FlinkHoodieBloomIndex index = new FlinkHoodieBloomIndex(config);
|
||||
HoodieBloomIndex index = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
|
||||
final Map<String, List<BloomIndexFileInfo>> partitionToFileIndexInfo = new HashMap<>();
|
||||
partitionToFileIndexInfo.put("2017/10/22",
|
||||
@@ -176,14 +178,14 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
Map<String, List<String>> partitionRecordKeyMap = new HashMap<>();
|
||||
asList(Pair.of("2017/10/22", "003"), Pair.of("2017/10/22", "002"),
|
||||
Pair.of("2017/10/22", "005"), Pair.of("2017/10/22", "004"))
|
||||
.forEach(t -> {
|
||||
List<String> recordKeyList = partitionRecordKeyMap.getOrDefault(t.getLeft(), new ArrayList<>());
|
||||
recordKeyList.add(t.getRight());
|
||||
partitionRecordKeyMap.put(t.getLeft(), recordKeyList);
|
||||
});
|
||||
.forEach(t -> {
|
||||
List<String> recordKeyList = partitionRecordKeyMap.getOrDefault(t.getLeft(), new ArrayList<>());
|
||||
recordKeyList.add(t.getRight());
|
||||
partitionRecordKeyMap.put(t.getLeft(), recordKeyList);
|
||||
});
|
||||
|
||||
List<Pair<String, HoodieKey>> comparisonKeyList =
|
||||
index.explodeRecordsWithFileComparisons(partitionToFileIndexInfo, partitionRecordKeyMap);
|
||||
List<Pair<String, HoodieKey>> comparisonKeyList = HoodieList.getList(
|
||||
index.explodeRecordsWithFileComparisons(partitionToFileIndexInfo, HoodieMapPair.of(partitionRecordKeyMap)));
|
||||
|
||||
assertEquals(10, comparisonKeyList.size());
|
||||
java.util.Map<String, List<String>> recordKeyToFileComps = comparisonKeyList.stream()
|
||||
@@ -264,10 +266,10 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
HoodieFlinkTable table = HoodieFlinkTable.create(config, context, metaClient);
|
||||
|
||||
// Let's tag
|
||||
FlinkHoodieBloomIndex bloomIndex = new FlinkHoodieBloomIndex(config);
|
||||
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
|
||||
assertDoesNotThrow(() -> {
|
||||
bloomIndex.tagLocation(records, context, table);
|
||||
tagLocation(bloomIndex, records, table);
|
||||
}, "EmptyList should not result in IllegalArgumentException: Positive number of slices required");
|
||||
}
|
||||
|
||||
@@ -303,8 +305,8 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
HoodieFlinkWriteableTestTable testTable = HoodieFlinkWriteableTestTable.of(hoodieTable, SCHEMA);
|
||||
|
||||
// Let's tag
|
||||
FlinkHoodieBloomIndex bloomIndex = new FlinkHoodieBloomIndex(config);
|
||||
List<HoodieRecord> taggedRecords = bloomIndex.tagLocation(records, context, hoodieTable);
|
||||
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
List<HoodieRecord> taggedRecords = tagLocation(bloomIndex, records, hoodieTable);
|
||||
|
||||
// Should not find any files
|
||||
for (HoodieRecord record : taggedRecords) {
|
||||
@@ -319,7 +321,7 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
metaClient.reloadActiveTimeline();
|
||||
|
||||
// We do the tag again
|
||||
taggedRecords = bloomIndex.tagLocation(records, context, HoodieFlinkTable.create(config, context, metaClient));
|
||||
taggedRecords = tagLocation(bloomIndex, records, HoodieFlinkTable.create(config, context, metaClient));
|
||||
|
||||
// Check results
|
||||
for (HoodieRecord record : taggedRecords) {
|
||||
@@ -370,10 +372,10 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
HoodieFlinkWriteableTestTable testTable = HoodieFlinkWriteableTestTable.of(hoodieTable, SCHEMA);
|
||||
|
||||
// Let's tag
|
||||
FlinkHoodieBloomIndex bloomIndex = new FlinkHoodieBloomIndex(config);
|
||||
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
List<HoodieRecord> toTagRecords = new ArrayList<>();
|
||||
toTagRecords.add(new HoodieRecord(record4.getKey(), null));
|
||||
List<HoodieRecord> taggedRecords = bloomIndex.tagLocation(toTagRecords, context, hoodieTable);
|
||||
List<HoodieRecord> taggedRecords = tagLocation(bloomIndex, toTagRecords, hoodieTable);
|
||||
Map<HoodieKey, Option<Pair<String, String>>> recordLocations = new HashMap<>();
|
||||
for (HoodieRecord taggedRecord : taggedRecords) {
|
||||
recordLocations.put(taggedRecord.getKey(), taggedRecord.isCurrentLocationKnown()
|
||||
@@ -398,7 +400,7 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
taggedRecords.add(new HoodieRecord(key, null));
|
||||
}
|
||||
|
||||
taggedRecords = bloomIndex.tagLocation(toTagRecords1, context, hoodieTable);
|
||||
taggedRecords = tagLocation(bloomIndex, toTagRecords1, hoodieTable);
|
||||
recordLocations.clear();
|
||||
for (HoodieRecord taggedRecord : taggedRecords) {
|
||||
recordLocations.put(taggedRecord.getKey(), taggedRecord.isCurrentLocationKnown()
|
||||
@@ -452,8 +454,8 @@ public class TestFlinkHoodieBloomIndex extends HoodieFlinkClientTestHarness {
|
||||
metaClient = HoodieTableMetaClient.reload(metaClient);
|
||||
HoodieTable table = HoodieFlinkTable.create(config, context, metaClient);
|
||||
|
||||
FlinkHoodieBloomIndex bloomIndex = new FlinkHoodieBloomIndex(config);
|
||||
List<HoodieRecord> taggedRecords = bloomIndex.tagLocation(records, context, table);
|
||||
HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance());
|
||||
List<HoodieRecord> taggedRecords = tagLocation(bloomIndex, records, table);
|
||||
|
||||
// Check results
|
||||
for (HoodieRecord record : taggedRecords) {
|
||||
|
||||
@@ -21,6 +21,7 @@ package org.apache.hudi.testutils;
|
||||
import org.apache.hudi.client.FlinkTaskContextSupplier;
|
||||
import org.apache.hudi.client.HoodieFlinkWriteClient;
|
||||
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
|
||||
import org.apache.hudi.common.data.HoodieList;
|
||||
import org.apache.hudi.common.fs.FSUtils;
|
||||
import org.apache.hudi.common.model.HoodieRecord;
|
||||
import org.apache.hudi.common.model.HoodieTableType;
|
||||
@@ -29,7 +30,9 @@ import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
|
||||
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
|
||||
import org.apache.hudi.common.testutils.HoodieTestUtils;
|
||||
import org.apache.hudi.common.testutils.minicluster.HdfsTestService;
|
||||
import org.apache.hudi.index.HoodieIndex;
|
||||
import org.apache.hudi.index.bloom.TestFlinkHoodieBloomIndex;
|
||||
import org.apache.hudi.table.HoodieTable;
|
||||
|
||||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
@@ -128,6 +131,10 @@ public class HoodieFlinkClientTestHarness extends HoodieCommonTestHarness implem
|
||||
metaClient = HoodieTestUtils.init(hadoopConf, basePath, tableType);
|
||||
}
|
||||
|
||||
/**
 * Test helper that runs {@code index.tagLocation} on a plain list of records.
 *
 * <p>The input list is wrapped with {@code HoodieList.of}, passed to the index together with
 * {@code context} and {@code table}, and the result is unwrapped back into a list with
 * {@code HoodieList.getList}.
 *
 * <p>NOTE(review): {@code context} is not declared in this method; presumably it is the engine
 * context held by this test harness -- confirm against the enclosing class.
 *
 * @param index   the index whose {@code tagLocation} is invoked
 * @param records the records to pass to the index
 * @param table   the table passed through to the index
 * @return the list extracted from the value returned by {@code index.tagLocation}
 */
protected List<HoodieRecord> tagLocation(
|
||||
HoodieIndex index, List<HoodieRecord> records, HoodieTable table) {
|
||||
return HoodieList.getList(index.tagLocation(HoodieList.of(records), context, table));
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanups file system.
|
||||
|
||||
Reference in New Issue
Block a user