1
0

[HUDI-2881] Compact the file group with larger log files to reduce write amplification (#4152)

This commit is contained in:
Shawy Geng
2021-12-02 09:41:04 +08:00
committed by GitHub
parent f4c25ba3fd
commit 5284730175
4 changed files with 27 additions and 6 deletions

View File

@@ -99,19 +99,20 @@ public class TestHoodieCompactionStrategy {
sizesMap.put(90 * MB, Collections.singletonList(1024 * MB));
LogFileSizeBasedCompactionStrategy strategy = new LogFileSizeBasedCompactionStrategy();
HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetIOPerCompactionInMB(400).build())
HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetIOPerCompactionInMB(1205)
.withLogFileSizeThresholdBasedCompaction(100 * 1024 * 1024).build())
.build();
List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
assertTrue(returned.size() < operations.size(),
"LogFileSizeBasedCompactionStrategy should have resulted in fewer compactions");
assertEquals(1, returned.size(), "LogFileSizeBasedCompactionStrategy should have resulted in 1 compaction");
assertEquals(2, returned.size(), "LogFileSizeBasedCompactionStrategy should have resulted in 2 compaction");
// Sum of the TOTAL_IO_MB metric across the returned compaction operations
Long returnedSize = returned.stream().map(s -> s.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB))
.map(Double::longValue).reduce(Long::sum).orElse(0L);
assertEquals(1204, (long) returnedSize,
"Should chose the first 2 compactions which should result in a total IO of 690 MB");
assertEquals(1594, (long) returnedSize,
"Should chose the first 2 compactions which should result in a total IO of 1594 MB");
}
@Test