perf(hudi-query): 优化hudi时间线的扫描速度

This commit is contained in:
v-zhangjc9
2024-05-22 13:11:50 +08:00
parent 2b7b7f838c
commit bff18280f3
7 changed files with 261 additions and 82 deletions

View File

@@ -1,6 +1,10 @@
package com.lanyuanxiaoyao.service.command.pro.commands;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.configuration.ExecutorProvider;
import com.lanyuanxiaoyao.service.forest.service.HudiService;
import com.lanyuanxiaoyao.service.forest.service.InfoService;
import java.io.IOException;
import java.time.Instant;
import java.util.List;
@@ -50,10 +54,19 @@ import org.springframework.shell.standard.ShellOption;
* @author lanyuanxiaoyao
* @date 2024-03-19
*/
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
@ShellComponent("Hudi相关操作")
public class HudiCommand {
private static final Logger logger = LoggerFactory.getLogger(HudiCommand.class);
private final InfoService infoService;
private final HudiService hudiService;
/**
 * Creates the shell component and stores the services its commands delegate to.
 *
 * @param infoService service kept in {@code this.infoService}; commands in this class
 *                    query it for the table metadata list
 * @param hudiService service kept in {@code this.hudiService}; commands in this class
 *                    query it for Hudi timeline instants
 */
public HudiCommand(InfoService infoService, HudiService hudiService) {
    this.hudiService = hudiService;
    this.infoService = infoService;
}
public static void time(String name, Runnable runnable) {
logger.info(name);
long startTime = Instant.now().toEpochMilli();
@@ -158,6 +171,41 @@ public class HudiCommand {
time("reader 3", counter -> reader3(counter, configuration, root));
}
/**
 * Counts the regular files located directly under the {@code .hoodie} directory of each
 * selected table's Hudi target HDFS path and logs the total as {@code Count: N}.
 *
 * <p>Any entry whose full path contains {@code INVALID} (case-insensitive) is additionally
 * logged, whether it is a file or a directory. A listing failure for one table is logged as
 * a warning and does not abort the remaining tables.
 *
 * @param alias table alias to restrict the scan to; when blank, every table returned by
 *              {@code infoService.tableMetaList()} is scanned
 * @throws IOException if the file system cannot be created or closed
 */
@ShellMethod("Count meta files")
public void countMetaFiles(@ShellOption(help = "alias", defaultValue = "") String alias) throws IOException {
    // LongAdder because increments are issued from the parallel forEach below.
    LongAdder count = new LongAdder();
    // FileSystem.get() hands out a shared, cached instance; closing that one would break any
    // other code in the JVM still using it. newInstance() returns a private instance that is
    // safe to close, and try-with-resources guarantees the close even if the pipeline throws.
    try (FileSystem fileSystem = FileSystem.newInstance(new Configuration())) {
        infoService
            .tableMetaList()
            .select(meta -> StrUtil.isBlank(alias) || StrUtil.equals(meta.getAlias(), alias))
            .collect(TableMeta::getHudi)
            .collect(TableMeta.HudiMeta::getTargetHdfsPath)
            .asParallel(ExecutorProvider.EXECUTORS_5, 1)
            .forEach(hdfs -> {
                Path root = new Path(hdfs, ".hoodie");
                try {
                    for (FileStatus status : fileSystem.listStatus(root)) {
                        if (status.isFile()) {
                            count.increment();
                        }
                        if (StrUtil.containsIgnoreCase(status.getPath().toString(), "INVALID")) {
                            logger.info("{}", status.getPath().toString());
                        }
                    }
                } catch (IOException e) {
                    // Best effort: keep scanning the other tables.
                    logger.warn("List file error", e);
                }
            });
        logger.info("Count: {}", count.longValue());
    }
}
/**
 * Logs, one per line at INFO level, the string form of every instant returned by
 * {@code hudiService.timelineHdfsAllActive(hdfs)}.
 *
 * @param hdfs root HDFS path passed through to the Hudi service
 */
@ShellMethod("Get timeline instants")
public void timelineInstant(@ShellOption(help = "root hdfs path") String hdfs) {
    hudiService
        .timelineHdfsAllActive(hdfs)
        .forEach(item -> {
            String text = item.toString();
            logger.info(text);
        });
}
/**
 * A timed task that receives a shared counter to report how many items it processed.
 *
 * <p>Note: inside the enclosing class this type shadows {@link java.lang.Runnable}; unlike
 * that interface, {@link #run(LongAdder)} takes an argument.
 */
@FunctionalInterface
public interface Runnable {
    /**
     * Executes the task.
     *
     * @param counter accumulator supplied by the caller for the task to update
     */
    void run(LongAdder counter);
}