feature(hudi-query): 增加查询 Hudi 表全部时间线的功能
查询全部时间线默认包含已归档的时间线
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
package com.lanyuanxiaoyao.service.hudi;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.gson.GsonAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.ComponentScans;
|
||||
import org.springframework.retry.annotation.EnableRetry;
|
||||
|
||||
/**
|
||||
@@ -14,14 +16,20 @@ import org.springframework.retry.annotation.EnableRetry;
|
||||
* @date 2023-04-27
|
||||
*/
|
||||
@EnableDiscoveryClient
|
||||
@SpringBootApplication(exclude = {GsonAutoConfiguration.class})
|
||||
@ComponentScans({
|
||||
@ComponentScan("com.lanyuanxiaoyao.service")
|
||||
})
|
||||
@SpringBootApplication(
|
||||
scanBasePackages = {"com.lanyuanxiaoyao.service"},
|
||||
exclude = {GsonAutoConfiguration.class}
|
||||
)
|
||||
@EnableConfigurationProperties
|
||||
@EnableRetry
|
||||
public class HudiQueryApplication {
|
||||
public class HudiQueryApplication implements ApplicationRunner {
|
||||
private static final Logger logger = LoggerFactory.getLogger(HudiQueryApplication.class);
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(HudiQueryApplication.class, args);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.lanyuanxiaoyao.service.hudi.controller;
|
||||
|
||||
import com.lanyuanxiaoyao.service.configuration.entity.hudi.HudiInstant;
|
||||
import com.lanyuanxiaoyao.service.hudi.service.TimelineService;
|
||||
import java.io.IOException;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
/**
|
||||
* 时间线
|
||||
*
|
||||
* @author lanyuanxiaoyao
|
||||
* @date 2023-05-01
|
||||
*/
|
||||
@RestController
|
||||
@RequestMapping("timeline")
|
||||
public class TimelineController {
|
||||
private static final Logger logger = LoggerFactory.getLogger(TimelineController.class);
|
||||
|
||||
private final TimelineService timelineService;
|
||||
|
||||
public TimelineController(TimelineService timelineService) {
|
||||
this.timelineService = timelineService;
|
||||
}
|
||||
|
||||
@GetMapping("list")
|
||||
public ImmutableList<HudiInstant> allInstants(@RequestParam("flink_job_id") Long flinkJobId, @RequestParam("alias") String alias) throws IOException {
|
||||
return timelineService.timeline(flinkJobId, alias);
|
||||
}
|
||||
|
||||
@GetMapping("list_hdfs")
|
||||
public ImmutableList<HudiInstant> allInstants(@RequestParam("hdfs") String hdfs) throws IOException {
|
||||
return timelineService.timeline(hdfs);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
package com.lanyuanxiaoyao.service.hudi.service;
|
||||
|
||||
import com.eshore.odcp.hudi.connector.entity.TableMeta;
|
||||
import com.lanyuanxiaoyao.service.configuration.entity.hudi.HudiInstant;
|
||||
import com.lanyuanxiaoyao.service.forest.service.InfoService;
|
||||
import com.lanyuanxiaoyao.service.hudi.utils.HoodieUtils;
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.cache.annotation.Cacheable;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* 时间线查询
|
||||
*
|
||||
* @author lanayuanxiaoyao
|
||||
* @date 2023-05-01
|
||||
*/
|
||||
@Service
|
||||
public class TimelineService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(TimelineService.class);
|
||||
|
||||
private final InfoService infoService;
|
||||
|
||||
@SuppressWarnings("SpringJavaInjectionPointsAutowiringInspection")
|
||||
public TimelineService(InfoService infoService) {
|
||||
this.infoService = infoService;
|
||||
}
|
||||
|
||||
@Cacheable(value = "timeline", sync = true, key = "#flinkJobId.toString()+#alias")
|
||||
@Retryable(Throwable.class)
|
||||
public ImmutableList<HudiInstant> timeline(Long flinkJobId, String alias) throws IOException {
|
||||
TableMeta meta = infoService.tableMetaDetail(flinkJobId, alias);
|
||||
return timeline(meta.getHudi().getTargetHdfsPath());
|
||||
}
|
||||
|
||||
@Cacheable(value = "timeline", sync = true, key = "#hdfs")
|
||||
@Retryable(Throwable.class)
|
||||
public ImmutableList<HudiInstant> timeline(String hdfs) throws IOException {
|
||||
HoodieTableMetaClient client = HoodieTableMetaClient.builder()
|
||||
.setConf(new Configuration())
|
||||
.setBasePath(hdfs)
|
||||
.build();
|
||||
ImmutableList<HudiInstant> activeInstants = HoodieUtils.getAllInstants(client, HoodieTableMetaClient::getActiveTimeline)
|
||||
.collect(instant -> covert("active", instant));
|
||||
ImmutableList<HudiInstant> archiveInstants = HoodieUtils.getAllInstants(client, HoodieTableMetaClient::getArchivedTimeline)
|
||||
.collect(instant -> covert("archive", instant));
|
||||
return activeInstants.newWithAll(archiveInstants)
|
||||
.toSortedList(HudiInstant::compareTo)
|
||||
.toImmutable();
|
||||
}
|
||||
|
||||
private HudiInstant covert(String type, HoodieInstant instant) {
|
||||
return new HudiInstant(
|
||||
instant.getAction(),
|
||||
instant.getState().name(),
|
||||
instant.getTimestamp(),
|
||||
instant.getFileName(),
|
||||
type
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.lanyuanxiaoyao.service.hudi.utils;
|
||||
|
||||
import cn.hutool.core.util.ReUtil;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.eclipse.collections.api.factory.Lists;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Hudi 工具
|
||||
*
|
||||
* @author lanyuanxiaoyao
|
||||
* @date 2023-05-01
|
||||
*/
|
||||
public class HoodieUtils {
|
||||
private static final Logger logger = LoggerFactory.getLogger(HoodieUtils.class);
|
||||
|
||||
public static ImmutableList<HoodieInstant> getAllInstants(HoodieTableMetaClient client, Function<HoodieTableMetaClient, HoodieDefaultTimeline> getTimeline) throws IOException {
|
||||
FileSystem fileSystem = client.getRawFs();
|
||||
// 直接使用 toString 方法得到的值是被缓存的
|
||||
String hdfs = client.getBasePathV2().toUri().toString();
|
||||
Path metadataPath = new Path(hdfs + "/.hoodie");
|
||||
return getAllInstants(getTimeline.apply(client), fileSystem, metadataPath)
|
||||
.toSortedList(HoodieInstant::compareTo)
|
||||
.toImmutable();
|
||||
}
|
||||
|
||||
private static ImmutableList<HoodieInstant> getAllInstants(HoodieDefaultTimeline timeline, FileSystem fileSystem, Path metadataPath) throws IOException {
|
||||
Set<String> committedTimestamps = timeline.getCommitsTimeline()
|
||||
.filterCompletedInstants()
|
||||
.getInstants()
|
||||
.map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toSet());
|
||||
List<String> compactionRequestedTimestamps = Arrays.stream(fileSystem.listStatus(metadataPath))
|
||||
.filter(status -> status.getPath().toString().endsWith(HoodieTimeline.REQUESTED_COMPACTION_EXTENSION))
|
||||
.map(status -> status.getPath().getName())
|
||||
.map(name -> ReUtil.get("^(\\d+)\\..+", name, 1))
|
||||
.filter(committedTimestamps::contains)
|
||||
.collect(Collectors.toList());
|
||||
return Lists.immutable.ofAll(timeline.getInstants()
|
||||
.map(instant -> {
|
||||
if (compactionRequestedTimestamps.contains(instant.getTimestamp())) {
|
||||
return new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMPACTION_ACTION, instant.getTimestamp());
|
||||
}
|
||||
return instant;
|
||||
})
|
||||
.sorted(Comparator.comparingLong(i -> Long.parseLong(i.getTimestamp())))
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user