perf(hudi-query): 优化hudi时间线的扫描速度
This commit is contained in:
@@ -82,6 +82,25 @@ public class TimelineController {
|
||||
);
|
||||
}
|
||||
|
||||
@GetMapping("all")
|
||||
public ImmutableList<HudiInstant> allInstants(
|
||||
@RequestParam("flink_job_id") Long flinkJobId,
|
||||
@RequestParam("alias") String alias,
|
||||
@RequestParam(value = "active", defaultValue = "true") Boolean active,
|
||||
@RequestParam(value = "archive", defaultValue = "false") Boolean archive
|
||||
) throws IOException {
|
||||
return timelineService.timeline(flinkJobId, alias, active, archive);
|
||||
}
|
||||
|
||||
@GetMapping("all_hdfs")
|
||||
public ImmutableList<HudiInstant> allInstants(
|
||||
@RequestParam("hdfs") String hdfs,
|
||||
@RequestParam(value = "active", defaultValue = "true") Boolean active,
|
||||
@RequestParam(value = "archive", defaultValue = "false") Boolean archive
|
||||
) throws IOException {
|
||||
return timelineService.timeline(hdfs, active, archive);
|
||||
}
|
||||
|
||||
@GetMapping("read_compaction_plan")
|
||||
public HudiCompactionPlan readCompactionPlan(
|
||||
@RequestParam("flink_job_id") Long flinkJobId,
|
||||
|
||||
@@ -30,7 +30,6 @@ import org.apache.hudi.table.action.rollback.RollbackUtils;
|
||||
import org.eclipse.collections.api.factory.Lists;
|
||||
import org.eclipse.collections.api.factory.Maps;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.eclipse.collections.api.list.MutableList;
|
||||
import org.eclipse.collections.api.map.ImmutableMap;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@@ -100,33 +99,11 @@ public class TimelineService {
|
||||
) throws IOException {
|
||||
Configuration configuration = new Configuration();
|
||||
FileSystem fileSystem = FileSystem.get(configuration);
|
||||
if (!fileSystem.exists(new Path(hdfs))) {
|
||||
throw new IOException(StrUtil.format("Path {} is not exists", hdfs));
|
||||
}
|
||||
HoodieTableMetaClient client = HoodieTableMetaClient.builder()
|
||||
.setConf(configuration)
|
||||
.setBasePath(hdfs)
|
||||
.build();
|
||||
MutableList<HudiInstant> instants = Lists.mutable.empty();
|
||||
if (ObjectUtil.isEmpty(filterType)) {
|
||||
filterType = Lists.immutable.of(INSTANT_TYPE_ARCHIVE, INSTANT_TYPE_ACTIVE);
|
||||
}
|
||||
if (filterType.contains(INSTANT_TYPE_ARCHIVE)) {
|
||||
HoodieUtils.getAllInstants(client, HoodieTableMetaClient::getArchivedTimeline)
|
||||
.collect(instant -> covert(INSTANT_TYPE_ARCHIVE, instant))
|
||||
.forEach(instants::add);
|
||||
}
|
||||
if (filterType.contains(INSTANT_TYPE_ACTIVE)) {
|
||||
HoodieUtils.getAllInstants(client, HoodieTableMetaClient::getActiveTimeline)
|
||||
.collect(instant -> covert(INSTANT_TYPE_ACTIVE, instant))
|
||||
.forEach(instants::add);
|
||||
}
|
||||
ImmutableList<HudiInstant> hudiInstants = instants
|
||||
.toSortedList(HudiInstant::compareTo)
|
||||
.select(instant -> ObjectUtil.isEmpty(filterAction) || filterAction.contains(instant.getAction()))
|
||||
.select(instant -> ObjectUtil.isEmpty(filterState) || filterState.contains(instant.getState()))
|
||||
.toSortedList(ComparatorUtil.stringComparator(order, direction, TIMELINE_SORT_MAP))
|
||||
.toImmutable();
|
||||
ImmutableList<HudiInstant> hudiInstants = timeline(order, direction, hdfs, filterType, filterAction, filterState);
|
||||
ImmutableList<HudiInstant> result = hudiInstants
|
||||
.drop(Math.max(page - 1, 0) * count)
|
||||
.take(count)
|
||||
@@ -148,6 +125,47 @@ public class TimelineService {
|
||||
return new PageResponse<>(result.toList(), hudiInstants.size());
|
||||
}
|
||||
|
||||
public ImmutableList<HudiInstant> timeline(
|
||||
String order,
|
||||
String direction,
|
||||
String hdfs,
|
||||
ImmutableList<String> filterType,
|
||||
ImmutableList<String> filterAction,
|
||||
ImmutableList<String> filterState
|
||||
) throws IOException {
|
||||
HoodieTableMetaClient client = HoodieTableMetaClient.builder()
|
||||
.setConf(new Configuration())
|
||||
.setBasePath(hdfs)
|
||||
.build();
|
||||
FileSystem fileSystem = client.getRawFs();
|
||||
if (!fileSystem.exists(new Path(hdfs))) {
|
||||
throw new IOException(StrUtil.format("Path {} is not exists", hdfs));
|
||||
}
|
||||
if (ObjectUtil.isEmpty(filterType)) {
|
||||
filterType = Lists.immutable.of(INSTANT_TYPE_ARCHIVE, INSTANT_TYPE_ACTIVE);
|
||||
}
|
||||
ImmutableList<HudiInstant> instants = HoodieUtils.getAllInstants(client, filterType.contains(INSTANT_TYPE_ACTIVE), filterType.contains(INSTANT_TYPE_ARCHIVE));
|
||||
return instants
|
||||
.toSortedList(HudiInstant::compareTo)
|
||||
.select(instant -> ObjectUtil.isEmpty(filterAction) || filterAction.contains(instant.getAction()))
|
||||
.select(instant -> ObjectUtil.isEmpty(filterState) || filterState.contains(instant.getState()))
|
||||
.toSortedList(ComparatorUtil.stringComparator(order, direction, TIMELINE_SORT_MAP))
|
||||
.toImmutable();
|
||||
}
|
||||
|
||||
public ImmutableList<HudiInstant> timeline(Long flinkJobId, String alias, Boolean active, Boolean archive) throws IOException {
|
||||
TableMeta meta = infoService.tableMetaDetail(flinkJobId, alias);
|
||||
return timeline(meta.getHudi().getTargetHdfsPath(), active, archive);
|
||||
}
|
||||
|
||||
public ImmutableList<HudiInstant> timeline(String hdfs, Boolean active, Boolean archive) throws IOException {
|
||||
HoodieTableMetaClient client = HoodieTableMetaClient.builder()
|
||||
.setConf(new Configuration())
|
||||
.setBasePath(hdfs)
|
||||
.build();
|
||||
return HoodieUtils.getAllInstants(client, active, archive);
|
||||
}
|
||||
|
||||
@Cacheable(value = "read-compaction-plan", sync = true)
|
||||
@Retryable(Throwable.class)
|
||||
public HudiCompactionPlan readCompactionPlan(Long flinkJobId, String alias, String instant) throws IOException {
|
||||
@@ -255,19 +273,8 @@ public class TimelineService {
|
||||
.setConf(new Configuration())
|
||||
.setBasePath(meta.getHudi().getTargetHdfsPath())
|
||||
.build();
|
||||
return HoodieUtils.getAllInstants(client, HoodieTableMetaClient::getActiveTimeline)
|
||||
return HoodieUtils.getAllActiveInstants(client)
|
||||
.select(instant -> StrUtil.equals(instant.getAction(), HoodieTimeline.COMPACTION_ACTION))
|
||||
.reject(instant -> ObjectUtil.equals(instant.getState(), HoodieInstant.State.COMPLETED))
|
||||
.collect(instant -> covert(INSTANT_TYPE_ACTIVE, instant));
|
||||
}
|
||||
|
||||
private HudiInstant covert(String type, HoodieInstant instant) {
|
||||
return new HudiInstant(
|
||||
instant.getAction(),
|
||||
instant.getState().name(),
|
||||
instant.getTimestamp(),
|
||||
instant.getFileName(),
|
||||
type
|
||||
);
|
||||
.reject(instant -> StrUtil.equals(instant.getState(), HoodieInstant.State.COMPLETED.name()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,24 +1,28 @@
|
||||
package com.lanyuanxiaoyao.service.hudi.utils;
|
||||
|
||||
import cn.hutool.core.util.ReUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import com.lanyuanxiaoyao.service.configuration.entity.hudi.HudiInstant;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hudi.common.table.HoodieTableMetaClient;
|
||||
import org.apache.hudi.common.table.TableSchemaResolver;
|
||||
import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieArchivedTimeline;
|
||||
import org.apache.hudi.common.table.timeline.HoodieInstant;
|
||||
import org.apache.hudi.common.table.timeline.HoodieTimeline;
|
||||
import org.apache.hudi.org.apache.avro.Schema;
|
||||
import org.eclipse.collections.api.factory.Lists;
|
||||
import org.eclipse.collections.api.list.ImmutableList;
|
||||
import org.eclipse.collections.api.list.MutableList;
|
||||
import org.eclipse.collections.api.map.MutableMap;
|
||||
import org.eclipse.collections.api.multimap.list.MutableListMultimap;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -44,37 +48,98 @@ public class HoodieUtils {
|
||||
return schemaUtil.getTableAvroSchema(true);
|
||||
}
|
||||
|
||||
public static ImmutableList<HoodieInstant> getAllInstants(HoodieTableMetaClient client, Function<HoodieTableMetaClient, HoodieDefaultTimeline> getTimeline) throws IOException {
|
||||
public static ImmutableList<HudiInstant> getAllActiveInstants(HoodieTableMetaClient client) throws IOException {
|
||||
return getAllInstants(client, true, false);
|
||||
}
|
||||
|
||||
public static ImmutableList<HudiInstant> getAllArchiveInstants(HoodieTableMetaClient client) throws IOException {
|
||||
return getAllInstants(client, false, true);
|
||||
}
|
||||
|
||||
public static ImmutableList<HudiInstant> getAllInstants(HoodieTableMetaClient client) throws IOException {
|
||||
return getAllInstants(client, true, true);
|
||||
}
|
||||
|
||||
public static ImmutableList<HudiInstant> getAllInstants(HoodieTableMetaClient client, Boolean active, Boolean archive) throws IOException {
|
||||
FileSystem fileSystem = client.getRawFs();
|
||||
// 直接使用 toString 方法得到的值是被缓存的
|
||||
String hdfs = client.getBasePathV2().toUri().toString();
|
||||
Path metadataPath = new Path(hdfs + "/.hoodie");
|
||||
return getAllInstants(getTimeline.apply(client), fileSystem, metadataPath)
|
||||
.toSortedList(HoodieInstant::compareTo)
|
||||
Path metadataPath = new Path(client.getBasePathV2().toString(), HoodieTableMetaClient.METAFOLDER_NAME);
|
||||
MutableList<HudiInstant> instants = Lists.mutable.empty();
|
||||
|
||||
if (active) {
|
||||
instants.addAllIterable(activeInstants(client.getActiveTimeline()));
|
||||
}
|
||||
if (archive) {
|
||||
instants.addAllIterable(archiveInstants(client.getArchivedTimeline()));
|
||||
}
|
||||
|
||||
MutableMap<String, Long> fileModifiedTimeMap = Lists.immutable.of(fileSystem.listStatus(metadataPath))
|
||||
.toMap(status -> status.getPath().toString(), FileStatus::getModificationTime);
|
||||
return instants
|
||||
.collect(instant -> {
|
||||
String instantPath = StrUtil.format("{}/{}", client.getMetaPath(), instant.getFileName());
|
||||
instant.setFileTime(fileModifiedTimeMap.getOrDefault(instantPath, 0L));
|
||||
return instant;
|
||||
})
|
||||
.sortThis(Comparator
|
||||
.<HudiInstant>comparingLong(instant -> Long.parseLong(instant.getTimestamp()))
|
||||
.thenComparingLong(HudiInstant::getFileTime))
|
||||
.toImmutable();
|
||||
}
|
||||
|
||||
private static ImmutableList<HoodieInstant> getAllInstants(HoodieDefaultTimeline timeline, FileSystem fileSystem, Path metadataPath) throws IOException {
|
||||
private static HudiInstant convert(HoodieInstant instant, String type) {
|
||||
return new HudiInstant(
|
||||
instant.getAction(),
|
||||
instant.getState().name(),
|
||||
instant.getTimestamp(),
|
||||
instant.getFileName(),
|
||||
0L,
|
||||
type
|
||||
);
|
||||
}
|
||||
|
||||
private static ImmutableList<HudiInstant> activeInstants(HoodieActiveTimeline timeline) {
|
||||
Set<String> committedTimestamps = timeline.getCommitsTimeline()
|
||||
.filterCompletedInstants()
|
||||
.getInstants()
|
||||
.map(HoodieInstant::getTimestamp)
|
||||
.collect(Collectors.toSet());
|
||||
List<String> compactionRequestedTimestamps = Arrays.stream(fileSystem.listStatus(metadataPath))
|
||||
.filter(status -> status.getPath().toString().endsWith(HoodieTimeline.REQUESTED_COMPACTION_EXTENSION))
|
||||
.map(status -> status.getPath().getName())
|
||||
.map(name -> ReUtil.get("^(\\d+)\\..+", name, 1))
|
||||
.filter(committedTimestamps::contains)
|
||||
.collect(Collectors.toList());
|
||||
return Lists.immutable.ofAll(timeline.getInstants()
|
||||
List<HudiInstant> instants = timeline.getInstants()
|
||||
.map(instant -> convert(instant, "active"))
|
||||
.map(instant -> {
|
||||
if (compactionRequestedTimestamps.contains(instant.getTimestamp())) {
|
||||
return new HoodieInstant(HoodieInstant.State.COMPLETED, HoodieTimeline.COMPACTION_ACTION, instant.getTimestamp());
|
||||
if (HoodieTimeline.COMPACTION_ACTION.equals(instant.getAction()) && committedTimestamps.contains(instant.getTimestamp())) {
|
||||
return new HudiInstant(
|
||||
HoodieTimeline.COMPACTION_ACTION,
|
||||
HoodieInstant.State.COMPLETED.name(),
|
||||
instant.getTimestamp(),
|
||||
instant.getFileName(),
|
||||
instant.getFileTime(),
|
||||
instant.getType()
|
||||
);
|
||||
}
|
||||
return instant;
|
||||
})
|
||||
.sorted(Comparator.comparingLong(i -> Long.parseLong(i.getTimestamp())))
|
||||
.collect(Collectors.toList()));
|
||||
.collect(Collectors.toList());
|
||||
return Lists.immutable.ofAll(instants);
|
||||
}
|
||||
|
||||
private static ImmutableList<HudiInstant> archiveInstants(HoodieArchivedTimeline timeline) {
|
||||
MutableList<HoodieInstant> instants = Lists.mutable.ofAll(timeline.getInstants().collect(Collectors.toList()));
|
||||
instants.forEach(instant -> logger.info(instant.toString()));
|
||||
MutableListMultimap<HoodieInstant.State, String> stateMap = instants.groupBy(HoodieInstant::getState).collectValues(HoodieInstant::getTimestamp);
|
||||
return instants
|
||||
.select(instant -> HoodieInstant.State.REQUESTED.equals(instant.getState()))
|
||||
.collect(instant -> convert(instant, "archive"))
|
||||
.collect(instant -> {
|
||||
if (stateMap.containsKeyAndValue(HoodieInstant.State.INVALID, instant.getTimestamp())) {
|
||||
instant.setState(HoodieInstant.State.INVALID.name());
|
||||
} else if (stateMap.containsKeyAndValue(HoodieInstant.State.COMPLETED, instant.getTimestamp())) {
|
||||
instant.setState(HoodieInstant.State.COMPLETED.name());
|
||||
} else if (stateMap.containsKeyAndValue(HoodieInstant.State.INFLIGHT, instant.getTimestamp())) {
|
||||
instant.setState(HoodieInstant.State.INFLIGHT.name());
|
||||
}
|
||||
return instant;
|
||||
})
|
||||
.toImmutable();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user