feat(executor-task): 增加查询指定hudi表base文件最新的timestamp

根据LATEST_OP_TS来判断比较timestamp先后,排序后取最后的
This commit is contained in:
2024-01-30 12:31:57 +08:00
parent 4b2585984c
commit cd3b340270
13 changed files with 377 additions and 166 deletions

View File

@@ -1,3 +1,83 @@
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/all
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=B7901EEEC95E299E2F94FB74E3A979F5
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=BFE3689E5BBBBFD448B87BFEE0D23E83
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=880932A3A6DBB5983CB032B005EA2B3E
Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/all
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
@@ -382,102 +462,3 @@ Accept-Encoding: br,deflate,gzip,x-gzip
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=3FB05EC585838B1A8DC11D0425515571
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T143301.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=3FB05EC585838B1A8DC11D0425515571
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T143253.503.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=3FB05EC585838B1A8DC11D0425515571
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T143252.503.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=7602F2E53C5FE9BD0182453EBE62D056
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T142946.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=7602F2E53C5FE9BD0182453EBE62D056
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T142928.503.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Cookie: JSESSIONID=7602F2E53C5FE9BD0182453EBE62D056
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T142657.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.126.207.130:35690/hudi_services/service_scheduler/schedule/schedule_times
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.14 (Java/17.0.9)
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2024-01-15T142605.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.122.116.152:37496/zookeeper/get_data?path=/hudi/lock/running/sync/sync_lock_1542097983881048064
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.13 (Java/17.0.5)
Cookie: JSESSIONID=9B2804B24676C18ABB793E669D789275
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2023-05-14T000645.200.txt
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.122.116.152:37496/zookeeper/exists_path?path=/hudi/lock/running/sync/sync_lock_1542097983881048064
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.13 (Java/17.0.5)
Cookie: JSESSIONID=9B2804B24676C18ABB793E669D789275
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2023-05-14T000637.200.json
###
GET http://AxhEbscwsJDbYMH2:cYxg3b4PtWoVD5SjFayWxtnSVsjzRsg4@132.122.116.152:37496/zookeeper/exists_path?path=/hudi
Connection: Keep-Alive
User-Agent: Apache-HttpClient/4.5.13 (Java/17.0.5)
Cookie: JSESSIONID=62C1CD6C50E4C9B2B521DA398F17A0AD
Accept-Encoding: br,deflate,gzip,x-gzip
<> 2023-05-14T000603.200.json
###

View File

@@ -51,6 +51,14 @@ public class ExecutorTaskController {
return executorTaskService.scanAvro(key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget);
}
@GetMapping("latest_op_ts")
public String latestOpTs(@RequestParam("hdfs") String hdfs) throws Exception {
    // Validate up front: a blank path can never identify a Hudi table, so
    // fail fast instead of submitting a doomed scan task.
    if (!StrUtil.isBlank(hdfs)) {
        // Delegates to the service, which submits the Flink scan job and
        // returns the application id as a string.
        return executorTaskService.scanLatestOpTs(hdfs);
    }
    throw new RuntimeException("Hdfs path cannot be empty");
}
@GetMapping("results")
public ImmutableList<String> results(
@RequestParam("task_id") String taskId,

View File

@@ -161,6 +161,31 @@ public class ExecutorTaskService {
return applicationId.toString();
}
/**
 * Submits a Flink batch job that scans the newest base files under the
 * given HDFS table path and records the latest operation timestamp.
 *
 * @param hdfs HDFS root path of the table to scan
 * @return the submitted application id, as a string
 * @throws Exception if the task context cannot be serialized or the job
 *         submission fails
 */
public String scanLatestOpTs(String hdfs) throws Exception {
    String taskId = taskId();
    Configuration configuration = generateConfiguration(taskId, "latest_op_ts");
    // The scan is a one-off read-only job; cap managed memory at 1 GiB.
    configuration.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("1024m"));
    // The only metadata the task needs is the table path.
    MapBuilder<String, Object> metadata = MapUtil.builder();
    metadata.put("hdfs", hdfs);
    TaskContext taskContext = new TaskContext(
            taskId,
            executorConfiguration.getTaskResultPath(),
            Maps.mutable.ofMap(metadata.build())
    );
    // The context travels to the job's main() as a serialized CLI argument.
    String[] arguments = new String[]{
            TaskConstants.TASK_CONTEXT_OPTION,
            mapper.writeValueAsString(taskContext)
    };
    ApplicationId applicationId = Runner.run(
            configuration,
            "com.lanyuanxiaoyao.service.executor.task.LatestOperationTimeScan",
            arguments
    );
    return applicationId.toString();
}
@Cacheable(value = "results", sync = true)
@Retryable(Throwable.class)
public ImmutableList<String> taskResult(String taskId, Integer limit) throws IOException {

View File

@@ -41,9 +41,6 @@
</encoder>
</appender>
<logger name="com.zaxxer.hikari" level="ERROR"/>
<logger name="com.netflix.discovery.shared.resolver.aws.ConfigClusterResolver" level="WARN"/>
<root level="INFO">
<appender-ref ref="Loki"/>
<appender-ref ref="Console"/>

View File

@@ -2,25 +2,19 @@ package com.lanyuanxiaoyao.service.executor.task;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.configuration.ExecutorProvider;
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
import com.lanyuanxiaoyao.service.executor.task.functions.ReadHudiFile;
import com.lanyuanxiaoyao.service.executor.task.functions.pulsar.ReadPulsarSource;
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
import com.lanyuanxiaoyao.service.executor.task.helper.FlinkHelper;
import java.io.IOException;
import com.lanyuanxiaoyao.service.executor.task.helper.HdfsHelper;
import java.util.Map;
import java.util.Optional;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,36 +28,6 @@ import org.slf4j.LoggerFactory;
public class DataScanner {
private static final Logger logger = LoggerFactory.getLogger(DataScanner.class);
private static ImmutableList<String> parsePaths(FileSystem fileSystem, ImmutableList<Path> paths) {
return paths
.asParallel(ExecutorProvider.EXECUTORS, 1)
.reject(path -> {
try {
return FSUtils.getFileSize(fileSystem, path) < 1;
} catch (IOException e) {
logger.error("Get file size error", e);
}
return true;
})
.groupBy(FSUtils::getFileIdFromFilePath)
.multiValuesView()
.collect(pathList -> pathList
.toSortedListBy(path -> {
String commitTime = FSUtils.getCommitTime(path.getName());
try {
return Long.valueOf(commitTime);
} catch (Throwable throwable) {
return 0L;
}
})
.getLastOptional())
.select(Optional::isPresent)
.collect(Optional::get)
.collect(Path::toString)
.toList()
.toImmutable();
}
public static void main(String[] args) throws Exception {
TaskContext taskContext = ArgumentsHelper.getContext(args);
logger.info("Context: {}", taskContext);
@@ -102,30 +66,13 @@ public class DataScanner {
if (scanLog || scanBase) {
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
String hdfs = (String) metadata.get("hdfs");
Configuration configuration = new Configuration();
FileSystem fileSystem = FileSystem.get(configuration);
if (!fileSystem.exists(new Path(hdfs))) {
throw new RuntimeException(StrUtil.format("HDFS {} is not exists", hdfs));
}
FileSystem fileSystem = FileSystem.get(new Configuration());
HdfsHelper.checkHdfsPath(fileSystem, hdfs);
ImmutableList<Path> paths = Lists.immutable.of(fileSystem.listStatus(new Path(hdfs)))
.reject(status -> StrUtil.equals(".hoodie", status.getPath().getName()))
.flatCollect(status -> {
try {
if (status.isDirectory()) {
return Lists.immutable.of(fileSystem.listStatus(status.getPath()));
} else {
return Lists.immutable.of(status);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
})
.collect(FileStatus::getPath);
if (scanLog) {
logger.info("Scan log hdfs: {}", hdfs);
ImmutableList<String> logPaths = paths.select(FSUtils::isLogFile).collect(Path::toString);
int parallelism = Math.max(1, Math.min(logPaths.size() / 20, 100));
ImmutableList<String> logPaths = HdfsHelper.logPaths(fileSystem, hdfs);
int parallelism = HdfsHelper.logScanParallelismPredict(logPaths);
totalParallelism = Math.max(totalParallelism, parallelism);
DataStream<RecordView> stream = environment
.fromCollection(logPaths.toList())
@@ -141,11 +88,11 @@ public class DataScanner {
}
if (scanBase) {
logger.info("Scan base hdfs: {}", hdfs);
ImmutableList<String> dataPaths = parsePaths(fileSystem, paths.select(FSUtils::isBaseFile));
int parallelism = Math.max(1, Math.min(dataPaths.size() / 2, 500));
ImmutableList<String> basePaths = HdfsHelper.latestBasePaths(fileSystem, hdfs);
int parallelism = HdfsHelper.baseScanParallelismPredict(basePaths);
totalParallelism = Math.max(totalParallelism, parallelism);
DataStream<RecordView> stream = environment
.fromCollection(dataPaths.toList())
.fromCollection(basePaths.toList())
.name("Read base paths")
.flatMap(new ReadHudiFile())
.name("Read hudi file")

View File

@@ -0,0 +1,76 @@
package com.lanyuanxiaoyao.service.executor.task;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil;
import com.eshore.odcp.hudi.connector.Constants;
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
import com.lanyuanxiaoyao.service.executor.task.functions.ReadHudiFile;
import com.lanyuanxiaoyao.service.executor.task.helper.ArgumentsHelper;
import com.lanyuanxiaoyao.service.executor.task.helper.FlinkHelper;
import com.lanyuanxiaoyao.service.executor.task.helper.HdfsHelper;
import org.apache.flink.api.common.functions.RichReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.CloseableIterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Scans the newest base file of every file group in a Hudi table and reports
 * the latest operation timestamp (LATEST_OP_TS) found across all records,
 * writing it out as the task result.
 *
 * @author lanyuanxiaoyao
 * @date 2024-01-22
 */
public class LatestOperationTimeScan {
private static final Logger logger = LoggerFactory.getLogger(LatestOperationTimeScan.class);
public static void main(String[] args) throws Exception {
// Task context (task id, result path, metadata) arrives as a CLI argument.
TaskContext taskContext = ArgumentsHelper.getContext(args);
logger.info("Context: {}", taskContext);
// "hdfs" (the table root path) is the only required metadata entry.
ArgumentsHelper.checkMetadata(taskContext, "hdfs");
String hdfs = (String) taskContext.getMetadata().get("hdfs");
FileSystem fileSystem = FileSystem.get(new Configuration());
HdfsHelper.checkHdfsPath(fileSystem, hdfs);
// Only the most recent base file per file group needs to be read.
ImmutableList<String> basePaths = HdfsHelper.latestBasePaths(fileSystem, hdfs);
int parallelism = HdfsHelper.baseScanParallelismPredict(basePaths);
StreamExecutionEnvironment environment = FlinkHelper.getBatchEnvironment();
environment.setParallelism(parallelism);
// Records with no LATEST_OP_TS attribute default to "0", so "0" is the
// identity element for the max-fold below.
String maxValue = "0";
try (CloseableIterator<String> iterator = environment
.fromCollection(basePaths.toList())
.name("Read base paths")
.flatMap(new ReadHudiFile())
.name("Read hudi file")
.setParallelism(parallelism)
// Extract each record's operation timestamp; absent -> "0".
.map(view -> (String) view.getAttributes().getOrDefault(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, "0"))
// Scatter timestamps onto random keys so the reduce below runs in
// parallel; each key partition keeps only its own maximum.
.map(time -> new Tuple2<>(RandomUtil.randomInt(parallelism), time), TypeInformation.of(new TypeHint<Tuple2<Integer, String>>() {
}))
.keyBy(tuple -> tuple.f0)
.reduce(new RichReduceFunction<Tuple2<Integer, String>>() {
@Override
public Tuple2<Integer, String> reduce(Tuple2<Integer, String> value1, Tuple2<Integer, String> value2) throws Exception {
// NOTE(review): timestamps are compared lexicographically as
// strings — assumes a fixed-width, sortable format; confirm.
return value1.f1.compareTo(value2.f1) > 0 ? value1 : value2;
}
})
.map(tuple -> tuple.f1)
/*.sinkTo(FlinkHelper.createFileSink(taskContext))*/
.executeAndCollect("Find latest opts")) {
// Fold the per-partition maxima into the single global maximum.
while (iterator.hasNext()) {
String item = iterator.next();
if (item.compareTo(maxValue) > 0) {
maxValue = item;
}
}
}
// Persist the winner under the task's result path on HDFS.
HdfsHelper.createResult(fileSystem, taskContext, StrUtil.trim(maxValue));
}
}

View File

@@ -1,6 +1,7 @@
package com.lanyuanxiaoyao.service.executor.task.functions;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.eshore.odcp.hudi.connector.Constants;
import com.lanyuanxiaoyao.service.executor.task.entity.RecordView;
import java.io.IOException;
@@ -60,7 +61,9 @@ public class ReadHudiFile implements FlatMapFunction<String, RecordView> {
String data = builder.toString();
RecordView recordView = new RecordView(operation, data, timestamp, source);
recordView.getAttributes().put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, latestOpTs);
if (StrUtil.isNotBlank(latestOpTs)) {
recordView.getAttributes().put(Constants.LATEST_OPERATION_TIMESTAMP_KEY_NAME, latestOpTs);
}
return recordView;
}

View File

@@ -28,7 +28,7 @@ public class FlinkHelper {
public static StreamExecutionEnvironment getBatchEnvironment() {
StreamExecutionEnvironment environment = getSteamEnvironment();
environment.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
environment.setRuntimeMode(RuntimeExecutionMode.BATCH);
return environment;
}

View File

@@ -0,0 +1,131 @@
package com.lanyuanxiaoyao.service.executor.task.helper;
import cn.hutool.core.collection.IterUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.configuration.ExecutorProvider;
import com.lanyuanxiaoyao.service.executor.core.TaskContext;
import java.io.IOException;
import java.util.Optional;
import java.util.function.Predicate;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.fs.FSUtils;
import org.eclipse.collections.api.factory.Lists;
import org.eclipse.collections.api.list.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * HDFS utility methods shared by the scan tasks: path existence checks,
 * parallelism heuristics, Hudi base/log file discovery, and result output.
 *
 * @author lanyuanxiaoyao
 * @date 2024-01-22
 */
public class HdfsHelper {
private static final Logger logger = LoggerFactory.getLogger(HdfsHelper.class);
/**
 * Convenience overload of {@link #checkHdfsPath(FileSystem, Path)} taking
 * the path as a string.
 */
public static void checkHdfsPath(FileSystem fileSystem, String path) throws IOException {
checkHdfsPath(fileSystem, new Path(path));
}
/**
 * Throws a {@link RuntimeException} if the given path does not exist on
 * the file system.
 */
public static void checkHdfsPath(FileSystem fileSystem, Path path) throws IOException {
if (!fileSystem.exists(path)) {
throw new RuntimeException(StrUtil.format("HDFS {} is not exists", path.toString()));
}
}
/** Parallelism heuristic for log scans, based on the number of paths. */
public static Integer logScanParallelismPredict(Iterable<?> list) {
return logScanParallelismPredict(IterUtil.size(list));
}
/** One slot per 20 log files, clamped to the range [1, 100]. */
public static Integer logScanParallelismPredict(Integer pathNum) {
return Math.max(1, Math.min(pathNum / 20, 100));
}
/** Parallelism heuristic for base-file scans, based on the number of paths. */
public static Integer baseScanParallelismPredict(Iterable<?> list) {
return baseScanParallelismPredict(IterUtil.size(list));
}
/** One slot per 2 base files, clamped to the range [1, 500]. */
public static Integer baseScanParallelismPredict(Integer pathNum) {
return Math.max(1, Math.min(pathNum / 2, 500));
}
/** Convenience overload of {@link #latestBasePaths(FileSystem, Path)}. */
public static ImmutableList<String> latestBasePaths(FileSystem fileSystem, String root) throws IOException {
return latestBasePaths(fileSystem, new Path(root));
}
/**
 * Returns, for every Hudi file group under {@code root}, the path of its
 * newest non-empty base file (the one with the greatest commit time).
 */
public static ImmutableList<String> latestBasePaths(FileSystem fileSystem, Path root) throws IOException {
return basePaths(fileSystem, root)
// File sizes are fetched in parallel on the shared executor.
.asParallel(ExecutorProvider.EXECUTORS, 1)
.collect(Path::new)
// Drop empty files; on I/O error, err on the side of dropping
// the path as well (reject returns true).
.reject(path -> {
try {
return FSUtils.getFileSize(fileSystem, path) < 1;
} catch (IOException e) {
logger.error("Get file size error", e);
}
return true;
})
// One bucket per Hudi file group.
.groupBy(FSUtils::getFileIdFromFilePath)
.multiValuesView()
// Within each group, sort by numeric commit time (unparseable
// commit times sort first as 0) and keep the newest file.
.collect(pathList -> pathList
.toSortedListBy(path -> {
String commitTime = FSUtils.getCommitTime(path.getName());
try {
return Long.valueOf(commitTime);
} catch (Throwable throwable) {
return 0L;
}
})
.getLastOptional())
.select(Optional::isPresent)
.collect(Optional::get)
.collect(Path::toString)
.toList()
.toImmutable();
}
/** Convenience overload of {@link #basePaths(FileSystem, Path)}. */
public static ImmutableList<String> basePaths(FileSystem fileSystem, String root) throws IOException {
return basePaths(fileSystem, new Path(root));
}
/** All Hudi base (data) files under {@code root}, one directory level deep. */
public static ImmutableList<String> basePaths(FileSystem fileSystem, Path root) throws IOException {
return hdfsPaths(fileSystem, root, FSUtils::isBaseFile);
}
/** Convenience overload of {@link #logPaths(FileSystem, Path)}. */
public static ImmutableList<String> logPaths(FileSystem fileSystem, String root) throws IOException {
return logPaths(fileSystem, new Path(root));
}
/** All Hudi log files under {@code root}, one directory level deep. */
public static ImmutableList<String> logPaths(FileSystem fileSystem, Path root) throws IOException {
return hdfsPaths(fileSystem, root, FSUtils::isLogFile);
}
/**
 * Lists files directly under {@code root} and one level inside its
 * subdirectories (skipping the ".hoodie" metadata directory), keeping only
 * those whose path satisfies {@code check}.
 */
public static ImmutableList<String> hdfsPaths(FileSystem fileSystem, Path root, Predicate<Path> check) throws IOException {
return Lists.immutable.of(fileSystem.listStatus(root))
.reject(status -> StrUtil.equals(".hoodie", status.getPath().getName()))
.flatCollect(status -> {
try {
// Descend exactly one level into partition directories.
if (status.isDirectory()) {
return Lists.immutable.of(fileSystem.listStatus(status.getPath()));
} else {
return Lists.immutable.of(status);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
})
.collect(FileStatus::getPath)
.select(check::test)
.collect(Path::toString);
}
/**
 * Writes {@code result} to {@code <resultPath>/<taskId>/task-result},
 * overwriting any existing file.
 * NOTE(review): writeUTF prepends a 2-byte length and uses modified UTF-8 —
 * confirm that readers of task-result expect this framing rather than raw text.
 */
public static void createResult(FileSystem fileSystem, TaskContext context, String result) throws IOException {
Path resultPath = new Path(context.getResultPath() + "/" + context.getTaskId() + "/task-result");
try(FSDataOutputStream outputStream = fileSystem.create(resultPath)) {
outputStream.writeUTF(result);
}
}
}

View File

@@ -0,0 +1,11 @@
package com.lanyuanxiaoyao.service.executor.task;
/**
 * Scratch entry point: prints which of 180 buckets the hash code of
 * "hello world" falls into.
 *
 * @author lanyuanxiaoyao
 * @date 2024-01-30
 */
public class HashcodeTest {
    public static void main(String[] args) {
        final String sample = "hello world";
        final int buckets = 180;
        int bucket = sample.hashCode() % buckets;
        System.out.println(bucket);
    }
}

View File

@@ -24,6 +24,9 @@ public interface TaskService {
@Query("scan_base") Boolean scanBase
);
/**
 * Submits a scan for the newest operation timestamp of the table at the
 * given HDFS path; job submission can be slow, hence the 2-minute read timeout.
 *
 * @param hdfs HDFS root path of the Hudi table
 * @return the submitted job's application id, as a string
 */
@Get(value = "/task/latest_op_ts", readTimeout = 2 * 60 * 1000)
String latestOpTs(@Query("hdfs") String hdfs);
@Get("/task/results")
ImmutableList<String> results(@Query("task_id") String taskId);

View File

@@ -84,6 +84,35 @@ function taskTab() {
}
]
},
{
type: 'form',
title: '检索最后操作时间',
actions: [
{
type: 'submit',
label: '提交任务',
actionType: 'ajax',
api: {
method: 'get',
url: '${base}/task/latest_op_ts',
data: {
hdfs: '${hdfs|default:undefined}',
}
}
},
],
body: [
{
type: 'input-text',
name: 'hdfs',
label: 'HDFS路经',
required: true,
clearable: true,
description: '输入表HDFS路径',
autoComplete: '${base}/table/all_hdfs?key=$term',
},
]
},
{
type: 'crud',
api: {

View File

@@ -43,7 +43,7 @@ Content-Type: application/json
]
### 清空队列
GET http://{{username}}:{{password}}@b12s25.hdp.dc:26625/queue/clear/compaction-queue-pre
GET http://{{username}}:{{password}}@b12s25.hdp.dc:36781/queue/clear/compaction-queue-pre
### Info
GET http://{{username}}:{{password}}@132.122.116.146:18166/info/compaction_metrics?flink_job_id=1542097996099055616&alias=acct_acct_item_fs&filter_completes=true