feat(executor-task): 增加查询指定hudi表base文件最新的timestamp

根据LATEST_OP_TS来判断比较timestamp先后,排序后取最后的
This commit is contained in:
2024-01-30 12:31:57 +08:00
parent 4b2585984c
commit cd3b340270
13 changed files with 377 additions and 166 deletions

View File

@@ -51,6 +51,14 @@ public class ExecutorTaskController {
return executorTaskService.scanAvro(key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget);
}
/**
 * Handles {@code GET latest_op_ts}: starts a latest-operation-timestamp scan for the given path.
 *
 * @param hdfs value of the required {@code hdfs} request parameter; must not be blank
 * @return the string returned by {@code executorTaskService.scanLatestOpTs(hdfs)}
 * @throws RuntimeException if {@code hdfs} is blank
 * @throws Exception        propagated unchanged from the service call
 */
@GetMapping("latest_op_ts")
public String latestOpTs(@RequestParam("hdfs") String hdfs) throws Exception {
    boolean pathProvided = !StrUtil.isBlank(hdfs);
    if (pathProvided) {
        // Happy path first: hand the path straight to the service.
        return executorTaskService.scanLatestOpTs(hdfs);
    }
    throw new RuntimeException("Hdfs path cannot be empty");
}
@GetMapping("results")
public ImmutableList<String> results(
@RequestParam("task_id") String taskId,

View File

@@ -161,6 +161,31 @@ public class ExecutorTaskService {
return applicationId.toString();
}
/**
 * Submits the {@code LatestOperationTimeScan} task for the given path through {@code Runner.run}.
 *
 * <p>A fresh task id is generated, the job configuration is created under the name
 * {@code "latest_op_ts"} with managed memory set to {@code 1024m}, and a JSON-serialized
 * {@code TaskContext} (task id, configured result path, and a parameter map holding the
 * {@code "hdfs"} entry) is passed to the task as its context option.
 *
 * @param hdfs path stored under the {@code "hdfs"} key of the task parameters;
 *             not validated here (NOTE(review): presumably the Hudi table location — confirm)
 * @return string form of the {@code ApplicationId} returned by {@code Runner.run}
 * @throws Exception if configuration, context serialization, or submission fails
 */
public String scanLatestOpTs(String hdfs) throws Exception {
    String id = taskId();

    Configuration conf = generateConfiguration(id, "latest_op_ts");
    conf.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("1024m"));

    // The only task parameter is the path to scan.
    TaskContext context = new TaskContext(
        id,
        executorConfiguration.getTaskResultPath(),
        Maps.mutable.ofMap(MapUtil.<String, Object>builder().put("hdfs", hdfs).build())
    );

    // The task receives its context as JSON following the context option flag.
    String[] arguments = new String[]{
        TaskConstants.TASK_CONTEXT_OPTION,
        mapper.writeValueAsString(context)
    };

    ApplicationId submitted = Runner.run(
        conf,
        "com.lanyuanxiaoyao.service.executor.task.LatestOperationTimeScan",
        arguments
    );
    return submitted.toString();
}
@Cacheable(value = "results", sync = true)
@Retryable(Throwable.class)
public ImmutableList<String> taskResult(String taskId, Integer limit) throws IOException {

View File

@@ -41,9 +41,6 @@
</encoder>
</appender>
<logger name="com.zaxxer.hikari" level="ERROR"/>
<logger name="com.netflix.discovery.shared.resolver.aws.ConfigClusterResolver" level="WARN"/>
<root level="INFO">
<appender-ref ref="Loki"/>
<appender-ref ref="Console"/>