feat(executor-task): 数据扫描增加pulsar队列读取

This commit is contained in:
2024-01-19 14:36:41 +08:00
parent 521e82104f
commit 9140a39bf1
22 changed files with 796 additions and 105 deletions

View File

@@ -1,5 +1,6 @@
package com.lanyuanxiaoyao.service.executor.manager.controller;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.executor.manager.service.TaskService;
import java.io.IOException;
import org.eclipse.collections.api.list.ImmutableList;
@@ -27,14 +28,27 @@ public class TaskController {
@GetMapping("scan")
public String scan(
@RequestParam("hdfs") String hdfs,
@RequestParam("key") String key,
@RequestParam(value = "scan_log", defaultValue = "true") Boolean scanLog,
@RequestParam(value = "scan_data", defaultValue = "false") Boolean scanData,
@RequestParam(value = "hdfs", required = false) String hdfs,
@RequestParam(value = "pulsar", required = false) String pulsar,
@RequestParam(value = "pulsar_topic", required = false) String pulsarTopic,
@RequestParam(value = "scan_source", defaultValue = "false") Boolean scanSource,
@RequestParam(value = "scan_queue", defaultValue = "false") Boolean scanQueue,
@RequestParam(value = "scan_log", defaultValue = "false") Boolean scanLog,
@RequestParam(value = "scan_base", defaultValue = "false") Boolean scanBase,
@RequestParam(value = "scan_target", defaultValue = "false") Boolean scanTarget
) throws Exception {
return taskService.scanAvro(hdfs, key, scanLog, scanData, scanSource, scanTarget);
logger.info("Enter method: scan[key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget]. " + "key:" + key + "," + "hdfs:" + hdfs + "," + "pulsar:" + pulsar + "," + "pulsarTopic:" + pulsarTopic + "," + "scanSource:" + scanSource + "," + "scanQueue:" + scanQueue + "," + "scanLog:" + scanLog + "," + "scanBase:" + scanBase + "," + "scanTarget:" + scanTarget);
// At least one scan mode flag must be set; every flag defaults to false.
if (!scanSource && !scanQueue && !scanLog && !scanBase && !scanTarget) {
    throw new RuntimeException("Must choose one mode");
}
// Queue scanning needs both the pulsar url and the pulsar topic.
// Check each of them: testing `pulsar` twice would let a blank topic through.
if (scanQueue && (StrUtil.isBlank(pulsar) || StrUtil.isBlank(pulsarTopic))) {
    throw new RuntimeException("Pulsar topic or url cannot be empty");
}
// The hdfs path is only required when log or base scanning is requested.
if ((scanLog || scanBase) && StrUtil.isBlank(hdfs)) {
    throw new RuntimeException("Hdfs path cannot be empty");
}
return taskService.scanAvro(key, hdfs, pulsar, pulsarTopic, scanSource, scanQueue, scanLog, scanBase, scanTarget);
}
@GetMapping("results")

View File

@@ -1,6 +1,8 @@
package com.lanyuanxiaoyao.service.executor.manager.service;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.map.MapBuilder;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.IdUtil;
import cn.hutool.core.util.StrUtil;
import com.eshore.odcp.hudi.connector.utils.executor.Runner;
@@ -108,11 +110,38 @@ public class TaskService {
return configuration;
}
public String scanAvro(String hdfs, String key, Boolean scanLog, Boolean scanData, Boolean scanSource, Boolean scanTarget) throws Exception {
public String scanAvro(
String key,
String hdfs,
String pulsar,
String pulsarTopic,
Boolean scanSource,
Boolean scanQueue,
Boolean scanLog,
Boolean scanBase,
Boolean scanTarget
) throws Exception {
String taskId = taskId();
Configuration configuration = generateConfiguration(taskId, "scan");
setEnvironment(configuration, "hdfs", hdfs);
MapBuilder<String, Object> builder = MapUtil.builder();
setEnvironment(configuration, "key", key);
builder.put("key", key);
if (scanLog || scanBase) {
setEnvironment(configuration, "hdfs", hdfs);
builder.put("scan_log", scanLog);
builder.put("scan_base", scanBase);
builder.put("hdfs", hdfs);
}
if (scanQueue) {
setEnvironment(configuration, "pulsar", pulsar);
setEnvironment(configuration, "pulsar_topic", pulsarTopic);
builder.put("scan_queue", true);
builder.put("pulsar", pulsar);
builder.put("pulsar_topic", pulsarTopic);
}
ApplicationId applicationId = Runner.run(
configuration,
"com.lanyuanxiaoyao.service.executor.task.DataScanner",
@@ -122,16 +151,7 @@ public class TaskService {
new TaskContext(
taskId,
executorConfiguration.getTaskResultPath(),
Maps.mutable.of(
"key",
key,
"hdfs",
hdfs,
"scan_log",
scanLog,
"scan_data",
scanData
)
Maps.mutable.ofMap(builder.build())
)
)
}

View File

@@ -2,7 +2,7 @@ spring:
application:
name: service-executor-manager
profiles:
include: random-port,common,discovery,metrics
include: random-port,common,discovery,metrics,forest
executor:
staging-directory: hdfs://b2/apps/datalake/yarn
history-server-archive-dir: hdfs://b2/apps/flink/completed-jobs/