feat(launcher): 增加单独指定集群进行手动压缩的接口

This commit is contained in:
v-zhangjc9
2024-04-26 10:16:11 +08:00
parent 1808c30786
commit 053a9222cd
5 changed files with 143 additions and 87 deletions

View File

@@ -1,12 +1,15 @@
package com.lanyuanxiaoyao.service.command.commands; package com.lanyuanxiaoyao.service.command.commands;
import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.command.utils.CommandLineUtils; import com.lanyuanxiaoyao.service.command.utils.CommandLineUtils;
import com.lanyuanxiaoyao.service.command.utils.TableUtils; import com.lanyuanxiaoyao.service.command.utils.TableUtils;
import com.lanyuanxiaoyao.service.common.Constants; import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.TableMeta; import com.lanyuanxiaoyao.service.common.entity.TableMeta;
import com.lanyuanxiaoyao.service.configuration.entity.yarn.YarnClusters;
import com.lanyuanxiaoyao.service.forest.service.InfoService; import com.lanyuanxiaoyao.service.forest.service.InfoService;
import com.lanyuanxiaoyao.service.forest.service.ScheduleService; import com.lanyuanxiaoyao.service.forest.service.ScheduleService;
import com.lanyuanxiaoyao.service.forest.service.launcher.LaunchersService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.shell.standard.ShellComponent; import org.springframework.shell.standard.ShellComponent;
@@ -27,10 +30,14 @@ public class CompactionCommand extends AbstractUtilShellComponent {
private final InfoService infoService; private final InfoService infoService;
private final ScheduleService scheduleService; private final ScheduleService scheduleService;
private final YarnClusters yarnClusters;
private final LaunchersService launchersService;
public CompactionCommand(InfoService infoService, ScheduleService scheduleService) { public CompactionCommand(InfoService infoService, ScheduleService scheduleService, YarnClusters yarnClusters, LaunchersService launchersService) {
this.infoService = infoService; this.infoService = infoService;
this.scheduleService = scheduleService; this.scheduleService = scheduleService;
this.yarnClusters = yarnClusters;
this.launchersService = launchersService;
} }
@ShellMethod("启动表压缩任务") @ShellMethod("启动表压缩任务")
@@ -59,4 +66,40 @@ public class CompactionCommand extends AbstractUtilShellComponent {
} }
); );
} }
/**
 * Shell command that starts a compaction for a single table on one specific cluster.
 *
 * <p>When {@code cluster} is blank, the cluster returned by
 * {@code yarnClusters.getDefaultCompactionCluster()} is used instead. The table meta is
 * looked up first; if none is found a message is returned and nothing is started.
 * Otherwise the table meta is printed, the user is asked to confirm (unless
 * {@code ignoreCheck} is set) and the request is forwarded to
 * {@code launchersService.compactionStart}.
 *
 * <p>NOTE(review): despite the "DryRun" name, this method does call
 * {@code compactionStart} — confirm the name is intentional.
 *
 * @param cluster     target cluster; blank means "use the default compaction cluster"
 * @param flinkJobId  Flink job id of the table
 * @param alias       table alias
 * @param ignoreCheck passed to {@code doubleCheck} together with the confirmation message
 * @return the result lines produced by {@code CommandLineUtils.generateResultLines}
 */
@ShellMethod("启动表压缩任务")
public String compactionDryRun(
    @ShellOption(help = "集群", defaultValue = "") String cluster,
    @ShellOption(help = "Flink job id") Long flinkJobId,
    @ShellOption(help = "别名") String alias,
    @ShellOption(help = "Ignore double check", defaultValue = "false") Boolean ignoreCheck
) {
    // Resolve the cluster exactly once so the lambda below can capture it.
    final String targetCluster;
    if (StrUtil.isBlank(cluster)) {
        targetCluster = yarnClusters.getDefaultCompactionCluster();
        logger.info("Use default compaction cluster: {}", targetCluster);
    } else {
        targetCluster = cluster;
    }

    TableMeta meta = infoService.tableMetaDetail(flinkJobId, alias);
    return CommandLineUtils.generateResultLines(() -> {
        // Unknown table: report and stop.
        if (ObjectUtil.isEmpty(meta)) {
            return "没有找到指定的表信息";
        }
        // Show what is about to be compacted before asking for confirmation.
        System.out.println(TableUtils.makeTableMeta(meta));
        if (!doubleCheck(RUN_CONFIRMATION_MESSAGE, ignoreCheck)) {
            return Constants.OPERATION_CANCEL;
        }
        launchersService.compactionStart(targetCluster, flinkJobId, alias);
        return Constants.OPERATION_DONE;
    });
}
} }

View File

@@ -14,6 +14,9 @@ public interface LauncherService {
@Get("/launcher/synchronizer/stop") @Get("/launcher/synchronizer/stop")
void syncStop(@Query("flink_job_id") Long flinkJobId); void syncStop(@Query("flink_job_id") Long flinkJobId);
/**
 * Requests {@code /launcher/compaction/start} on the launcher this client is bound to.
 *
 * @param flinkJobId sent as the {@code flink_job_id} query parameter
 * @param alias      sent as the {@code alias} query parameter
 */
@Get("/launcher/compaction/start")
void compactionStart(@Query("flink_job_id") Long flinkJobId, @Query("alias") String alias);
@Get("/launcher/compaction/stop") @Get("/launcher/compaction/stop")
void compactionStop(@Query("flink_job_id") Long flinkJobId, @Query("alias") String alias); void compactionStop(@Query("flink_job_id") Long flinkJobId, @Query("alias") String alias);

View File

@@ -46,6 +46,11 @@ public class LaunchersService {
return serviceMap.valuesView().toList().toImmutable(); return serviceMap.valuesView().toList().toImmutable();
} }
/**
 * Starts a compaction on the launcher of one specific cluster.
 *
 * <p>The launcher client is resolved with {@code getService(cluster)} and the request is
 * delegated to its {@code compactionStart}.
 * NOTE(review): behaviour for an unknown cluster depends on {@code getService} — confirm.
 *
 * @param cluster    cluster whose launcher should run the compaction
 * @param flinkJobId Flink job id of the table
 * @param alias      table alias
 */
public void compactionStart(String cluster, Long flinkJobId, String alias) {
    getService(cluster).compactionStart(flinkJobId, alias);
}
public void compactionStop(Long flinkJobId, String alias) { public void compactionStop(Long flinkJobId, String alias) {
for (LauncherService service : getServices()) { for (LauncherService service : getServices()) {
service.compactionStop(flinkJobId, alias); service.compactionStop(flinkJobId, alias);

View File

@@ -1,5 +1,6 @@
package com.lanyuanxiaoyao.service.launcher.compaction.controller; package com.lanyuanxiaoyao.service.launcher.compaction.controller;
import cn.hutool.core.util.IdUtil;
import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.lanyuanxiaoyao.service.common.Constants; import com.lanyuanxiaoyao.service.common.Constants;
@@ -8,6 +9,7 @@ import com.lanyuanxiaoyao.service.common.entity.SyncState;
import com.lanyuanxiaoyao.service.configuration.utils.QueueUtil; import com.lanyuanxiaoyao.service.configuration.utils.QueueUtil;
import com.lanyuanxiaoyao.service.forest.service.InfoService; import com.lanyuanxiaoyao.service.forest.service.InfoService;
import com.lanyuanxiaoyao.service.forest.service.ZookeeperService; import com.lanyuanxiaoyao.service.forest.service.ZookeeperService;
import com.lanyuanxiaoyao.service.launcher.compaction.service.CompactionService;
import com.lanyuanxiaoyao.service.launcher.configuration.ClusterConfiguration; import com.lanyuanxiaoyao.service.launcher.configuration.ClusterConfiguration;
import java.io.IOException; import java.io.IOException;
import java.util.EnumSet; import java.util.EnumSet;
@@ -41,13 +43,15 @@ public class CompactionController {
private final ClusterConfiguration clusterConfiguration; private final ClusterConfiguration clusterConfiguration;
private final InfoService infoService; private final InfoService infoService;
private final ZookeeperService zookeeperService; private final ZookeeperService zookeeperService;
private final CompactionService compactionService;
private final YarnClient yarnClient; private final YarnClient yarnClient;
public CompactionController(DiscoveryClient discoveryClient, ClusterConfiguration clusterConfiguration, InfoService infoService, ZookeeperService zookeeperService) { public CompactionController(DiscoveryClient discoveryClient, ClusterConfiguration clusterConfiguration, InfoService infoService, ZookeeperService zookeeperService, CompactionService compactionService) {
this.discoveryClient = discoveryClient; this.discoveryClient = discoveryClient;
this.clusterConfiguration = clusterConfiguration; this.clusterConfiguration = clusterConfiguration;
this.infoService = infoService; this.infoService = infoService;
this.zookeeperService = zookeeperService; this.zookeeperService = zookeeperService;
this.compactionService = compactionService;
yarnClient = YarnClient.createYarnClient(); yarnClient = YarnClient.createYarnClient();
yarnClient.init(new Configuration()); yarnClient.init(new Configuration());
@@ -62,6 +66,15 @@ public class CompactionController {
} }
} }
/**
 * Starts a compaction for one table on this launcher.
 *
 * <p>Delegates to {@code compactionService.compact}, passing a freshly generated
 * {@code IdUtil.nanoId(10)} value as the batch argument.
 *
 * @param flinkJobId Flink job id, read from the {@code flink_job_id} request parameter
 * @param alias      table alias, read from the {@code alias} request parameter
 * @throws Exception whatever {@code compactionService.compact} throws
 */
@GetMapping("start")
public void start(
    @RequestParam("flink_job_id") Long flinkJobId,
    @RequestParam("alias") String alias
) throws Exception {
    // Parameterized logging: same rendered message, no eager string concatenation.
    logger.info("Enter method: start[flinkJobId, alias]. flinkJobId:{},alias:{}", flinkJobId, alias);
    compactionService.compact(IdUtil.nanoId(10), flinkJobId, alias);
}
@GetMapping("stop") @GetMapping("stop")
public void stop( public void stop(
@RequestParam("flink_job_id") Long flinkJobId, @RequestParam("flink_job_id") Long flinkJobId,

View File

@@ -3,7 +3,6 @@ package com.lanyuanxiaoyao.service.launcher.compaction.service;
import cn.hutool.core.thread.ThreadUtil; import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.lanyuanxiaoyao.service.common.Constants; import com.lanyuanxiaoyao.service.common.Constants;
import com.lanyuanxiaoyao.service.common.entity.FlinkJob; import com.lanyuanxiaoyao.service.common.entity.FlinkJob;
@@ -99,7 +98,7 @@ public class CompactionService {
java.util.concurrent.ExecutorService threadPool = Executors.newWorkStealingPool(5); java.util.concurrent.ExecutorService threadPool = Executors.newWorkStealingPool(5);
if (Boolean.FALSE.equals(QueueUtil.isEmpty(discoveryClient, clusterConfiguration.getCompactionQueueName()))) { if (Boolean.FALSE.equals(QueueUtil.isEmpty(discoveryClient, clusterConfiguration.getCompactionQueueName()))) {
for (int index = 0; index < 5; index++) { for (int index = 0; index < 5; index++) {
threadPool.submit(this::compact); threadPool.submit(() -> compact());
} }
threadPool.shutdown(); threadPool.shutdown();
while (!threadPool.isTerminated()) { while (!threadPool.isTerminated()) {
@@ -124,39 +123,69 @@ public class CompactionService {
ScheduleJob job = item.getData(); ScheduleJob job = item.getData();
LogHelper.setMdcFlinkJobAndAlias(job.getFlinkJobId(), job.getAlias()); LogHelper.setMdcFlinkJobAndAlias(job.getFlinkJobId(), job.getAlias());
logger.info("Receive job[{}]({}): {}", item.getTraceId(), item.getCreateTime(), item.getData()); logger.info("Receive job[{}]({}): {}", item.getTraceId(), item.getCreateTime(), item.getData());
// 构造任务相关的锁
String lockPath = NameHelper.compactionLauncherLockPath(job.getFlinkJobId(), job.getAlias());
InterProcessLock lock = new InterProcessMutex(zookeeperClient, lockPath);
try { try {
if (lock.acquire(2, TimeUnit.SECONDS)) { compact(job.getBatch(), job.getFlinkJobId(), job.getAlias());
Stat stat = zookeeperClient.checkExists().forPath(NameHelper.compactionRunningLockPath(job.getFlinkJobId(), job.getAlias())); clearHolder(holder);
if (ObjectUtil.isNotNull(stat)) { } catch (JobCannotRunningException e) {
logger.info("Job {} {} is running", job.getFlinkJobId(), job.getAlias());
// 运行中的任务放在持有容器中 // 运行中的任务放在持有容器中
holder.add(item); holder.add(item);
// 进入下一轮,由于最外层有一个 finally所以直接 continue 也会尝试获取新的任务 // 进入下一轮,由于最外层有一个 finally所以直接 continue 也会尝试获取新的任务
continue; } catch (Exception e) {
logger.warn(StrUtil.format("[{}] [{}] Try compaction wrong ", job.getFlinkJobId(), job.getAlias()), e);
String failCount = item.getMetadata(Constants.SCHEDULE_JOB_FAIL_COUNT);
if (StrUtil.isNotBlank(failCount)) {
int fail = Integer.parseInt(failCount);
if (fail > 5) {
logger.error("Job {} cause unaccepted error", item);
return;
} else {
item.getMetadata().put(Constants.SCHEDULE_JOB_FAIL_COUNT, String.valueOf(fail + 1));
} }
FlinkJob flinkJob = infoService.flinkJobDetail(job.getFlinkJobId()); } else {
TableMeta meta = infoService.tableMetaDetail(job.getFlinkJobId(), job.getAlias()); item.getMetadata().put(Constants.SCHEDULE_JOB_FAIL_COUNT, "1");
}
QueueUtil.add(discoveryClient, this.mapper, Constants.COMPACTION_QUEUE_PRE, item);
}
} else {
logger.warn("Schedule job is empty. [{}]({}): {}", item.getTraceId(), item.getCreateTime(), item);
}
} finally {
// 无论如何尝试获取下个任务
item = QueueUtil.poll(discoveryClient, this.mapper, clusterConfiguration.getCompactionQueueName());
LogHelper.removeMdc(Constants.LOG_JOB_ID_LABEL, Constants.LOG_FLINK_JOB_ID_LABEL, Constants.LOG_ALIAS_LABEL);
}
}
clearHolder(holder);
}
public void compact(String batch, Long flinkJobId, String alias) throws Exception {
// 构造任务相关的锁
String lockPath = NameHelper.compactionLauncherLockPath(flinkJobId, alias);
InterProcessLock lock = new InterProcessMutex(zookeeperClient, lockPath);
try {
if (lock.acquire(2, TimeUnit.SECONDS)) {
Stat stat = zookeeperClient.checkExists().forPath(NameHelper.compactionRunningLockPath(flinkJobId, alias));
if (ObjectUtil.isNotNull(stat)) {
logger.info("Job {} {} is running", flinkJobId, alias);
throw new JobCannotRunningException();
}
FlinkJob flinkJob = infoService.flinkJobDetail(flinkJobId);
TableMeta meta = infoService.tableMetaDetail(flinkJobId, alias);
if (TableMetaHelper.existsTag(meta, Constants.TAGS_NO_COMPACT)) { if (TableMetaHelper.existsTag(meta, Constants.TAGS_NO_COMPACT)) {
logger.warn("[{}] [{}] Table tags no compact", flinkJob.getId(), meta.getAlias()); logger.warn("[{}] [{}] Table tags no compact", flinkJob.getId(), meta.getAlias());
clearHolder(holder); return;
continue;
} }
logger.info("[{}] [{}] Execute job", flinkJob.getId(), meta.getAlias()); logger.info("[{}] [{}] Execute job", flinkJob.getId(), meta.getAlias());
// 判断是否存在 Hudi 表,提前结束掉 // 判断是否存在 Hudi 表,提前结束掉
if (!hudiService.existsHudiTable(flinkJob.getId(), meta.getAlias())) { if (!hudiService.existsHudiTable(flinkJob.getId(), meta.getAlias())) {
logger.info("[{}] [{}] Hudi table not found", flinkJob.getId(), meta.getAlias()); logger.info("[{}] [{}] Hudi table not found", flinkJob.getId(), meta.getAlias());
clearHolder(holder); return;
continue;
} }
// 获取待压缩的时间点 // 获取待压缩的时间点
ImmutableList<HudiInstant> selectedInstants = hudiService.timelinePendingCompactionList(flinkJob.getId(), meta.getAlias()); ImmutableList<HudiInstant> selectedInstants = hudiService.timelinePendingCompactionList(flinkJob.getId(), meta.getAlias());
if (ObjectUtil.isEmpty(selectedInstants)) { if (ObjectUtil.isEmpty(selectedInstants)) {
logger.info("[{}] [{}] Table not need to compact", flinkJob.getId(), meta.getAlias()); logger.info("[{}] [{}] Table not need to compact", flinkJob.getId(), meta.getAlias());
clearHolder(holder); return;
continue;
} }
logger.info("[{}] [{}] Selected Instants: {}", flinkJob.getId(), meta.getAlias(), selectedInstants.makeString(",")); logger.info("[{}] [{}] Selected Instants: {}", flinkJob.getId(), meta.getAlias(), selectedInstants.makeString(","));
// 计算待压缩的文件数 // 计算待压缩的文件数
@@ -170,7 +199,7 @@ public class CompactionService {
logger.info("[{}] [{}] Execution", flinkJob.getId(), meta.getAlias()); logger.info("[{}] [{}] Execution", flinkJob.getId(), meta.getAlias());
String applicationId = Failsafe.with(RETRY_POLICY) String applicationId = Failsafe.with(RETRY_POLICY)
.get(() -> executorService.runCompaction( .get(() -> executorService.runCompaction(
job.getBatch(), batch,
flinkJob, flinkJob,
meta, meta,
hadoopConfiguration.getKerberosKeytabPath(), hadoopConfiguration.getKerberosKeytabPath(),
@@ -180,26 +209,10 @@ public class CompactionService {
).toString()); ).toString());
Failsafe.with(RETRY_POLICY) Failsafe.with(RETRY_POLICY)
.run(() -> infoService.saveCompactionId(flinkJob.getId(), meta.getAlias(), applicationId)); .run(() -> infoService.saveCompactionId(flinkJob.getId(), meta.getAlias(), applicationId));
clearHolder(holder);
} else { } else {
logger.warn("Un acquire lock for " + item.getId()); logger.warn("Un acquire lock for " + alias);
holder.add(item); throw new JobCannotRunningException();
} }
} catch (Exception e) {
logger.warn(StrUtil.format("[{}] [{}] Try lock something wrong ", job.getFlinkJobId(), job.getAlias()), e);
String failCount = item.getMetadata(Constants.SCHEDULE_JOB_FAIL_COUNT);
if (StrUtil.isNotBlank(failCount)) {
int fail = Integer.parseInt(failCount);
if (fail > 5) {
logger.error("Job {} cause unaccepted error", item);
continue;
} else {
item.getMetadata().put(Constants.SCHEDULE_JOB_FAIL_COUNT, String.valueOf(fail + 1));
}
} else {
item.getMetadata().put(Constants.SCHEDULE_JOB_FAIL_COUNT, "1");
}
QueueUtil.add(discoveryClient, this.mapper, Constants.COMPACTION_QUEUE_PRE, item);
} finally { } finally {
// 无论如何,尝试解锁 // 无论如何,尝试解锁
try { try {
@@ -210,16 +223,6 @@ public class CompactionService {
logger.error("Release lock failure " + lockPath, e); logger.error("Release lock failure " + lockPath, e);
} }
} }
} else {
logger.warn("Schedule job is empty. [{}]({}): {}", item.getTraceId(), item.getCreateTime(), item);
}
} finally {
// 无论如何尝试获取下个任务
item = QueueUtil.poll(discoveryClient, this.mapper, clusterConfiguration.getCompactionQueueName());
LogHelper.removeMdc(Constants.LOG_JOB_ID_LABEL, Constants.LOG_FLINK_JOB_ID_LABEL, Constants.LOG_ALIAS_LABEL);
}
}
clearHolder(holder);
} }
private void clearHolder(MutableList<QueueItem<ScheduleJob>> holder) { private void clearHolder(MutableList<QueueItem<ScheduleJob>> holder) {
@@ -259,16 +262,5 @@ public class CompactionService {
return Math.toIntExact(parallelism); return Math.toIntExact(parallelism);
} }
private QueueItem<ScheduleJob> deserialize(String body) { private static final class JobCannotRunningException extends Exception {}
if (StrUtil.isBlank(body)) {
return null;
}
try {
return mapper.readValue(body, new TypeReference<QueueItem<ScheduleJob>>() {
});
} catch (Throwable error) {
logger.error("Schedule job parse error", error);
return null;
}
}
} }