refactor: ProbeEngine 调度引擎重写为 per-target setTimeout 链
将 per-group setInterval + groupBy 调度模式改为 per-target setTimeout 链,实现 catch-up 语义(超时后立即补执行)、AbortController 优雅停止、循环内错误隔离和 overrun warn 日志。移除 groupBy/probeGroup/timers,新增 sleep/runLoop/runOnce。新增 croner 依赖,供后续 cron 表达式支持使用。
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
import { groupBy, isError, Semaphore } from "es-toolkit";
|
||||
import { isError, Semaphore } from "es-toolkit";
|
||||
|
||||
import type { Logger } from "../logger";
|
||||
import type { ProbeStore } from "./store";
|
||||
@@ -11,14 +11,15 @@ import { checkerRegistry } from "./runner";
|
||||
const PRUNE_INTERVAL_MS = 3600000;
|
||||
|
||||
export class ProbeEngine {
|
||||
private abort: AbortController | null = null;
|
||||
private lastMatched = new Map<string, boolean>();
|
||||
private logger: Logger;
|
||||
private pruneTimer: null | ReturnType<typeof setInterval> = null;
|
||||
private retentionMs: number;
|
||||
private semaphore: Semaphore;
|
||||
private store: ProbeStore;
|
||||
private targetIds = new Set<string>();
|
||||
private targets: ResolvedTargetBase[];
|
||||
private timers: Array<ReturnType<typeof setInterval>> = [];
|
||||
|
||||
constructor(
|
||||
store: ProbeStore,
|
||||
@@ -37,32 +38,28 @@ export class ProbeEngine {
|
||||
}
|
||||
|
||||
start(): void {
|
||||
const groups = groupBy(this.targets, (t) => t.intervalMs);
|
||||
this.abort = new AbortController();
|
||||
const signal = this.abort.signal;
|
||||
|
||||
for (const [intervalMs, groupTargets] of Object.entries(groups)) {
|
||||
void this.probeGroup(groupTargets);
|
||||
|
||||
const timer = setInterval(() => {
|
||||
void this.probeGroup(groupTargets);
|
||||
}, Number(intervalMs));
|
||||
|
||||
this.timers.push(timer);
|
||||
for (const target of this.targets) {
|
||||
void this.runLoop(target, signal);
|
||||
}
|
||||
|
||||
if (this.retentionMs > 0) {
|
||||
this.store.prune(this.retentionMs);
|
||||
const pruneTimer = setInterval(() => {
|
||||
this.pruneTimer = setInterval(() => {
|
||||
this.store.prune(this.retentionMs);
|
||||
}, PRUNE_INTERVAL_MS);
|
||||
this.timers.push(pruneTimer);
|
||||
}
|
||||
}
|
||||
|
||||
stop(): void {
|
||||
for (const timer of this.timers) {
|
||||
clearInterval(timer);
|
||||
this.abort?.abort();
|
||||
this.abort = null;
|
||||
if (this.pruneTimer) {
|
||||
clearInterval(this.pruneTimer);
|
||||
this.pruneTimer = null;
|
||||
}
|
||||
this.timers = [];
|
||||
}
|
||||
|
||||
private initStateCache(): void {
|
||||
@@ -108,44 +105,6 @@ export class ProbeEngine {
|
||||
this.lastMatched.set(result.targetId, current);
|
||||
}
|
||||
|
||||
/**
 * Probes every target in `targets` concurrently and records one result per target.
 *
 * Each check runs while holding a semaphore permit. Fulfilled checks are
 * written and logged; rejected checks are logged as errors and recorded as a
 * synthetic "internal/engine" failure result.
 *
 * @param targets - Targets to probe in this round.
 * @returns A promise that resolves once every outcome has been handled.
 */
private async probeGroup(targets: ResolvedTargetBase[]): Promise<void> {
  // Run a single check while holding a semaphore permit; the permit is
  // returned whether the check fulfils or rejects.
  const guardedCheck = async (target: ResolvedTargetBase) => {
    await this.semaphore.acquire();
    try {
      return await this.runCheck(target);
    } finally {
      this.semaphore.release();
    }
  };

  const settled = await Promise.allSettled(targets.map((target) => guardedCheck(target)));

  settled.forEach((outcome, position) => {
    if (outcome.status === "fulfilled") {
      const checkResult = outcome.value;
      this.writeResult(checkResult);
      this.logStateChange(checkResult);
      this.logCheckDebug(checkResult);
      return;
    }

    // allSettled keeps input order, so the same index identifies the target.
    const failedTarget = targets[position];
    if (!failedTarget) {
      return;
    }

    this.logger.error(
      {
        reason: formatReason(outcome.reason),
        targetId: failedTarget.id,
        targetType: failedTarget.type,
      },
      `探针执行失败: ${formatReason(outcome.reason)}`,
    );
    this.writeResult({
      detail: null,
      durationMs: null,
      failure: errorFailure("internal", "engine", formatReason(outcome.reason)),
      matched: false,
      observation: null,
      targetId: failedTarget.id,
      timestamp: new Date().toISOString(),
    });
  });
}
|
||||
|
||||
private refreshCache(): void {
|
||||
this.targetIds.clear();
|
||||
for (const target of this.store.getTargets()) {
|
||||
@@ -165,6 +124,62 @@ export class ProbeEngine {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Drives the probe loop for a single target until `signal` is aborted.
 *
 * Each iteration times one `runOnce` call, warns when it took longer than
 * `target.intervalMs`, then sleeps for the remainder of the interval. When a
 * run overran, the delay is zero and the next run starts right away.
 *
 * @param target - Target to probe; `intervalMs` is the desired period between run starts.
 * @param signal - Abort signal; the loop exits once it is aborted.
 * @returns A promise that resolves when the loop has stopped.
 */
private async runLoop(target: ResolvedTargetBase, signal: AbortSignal): Promise<void> {
  while (!signal.aborted) {
    const start = performance.now();
    try {
      await this.runOnce(target, signal);
    } catch (error) {
      // An abort is the regular shutdown path: leave quietly.
      if (signal.aborted) {
        break;
      }
      // Any other exception must not silently end probing for this target:
      // log it and carry on with the normal schedule.
      const reason = formatReason(error);
      this.logger.error(
        { reason, targetId: target.id, targetType: target.type },
        `拨测循环异常: ${reason}`,
      );
    }

    const elapsed = performance.now() - start;
    if (elapsed > target.intervalMs) {
      this.logger.warn(
        { elapsed, intervalMs: target.intervalMs, targetId: target.id },
        `拨测超时: ${target.id} 耗时 ${Math.round(elapsed)}ms > 间隔 ${target.intervalMs}ms`,
      );
    }

    // Wait out the rest of the interval; clamp to zero after an overrun.
    const delay = Math.max(0, target.intervalMs - elapsed);
    try {
      await sleep(delay, signal);
    } catch {
      // sleep rejects only when the signal is aborted.
      break;
    }
  }
}
|
||||
|
||||
/**
 * Runs one check for `target` while holding a semaphore permit and records the outcome.
 *
 * @param target - Target to probe.
 * @param signal - Optional abort signal, inspected once after the permit is acquired.
 * @returns The check result, or a synthetic "internal/engine" failure result
 *   when the check or the result handling throws.
 * @throws DOMException named "AbortError" when `signal` is already aborted
 *   once the permit has been acquired; the permit is released first.
 */
private async runOnce(target: ResolvedTargetBase, signal?: AbortSignal): Promise<CheckResult> {
  await this.semaphore.acquire();

  // The wait for a permit may have outlasted a stop request; bail out
  // before starting any work and hand the permit back.
  if (signal?.aborted) {
    this.semaphore.release();
    throw new DOMException("Aborted", "AbortError");
  }

  try {
    const outcome = await this.runCheck(target);
    this.writeResult(outcome);
    this.logStateChange(outcome);
    this.logCheckDebug(outcome);
    return outcome;
  } catch (error) {
    const reason = formatReason(error);
    this.logger.error(
      { reason, targetId: target.id, targetType: target.type },
      `探针执行失败: ${reason}`,
    );

    // Record the failure as a regular result so the target still gets an entry.
    const failed: CheckResult = {
      detail: null,
      durationMs: null,
      failure: errorFailure("internal", "engine", reason),
      matched: false,
      observation: null,
      targetId: target.id,
      timestamp: new Date().toISOString(),
    };
    this.writeResult(failed);
    return failed;
  } finally {
    this.semaphore.release();
  }
}
|
||||
|
||||
private writeResult(result: CheckResult): void {
|
||||
if (!this.targetIds.has(result.targetId)) return;
|
||||
|
||||
@@ -182,3 +197,24 @@ export class ProbeEngine {
|
||||
/**
 * Turns an arbitrary thrown value into a log-friendly string.
 *
 * @param reason - Any thrown or rejected value.
 * @returns The `message` of an error value, otherwise `String(reason)`.
 */
function formatReason(reason: unknown): string {
  if (isError(reason)) {
    return reason.message;
  }
  return String(reason);
}
|
||||
|
||||
/**
 * Waits for `ms` milliseconds unless `signal` is aborted first.
 *
 * @param ms - Delay in milliseconds, passed to `setTimeout`.
 * @param signal - Abort signal that cancels the wait.
 * @returns A promise that resolves after the delay, or rejects with a
 *   DOMException named "AbortError" if the signal is already aborted or
 *   becomes aborted while waiting.
 */
function sleep(ms: number, signal: AbortSignal): Promise<void> {
  if (signal.aborted) {
    return Promise.reject(new DOMException("Aborted", "AbortError"));
  }

  return new Promise<void>((resolve, reject) => {
    // On abort: cancel the pending timer so it cannot fire later, then reject.
    const handleAbort = (): void => {
      clearTimeout(pending);
      reject(new DOMException("Aborted", "AbortError"));
    };

    const pending = setTimeout(() => {
      // Detach the listener so it does not linger on a long-lived signal.
      signal.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);

    signal.addEventListener("abort", handleAbort, { once: true });
  });
}
|
||||
|
||||
Reference in New Issue
Block a user