1
0

fix: 强化 CPU/memory checker 错误处理、timeout 遵守和快照校验

- Memory checker: reader 与 ctx.signal race,abort 返回 memory/timeout,reject 保持 memory/snapshot
- CPU checker: 第二次快照异常返回 cpu/snapshot,计算前校验空数组/核心数不一致/非有限值/负 delta
- CPU 计算: 零 delta 安全处理,observation 不含 NaN/Infinity
- 文档: CPU 互补描述修正,Memory timeout 约束说明
- 测试: +18 覆盖 timeout、异常和边界输入
This commit is contained in:
2026-05-27 16:33:39 +08:00
parent 145bb8fd04
commit 3390eb5e8d
8 changed files with 394 additions and 24 deletions

View File

@@ -14,9 +14,9 @@
## expect 校验项 ## expect 校验项
| 字段 | 说明 | 必填 | 默认值 | | 字段 | 说明 | 必填 | 默认值 |
| --------------------- | ----------------------------------------------------------------------------- | ---- | ------ | | --------------------- | ----------------------------------------------------------------------------------------------- | ---- | ------ |
| `usagePercent` | 总体 CPU 使用率,范围 `0-100`,使用 `ValueMatcher` | 否 | 无 | | `usagePercent` | 总体 CPU 使用率,范围 `0-100`,使用 `ValueMatcher` | 否 | 无 |
| `idlePercent` | 总体 CPU 空闲率,与 `usagePercent` 互补(`idlePercent = 100 - usagePercent`) | 否 | 无 | | `idlePercent` | 总体 CPU 空闲率,与 `usagePercent` 互补,两者之和恒为 100(`idlePercent + usagePercent = 100`) | 否 | 无 |
| `maxCoreUsagePercent` | 单核心最高使用率,使用 `ValueMatcher` | 否 | 无 | | `maxCoreUsagePercent` | 单核心最高使用率,使用 `ValueMatcher` | 否 | 无 |
| `minCoreUsagePercent` | 单核心最低使用率,使用 `ValueMatcher` | 否 | 无 | | `minCoreUsagePercent` | 单核心最低使用率,使用 `ValueMatcher` | 否 | 无 |
| `durationMs` | 完整执行耗时校验,使用 `ValueMatcher` | 否 | 无 | | `durationMs` | 完整执行耗时校验,使用 `ValueMatcher` | 否 | 无 |

View File

@@ -98,7 +98,7 @@ Memory checker 通过 `systeminformation` 库读取系统内存数据,在 Linu
- **Swap 字段**:当系统未配置交换分区时,`swapTotalBytes` 为 `0`,`swapUsagePercent` 为 `null`(非 `0`)。 - **Swap 字段**:当系统未配置交换分区时,`swapTotalBytes` 为 `0`,`swapUsagePercent` 为 `null`(非 `0`)。
- **`buffcacheBytes`**:反映 Linux 的 buffers + cache 用量,在其他平台上可能为 `null` - **`buffcacheBytes`**:反映 Linux 的 buffers + cache 用量,在其他平台上可能为 `null`
Memory checker 是即时读取(非采样),无需 `sampleDuration`,执行速度远快于 CPU checker。 Memory checker 是即时读取(非采样),无需 `sampleDuration`,执行速度远快于 CPU checker。虽然读取本身很快,但仍受 target `timeout` 约束——若底层系统调用悬挂或阻塞超过 `timeout`,checker 会返回 `memory/timeout` failure。
## 跨平台注意事项 ## 跨平台注意事项

View File

@@ -67,6 +67,45 @@ export function readCpuSnapshot(): CpuCoreSnapshot[] {
})); }));
} }
/**
 * Validates a before/after pair of per-core CPU time snapshots before usage is computed from them.
 *
 * Checks, in order:
 * 1. neither snapshot is empty;
 * 2. both snapshots contain the same number of cores;
 * 3. for each core, every time counter is a finite number, including the five required
 *    counters (`idle`, `user`, `nice`, `sys`, `irq`) when one of them is missing;
 * 4. for each core, the summed delta `user + nice + sys + idle + irq` is not negative.
 *
 * Only the summed delta is sign-checked: a single counter may decrease as long as the
 * core's total does not.
 *
 * @param before - The earlier snapshot.
 * @param after - The later snapshot.
 * @returns `null` when the pair is usable, otherwise a message describing the first problem found.
 */
export function validateCpuSnapshots(before: CpuCoreSnapshot[], after: CpuCoreSnapshot[]): null | string {
  if (before.length === 0 || after.length === 0) {
    return "CPU 快照为空";
  }
  if (before.length !== after.length) {
    return `CPU 快照核心数不一致: before=${before.length}, after=${after.length}`;
  }

  // Counters the delta computation below reads; each must exist and be finite.
  const requiredFields = ["idle", "user", "nice", "sys", "irq"] as const;

  const findNonFinite = (label: string, times: CpuCoreSnapshot["times"]): null | string => {
    // Every value that is present must be finite.
    for (const [name, value] of Object.entries(times)) {
      if (!Number.isFinite(value)) {
        return `CPU 快照包含非有限值: ${label}.times.${name}=${value}`;
      }
    }
    // A missing counter is not listed by Object.entries, yet it would turn the delta into
    // NaN, and `NaN < 0` is false, so it would pass the sign check. Catch it explicitly.
    for (const name of requiredFields) {
      if (!Number.isFinite(times[name])) {
        return `CPU 快照包含非有限值: ${label}.times.${name}=${times[name]}`;
      }
    }
    return null;
  };

  for (let i = 0; i < before.length; i++) {
    // Lengths were verified equal above, so both entries exist at index i.
    const bTimes = before[i]!.times;
    const aTimes = after[i]!.times;

    const nonFinite = findNonFinite(`before[${i}]`, bTimes) ?? findNonFinite(`after[${i}]`, aTimes);
    if (nonFinite !== null) {
      return nonFinite;
    }

    const coreTotalDelta =
      aTimes.user - bTimes.user +
      (aTimes.nice - bTimes.nice) +
      (aTimes.sys - bTimes.sys) +
      (aTimes.idle - bTimes.idle) +
      (aTimes.irq - bTimes.irq);
    if (coreTotalDelta < 0) {
      return `CPU 快照包含负数 delta: core[${i}] totalDelta=${coreTotalDelta}`;
    }
  }

  return null;
}
function round1(value: number): number { function round1(value: number): number {
return Math.round(value * 10) / 10; return Math.round(value * 10) / 10;
} }

View File

@@ -5,7 +5,7 @@ import type { CpuCoreSnapshot, CpuStats, CpuTargetConfig, ResolvedCpuExpectConfi
import { errorFailure } from "../../expect/failure"; import { errorFailure } from "../../expect/failure";
import { checkValueExpectation } from "../../expect/value"; import { checkValueExpectation } from "../../expect/value";
import { parseDuration } from "../../utils"; import { parseDuration } from "../../utils";
import { calculateCpuStats, readCpuSnapshot } from "./calculate"; import { calculateCpuStats, readCpuSnapshot, validateCpuSnapshots } from "./calculate";
import { checkIdlePercent, checkMaxCoreUsage, checkMinCoreUsage, checkUsagePercent } from "./expect"; import { checkIdlePercent, checkMaxCoreUsage, checkMinCoreUsage, checkUsagePercent } from "./expect";
import { normalizeTargetExpect } from "./normalize"; import { normalizeTargetExpect } from "./normalize";
import { cpuCheckerSchemas } from "./schema"; import { cpuCheckerSchemas } from "./schema";
@@ -65,10 +65,9 @@ export class CpuChecker implements CheckerDefinition<ResolvedCpuTarget> {
// 采样等待,支持 AbortSignal 取消 // 采样等待,支持 AbortSignal 取消
const aborted = await waitForDuration(t.cpu.sampleDurationMs, ctx.signal); const aborted = await waitForDuration(t.cpu.sampleDurationMs, ctx.signal);
const after = aborted ? null : this.readSnapshot(); let after: CpuCoreSnapshot[];
if (aborted) {
const durationMs = Math.round(performance.now() - start); const durationMs = Math.round(performance.now() - start);
if (aborted || after === null) {
return { return {
detail: null, detail: null,
durationMs, durationMs,
@@ -80,7 +79,41 @@ export class CpuChecker implements CheckerDefinition<ResolvedCpuTarget> {
}; };
} }
try {
after = this.readSnapshot();
} catch (error) {
const durationMs = Math.round(performance.now() - start);
return {
detail: null,
durationMs,
failure: errorFailure(
"cpu",
"snapshot",
`CPU 快照读取失败: ${error instanceof Error ? error.message : String(error)}`,
),
matched: false,
observation: null,
targetId: t.id,
timestamp,
};
}
const validationError = validateCpuSnapshots(before, after);
if (validationError !== null) {
const durationMs = Math.round(performance.now() - start);
return {
detail: null,
durationMs,
failure: errorFailure("cpu", "snapshot", validationError),
matched: false,
observation: null,
targetId: t.id,
timestamp,
};
}
const stats = calculateCpuStats(before, after); const stats = calculateCpuStats(before, after);
const durationMs = Math.round(performance.now() - start);
const result = checkStats(stats, t.expect, durationMs); const result = checkStats(stats, t.expect, durationMs);
const observation: Record<string, unknown> = { const observation: Record<string, unknown> = {

View File

@@ -45,19 +45,36 @@ export class MemoryChecker implements CheckerDefinition<ResolvedMemoryTarget> {
return `usage ${usageStr}%, total ${totalStr}`; return `usage ${usageStr}%, total ${totalStr}`;
} }
async execute(t: ResolvedMemoryTarget, _ctx: CheckerContext): Promise<CheckResult> { async execute(t: ResolvedMemoryTarget, ctx: CheckerContext): Promise<CheckResult> {
const timestamp = new Date().toISOString(); const timestamp = new Date().toISOString();
const start = performance.now(); const start = performance.now();
let data: Systeminformation.MemData; if (ctx.signal.aborted) {
try {
data = await this.reader();
} catch (error) {
const durationMs = Math.round(performance.now() - start); const durationMs = Math.round(performance.now() - start);
return { return {
detail: null, detail: null,
durationMs, durationMs,
failure: errorFailure( failure: errorFailure("memory", "timeout", "内存读取超时signal 已取消"),
matched: false,
observation: null,
targetId: t.id,
timestamp,
};
}
let data: Systeminformation.MemData;
try {
data = await raceWithSignal(this.reader(), ctx.signal);
} catch (error) {
const durationMs = Math.round(performance.now() - start);
const isTimeout =
error instanceof AbortError || (error instanceof Error && error.message === MEMORY_TIMEOUT_MESSAGE);
return {
detail: null,
durationMs,
failure: isTimeout
? errorFailure("memory", "timeout", "内存读取超时")
: errorFailure(
"memory", "memory",
"snapshot", "snapshot",
`内存数据读取失败: ${error instanceof Error ? error.message : String(error)}`, `内存数据读取失败: ${error instanceof Error ? error.message : String(error)}`,
@@ -181,3 +198,35 @@ function formatBytes(bytes: number): string {
function formatNumber(value: number): string { function formatNumber(value: number): string {
return Number.isInteger(value) ? String(value) : String(Number(value.toFixed(1))); return Number.isInteger(value) ? String(value) : String(Number(value.toFixed(1)));
} }
/** Message carried by every {@link AbortError}. */
const MEMORY_TIMEOUT_MESSAGE = "Memory read aborted by signal";

/** Rejection reason used by {@link raceWithSignal} when the signal aborts. */
class AbortError extends Error {
  constructor() {
    super(MEMORY_TIMEOUT_MESSAGE);
    this.name = "AbortError";
  }
}

/**
 * Settles with `promise`, unless `signal` aborts first.
 *
 * The underlying operation is not cancelled; only the returned promise stops waiting for it.
 *
 * @param promise - The operation to wait for.
 * @param signal - Abort signal that ends the wait.
 * @returns A promise that resolves with the value of `promise`, rejects with an
 *   {@link AbortError} if `signal` is already aborted or aborts first, or rejects with the
 *   rejection of `promise` (wrapped in an `Error` when the reason is not one).
 */
function raceWithSignal<T>(promise: Promise<T>, signal: AbortSignal): Promise<T> {
  if (signal.aborted) {
    // Nobody will observe `promise` on this path. Attach a no-op handler so a later
    // rejection of the abandoned operation is not reported as an unhandled rejection.
    promise.catch(() => undefined);
    return Promise.reject(new AbortError());
  }

  return new Promise<T>((resolve, reject) => {
    const onAbort = (): void => {
      reject(new AbortError());
    };
    signal.addEventListener("abort", onAbort, { once: true });

    promise.then(
      (value) => {
        // Detach the listener so a long-lived signal does not retain this closure.
        signal.removeEventListener("abort", onAbort);
        resolve(value);
      },
      (error: unknown) => {
        signal.removeEventListener("abort", onAbort);
        reject(error instanceof Error ? error : new Error(String(error)));
      },
    );
  });
}

View File

@@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test";
import type { CpuCoreSnapshot } from "../../../../../src/server/checker/runner/cpu/types"; import type { CpuCoreSnapshot } from "../../../../../src/server/checker/runner/cpu/types";
import { calculateCpuStats } from "../../../../../src/server/checker/runner/cpu/calculate"; import { calculateCpuStats, validateCpuSnapshots } from "../../../../../src/server/checker/runner/cpu/calculate";
function makeCore(user: number, nice: number, sys: number, idle: number, irq: number): CpuCoreSnapshot { function makeCore(user: number, nice: number, sys: number, idle: number, irq: number): CpuCoreSnapshot {
return { times: { idle, irq, nice, sys, user } }; return { times: { idle, irq, nice, sys, user } };
@@ -110,3 +110,68 @@ describe("calculateCpuStats", () => {
expect(stats.usagePercent).toBe(60); expect(stats.usagePercent).toBe(60);
}); });
}); });
// Unit tests for snapshot-pair validation, plus the zero-delta behaviour of calculateCpuStats.
describe("validateCpuSnapshots", () => {
  test("合法 snapshot 返回 null", () => {
    const earlier = [makeCore(100, 0, 0, 900, 0)];
    const later = [makeCore(200, 0, 0, 800, 0)];

    expect(validateCpuSnapshots(earlier, later)).toBeNull();
  });

  test("空 before snapshot", () => {
    const outcome = validateCpuSnapshots([], [makeCore(0, 0, 0, 0, 0)]);

    expect(outcome).toBe("CPU 快照为空");
  });

  test("空 after snapshot", () => {
    const outcome = validateCpuSnapshots([makeCore(0, 0, 0, 0, 0)], []);

    expect(outcome).toBe("CPU 快照为空");
  });

  test("核心数不一致", () => {
    const oneCore = [makeCore(0, 0, 0, 0, 0)];
    const twoCores = [makeCore(0, 0, 0, 0, 0), makeCore(0, 0, 0, 0, 0)];

    expect(validateCpuSnapshots(oneCore, twoCores)).toBe("CPU 快照核心数不一致: before=1, after=2");
  });

  test("before 包含 NaN time 值", () => {
    const earlier = [{ times: { idle: NaN, irq: 0, nice: 0, sys: 0, user: 0 } }];
    const later = [makeCore(0, 0, 0, 0, 0)];

    const message = validateCpuSnapshots(earlier, later);

    expect(message).toContain("非有限值");
    expect(message).toContain("before[0]");
  });

  test("after 包含 Infinity time 值", () => {
    const earlier = [makeCore(0, 0, 0, 0, 0)];
    const later = [{ times: { idle: Infinity, irq: 0, nice: 0, sys: 0, user: 0 } }];

    const message = validateCpuSnapshots(earlier, later);

    expect(message).toContain("非有限值");
    expect(message).toContain("after[0]");
  });

  test("负数 total delta", () => {
    // The later counters are smaller than the earlier ones, so the summed delta is negative.
    const message = validateCpuSnapshots([makeCore(1000, 0, 0, 0, 0)], [makeCore(100, 0, 0, 0, 0)]);

    expect(message).toContain("负数 delta");
  });

  test("零 delta 合法", () => {
    const earlier = [makeCore(100, 0, 0, 100, 0)];
    const later = [makeCore(100, 0, 0, 100, 0)];

    expect(validateCpuSnapshots(earlier, later)).toBeNull();
  });

  test("零 delta 不产生除零错误", () => {
    const earlier = [makeCore(100, 0, 0, 100, 0)];
    const later = [makeCore(100, 0, 0, 100, 0)];

    const { idlePercent, maxCoreUsagePercent, minCoreUsagePercent, usagePercent } = calculateCpuStats(earlier, later);

    // Identical snapshots must produce finite numbers, not NaN or Infinity.
    expect(Number.isFinite(usagePercent)).toBe(true);
    expect(Number.isFinite(idlePercent)).toBe(true);
    expect(Number.isFinite(maxCoreUsagePercent)).toBe(true);
    expect(Number.isFinite(minCoreUsagePercent)).toBe(true);
    expect(usagePercent).toBe(0);
    expect(idlePercent).toBe(0);
  });
});

View File

@@ -210,6 +210,130 @@ describe("CpuChecker execute", () => {
expect(result.failure?.path).toBe("timeout"); expect(result.failure?.path).toBe("timeout");
}); });
test("第二次 snapshot 抛错返回 cpu/snapshot failure", async () => {
  // The first read succeeds; every later read throws.
  const firstSnapshot = [makeCore(0, 0, 0, 10000, 0)];
  let reads = 0;
  const reader: SnapshotReader = () => {
    reads += 1;
    if (reads !== 1) {
      throw new Error("second snapshot failed");
    }
    return firstSnapshot;
  };
  const checker = new CpuChecker(reader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("cpu");
  expect(outcome.failure?.path).toBe("snapshot");
  expect(outcome.observation).toBeNull();
});
test("空 snapshot pair 返回 cpu/snapshot failure", async () => {
  // Every read yields an empty core list.
  const emptyReader: SnapshotReader = () => [];
  const checker = new CpuChecker(emptyReader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("cpu");
  expect(outcome.failure?.path).toBe("snapshot");
});
test("核心数不一致返回 cpu/snapshot failure", async () => {
  // First read reports one core; all later reads report two.
  const oneCore = [makeCore(0, 0, 0, 100, 0)];
  const twoCores = [makeCore(0, 0, 0, 100, 0), makeCore(0, 0, 0, 100, 0)];
  let reads = 0;
  const reader: SnapshotReader = () => {
    const snapshot = reads === 0 ? oneCore : twoCores;
    reads += 1;
    return snapshot;
  };
  const checker = new CpuChecker(reader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("cpu");
  expect(outcome.failure?.path).toBe("snapshot");
  expect(outcome.failure?.message).toContain("核心数不一致");
});
test("非有限 CPU time 值返回 cpu/snapshot failure", async () => {
  // First read is well-formed; all later reads carry a NaN idle counter.
  const cleanSnapshot: CpuCoreSnapshot[] = [makeCore(0, 0, 0, 100, 0)];
  const corruptSnapshot: CpuCoreSnapshot[] = [{ times: { idle: NaN, irq: 0, nice: 0, sys: 0, user: 100 } }];
  let reads = 0;
  const reader: SnapshotReader = () => {
    const snapshot = reads === 0 ? cleanSnapshot : corruptSnapshot;
    reads += 1;
    return snapshot;
  };
  const checker = new CpuChecker(reader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("cpu");
  expect(outcome.failure?.path).toBe("snapshot");
  expect(outcome.failure?.message).toContain("非有限值");
});
test("负数 CPU time delta 返回 cpu/snapshot failure", async () => {
  // The later snapshot has smaller counters than the earlier one.
  const reader = makeSnapshotReader([makeCore(1000, 0, 0, 0, 0)], [makeCore(100, 0, 0, 0, 0)]);
  const checker = new CpuChecker(reader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("cpu");
  expect(outcome.failure?.path).toBe("snapshot");
  expect(outcome.failure?.message).toContain("负数 delta");
});
test("零 delta snapshot 返回稳定安全值", async () => {
  // Both reads return identical counters, so every delta is zero.
  const reader = makeSnapshotReader([makeCore(100, 0, 0, 100, 0)], [makeCore(100, 0, 0, 100, 0)]);
  const checker = new CpuChecker(reader);
  const rawTarget: RawTargetConfig = { cpu: {}, id: "cpu-test", type: "cpu" };
  const resolved = checker.resolve(rawTarget, makeResolveContext({ defaultTimeoutMs: 5000 }));
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(true);
  expect(outcome.failure).toBeNull();
  const expectedObservation = {
    idlePercent: 0,
    maxCoreUsagePercent: 0,
    minCoreUsagePercent: 0,
    usagePercent: 0,
  };
  expect(outcome.observation).toMatchObject(expectedObservation);
});
test("includePerCore=true 时输出 perCoreUsagePercent", async () => { test("includePerCore=true 时输出 perCoreUsagePercent", async () => {
const before = [makeCore(0, 0, 0, 0, 0), makeCore(0, 0, 0, 0, 0)]; const before = [makeCore(0, 0, 0, 0, 0), makeCore(0, 0, 0, 0, 0)];
const after = [makeCore(8000, 0, 0, 2000, 0), makeCore(2000, 0, 0, 8000, 0)]; const after = [makeCore(8000, 0, 0, 2000, 0), makeCore(2000, 0, 0, 8000, 0)];

View File

@@ -153,6 +153,66 @@ describe("MemoryChecker execute", () => {
expect(result.observation).toBeNull(); expect(result.observation).toBeNull();
}); });
test("signal 已 abort 时返回 timeout failure", async () => {
  const checker = new MemoryChecker(() => Promise.resolve(makeMemData()));
  const rawTarget: RawTargetConfig = { id: "mem-test", memory: {}, type: "memory" };
  const resolved = checker.resolve(rawTarget, makeResolveContext());
  // Abort before execute is called.
  const abortController = new AbortController();
  abortController.abort();

  const outcome = await checker.execute(resolved, { signal: abortController.signal });

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("memory");
  expect(outcome.failure?.path).toBe("timeout");
  expect(outcome.observation).toBeNull();
});
test("pending reader 被 signal abort 后返回 timeout failure", async () => {
  // Deliberately never settles, simulating a hung reader.
  const hangingReader = () => new Promise<Systeminformation.MemData>(() => {});
  const checker = new MemoryChecker(hangingReader);
  const rawTarget: RawTargetConfig = { id: "mem-test", memory: {}, type: "memory" };
  const resolved = checker.resolve(rawTarget, makeResolveContext());
  const abortController = new AbortController();

  // Start the check first, then abort while the read is still pending.
  const pendingOutcome = checker.execute(resolved, { signal: abortController.signal });
  abortController.abort();
  const outcome = await pendingOutcome;

  expect(outcome.matched).toBe(false);
  expect(outcome.failure?.phase).toBe("memory");
  expect(outcome.failure?.path).toBe("timeout");
  expect(outcome.observation).toBeNull();
});
test("reader 在 abort 前 resolve 时返回正常结果", async () => {
  // 4 GiB active out of 8 GiB total.
  const memData = makeMemData({ active: 4294967296, total: 8589934592 });
  const checker = new MemoryChecker(() => Promise.resolve(memData));
  const rawTarget: RawTargetConfig = { id: "mem-test", memory: {}, type: "memory" };
  const resolved = checker.resolve(rawTarget, makeResolveContext());
  resolved.expect = { usagePercent: { lte: 85 } };
  const { signal } = new AbortController();

  const outcome = await checker.execute(resolved, { signal });

  expect(outcome.matched).toBe(true);
  expect(outcome.failure).toBeNull();
  const expectedObservation = {
    totalBytes: 8589934592,
    usagePercent: 50,
  };
  expect(outcome.observation).toMatchObject(expectedObservation);
});
test("detail 格式", async () => { test("detail 格式", async () => {
const data = makeMemData({ active: 4294967296, total: 8589934592 }); const data = makeMemData({ active: 4294967296, total: 8589934592 });
const reader = () => Promise.resolve(data); const reader = () => Promise.resolve(data);