1
0
Files
DiAL/tests/server/checker/runner/llm/output-expect.test.ts
lanyuanxiaoyao 60a54b483f refactor: expect 类型模型重构,Raw/Resolved 双层分离与断言基础设施内聚
- 重命名 ContentRules→ContentExpectations, KeyValueExpect→KeyedExpectations
- 新增 Raw/Resolved 双层模型:resolve 阶段物化为执行计划,store 持久化 Raw 快照
- HTTP body 按需读取:status/headers 失败或无 body expectation 时不读取 body
- 新增 displayValueExpectation() 解包 failure.expected 用户可读展示
- 修复 checkEarlyTimeout 独立 lte/lt 检查,修复 KeyedExpectations JSON Schema
- 新增 expect/value.ts(resolve/check/display)、keyed.ts、content.ts、headers.ts、status.ts
- 删除旧 normalize.ts/matcher.ts/validate-matcher.ts/key-value.ts
- 更新 DEVELOPMENT.md:expect 五层管线表、displayValueExpectation、1.7↔1.10 交叉引用
- 同步 13 个 main specs,归档 refactor-expect-type-model 变更(62/62 tasks)
2026-05-20 16:12:48 +08:00

268 lines
9.1 KiB
TypeScript

import { describe, expect, test } from "bun:test";
import type {
RawContentExpectations,
RawKeyedExpectations,
RawValueExpectation,
} from "../../../../../src/server/checker/expect/types";
import type { LlmCheckObservation, ResolvedLlmExpectConfig } from "../../../../../src/server/checker/runner/llm/types";
import { checkContentExpectations, resolveContentExpectations } from "../../../../../src/server/checker/expect/content";
import { resolveKeyedExpectations } from "../../../../../src/server/checker/expect/keyed";
import { resolveValueExpectation } from "../../../../../src/server/checker/expect/value";
import { runExpects } from "../../../../../src/server/checker/runner/llm/expect";
/**
 * Raw (unresolved) expect input consumed by the `resolveLlmExpect` helper of
 * this test file. Every field is optional.
 */
interface RawLlmExpectInput {
  /** Raw expectation passed through `resolveValueExpectation`. */
  finishReason?: RawValueExpectation;
  /** Raw keyed expectations passed through `resolveKeyedExpectations`. */
  headers?: RawKeyedExpectations;
  /** Raw content expectations passed through `resolveContentExpectations`. */
  output?: RawContentExpectations;
  /** Raw expectation passed through `resolveValueExpectation`. */
  rawFinishReason?: RawValueExpectation;
  /** Accepted statuses; `resolveLlmExpect` substitutes `[200]` when omitted. */
  status?: Array<number | string>;
  /** Stream expectations; `completed` becomes `true` in `resolveLlmExpect` when omitted. */
  stream?: {
    completed?: boolean;
    firstTokenMs?: RawValueExpectation;
  };
  /** Per-counter token usage expectations. */
  usage?: {
    inputTokens?: RawValueExpectation;
    outputTokens?: RawValueExpectation;
    totalTokens?: RawValueExpectation;
  };
}
/**
 * Resolves raw content expectations and checks them against an output text.
 *
 * @param outputText - Output text to check; may be `null`.
 * @param rawRules - Raw content expectations, or `undefined` for none.
 * @returns Whatever `checkContentExpectations` returns, with both `path` and
 *   `phase` of the check context set to `"output"`.
 */
function checkOutputRules(outputText: null | string, rawRules: RawContentExpectations | undefined) {
  const resolvedRules = resolveContentExpectations(rawRules);
  // The context literal stays inline so it keeps its contextual typing.
  return checkContentExpectations(outputText, resolvedRules, { path: "output", phase: "output" });
}
/**
 * Builds an observation fixture: an HTTP-mode call with status 200, output
 * text "OK", finish reason "stop" and a small token usage.
 *
 * @param overrides - Top-level fields that replace the defaults (shallow merge).
 * @returns A freshly built observation object on every call.
 */
function makeObservation(overrides?: Partial<LlmCheckObservation>): LlmCheckObservation {
  const defaults: LlmCheckObservation = {
    finishReason: "stop",
    http: { headers: {}, status: 200, statusText: "OK" },
    mode: "http",
    model: "gpt-4o-mini",
    outputText: "OK",
    provider: "openai",
    rawFinishReason: "stop",
    stream: null,
    usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 },
    warnings: [],
  };
  // Spreading `undefined` is a no-op, so a missing `overrides` yields the defaults.
  return { ...defaults, ...overrides };
}
/**
 * Turns a raw expect input into a resolved expect config for `runExpects`.
 *
 * @param raw - Raw expect input; `undefined` is passed straight through.
 * @returns `undefined` when `raw` is `undefined`; otherwise a config in which
 *   each expectation went through its `resolve*` function, `status` falls back
 *   to `[200]`, `stream.completed` falls back to `true`, and `stream` / `usage`
 *   stay `undefined` when absent from the input.
 */
function resolveLlmExpect(raw: RawLlmExpectInput | undefined): ResolvedLlmExpectConfig | undefined {
  if (raw === undefined) {
    return undefined;
  }

  const { stream: rawStream, usage: rawUsage } = raw;

  const finishReason = resolveValueExpectation(raw.finishReason);
  const headers = resolveKeyedExpectations(raw.headers);
  const output = resolveContentExpectations(raw.output);
  const rawFinishReason = resolveValueExpectation(raw.rawFinishReason);
  const status = raw.status ?? [200];

  const stream = rawStream
    ? {
        completed: rawStream.completed ?? true,
        firstTokenMs: resolveValueExpectation(rawStream.firstTokenMs),
      }
    : undefined;

  const usage = rawUsage
    ? {
        inputTokens: resolveValueExpectation(rawUsage.inputTokens),
        outputTokens: resolveValueExpectation(rawUsage.outputTokens),
        totalTokens: resolveValueExpectation(rawUsage.totalTokens),
      }
    : undefined;

  return { finishReason, headers, output, rawFinishReason, status, stream, usage };
}
/**
 * Content expectations (`equals`, `contains`, `regex`, `json`) applied to the
 * output text through the `checkOutputRules` helper.
 */
describe("LLM output expectations", () => {
  test("equals 严格匹配", () => {
    // A trailing newline or trailing space is expected to break the match.
    const samples: Array<[string, boolean]> = [
      ["OK", true],
      ["OK\n", false],
      ["OK ", false],
    ];
    for (const [text, shouldMatch] of samples) {
      expect(checkOutputRules(text, [{ equals: "OK" }]).matched).toBe(shouldMatch);
    }
  });

  test("equals null 输出失败", () => {
    const { matched } = checkOutputRules(null, [{ equals: "OK" }]);
    expect(matched).toBe(false);
  });

  test("contains 匹配", () => {
    const samples: Array<[null | string, boolean]> = [
      ["Hello World", true],
      ["Hello", false],
      [null, false],
    ];
    for (const [text, shouldMatch] of samples) {
      expect(checkOutputRules(text, [{ contains: "World" }]).matched).toBe(shouldMatch);
    }
  });

  test("regex 匹配", () => {
    const samples: Array<[null | string, string, boolean]> = [
      ["status: ok", "^status:", true],
      ["status: ok", "^error:", false],
      [null, "^status:", false],
    ];
    for (const [text, pattern, shouldMatch] of samples) {
      expect(checkOutputRules(text, [{ regex: pattern }]).matched).toBe(shouldMatch);
    }
  });

  test("json 匹配", () => {
    const payload = '{"status":"ok","code":200}';

    const statusEquals = checkOutputRules(payload, [{ json: { equals: "ok", path: "$.status" } }]);
    expect(statusEquals.matched).toBe(true);

    const codeAtLeast = checkOutputRules(payload, [{ json: { gte: 200, path: "$.code" } }]);
    expect(codeAtLeast.matched).toBe(true);

    // `$.code` is absent from this payload, so `exists: true` is expected to fail.
    const missingCode = checkOutputRules('{"status":"ok"}', [{ json: { exists: true, path: "$.code" } }]);
    expect(missingCode.matched).toBe(false);
  });

  test("json 非法 JSON 失败", () => {
    const { matched } = checkOutputRules("not json", [{ json: { exists: true, path: "$.x" } }]);
    expect(matched).toBe(false);
  });

  test("多规则按顺序快速失败", () => {
    // The first rule fails while the second one would match.
    const { failure, matched } = checkOutputRules("Hello World", [{ equals: "wrong" }, { contains: "World" }]);
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("output");
  });

  test("undefined expectations 返回通过", () => {
    const outputs: Array<null | string> = ["anything", null];
    for (const text of outputs) {
      expect(checkOutputRules(text, undefined).matched).toBe(true);
    }
  });
});
/**
 * `runExpects` driven with observations from `makeObservation` and configs
 * from `resolveLlmExpect`; failing cases also assert the reported phase.
 */
describe("LLM runExpects", () => {
  // Stream-mode observation; only the first-token latency differs between tests.
  const streamed = (firstTokenMs: null | number) =>
    makeObservation({
      mode: "stream",
      stream: { completed: true, firstTokenMs },
    });

  // HTTP 200 observation whose response carries the given content-type header.
  const respondingWith = (contentType: string) =>
    makeObservation({
      http: { headers: { "content-type": contentType }, status: 200, statusText: "OK" },
    });

  test("全部 expect 通过", () => {
    const { failure, matched } = runExpects(
      makeObservation(),
      resolveLlmExpect({
        finishReason: { equals: "stop" },
        output: [{ contains: "OK" }],
        status: [200],
      }),
    );
    expect(matched).toBe(true);
    expect(failure).toBeNull();
  });

  test("默认 status=200 通过", () => {
    // No expect config at all.
    const { matched } = runExpects(makeObservation(), undefined);
    expect(matched).toBe(true);
  });

  test("status 不匹配失败", () => {
    const { failure, matched } = runExpects(makeObservation(), resolveLlmExpect({ status: [404] }));
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("status");
  });

  test("finishReason 不匹配失败", () => {
    const { failure, matched } = runExpects(
      makeObservation(),
      resolveLlmExpect({ finishReason: { equals: "length" } }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("finishReason");
  });

  test("rawFinishReason 不匹配失败", () => {
    const { failure, matched } = runExpects(
      makeObservation(),
      resolveLlmExpect({ rawFinishReason: { equals: "end_turn" } }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("rawFinishReason");
  });

  test("usage 不匹配失败", () => {
    // The fixture reports 14 total tokens.
    const { failure, matched } = runExpects(
      makeObservation(),
      resolveLlmExpect({ usage: { totalTokens: { gte: 100 } } }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("usage");
  });

  test("usage 匹配通过", () => {
    const { matched } = runExpects(makeObservation(), resolveLlmExpect({ usage: { totalTokens: { lte: 20 } } }));
    expect(matched).toBe(true);
  });

  test("stream completed 匹配", () => {
    const { matched } = runExpects(streamed(500), resolveLlmExpect({ stream: { completed: true } }));
    expect(matched).toBe(true);
  });

  test("stream firstTokenMs 匹配", () => {
    const { matched } = runExpects(streamed(500), resolveLlmExpect({ stream: { firstTokenMs: { lte: 1000 } } }));
    expect(matched).toBe(true);
  });

  test("stream firstTokenMs 缺失失败", () => {
    // The observation has no first-token latency (`null`).
    const { failure, matched } = runExpects(
      streamed(null),
      resolveLlmExpect({ stream: { firstTokenMs: { lte: 1000 } } }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("stream");
  });

  test("headers 匹配通过", () => {
    const { matched } = runExpects(
      respondingWith("application/json"),
      resolveLlmExpect({ headers: { "content-type": "application/json" } }),
    );
    expect(matched).toBe(true);
  });

  test("headers 不匹配失败", () => {
    const { failure, matched } = runExpects(
      respondingWith("text/plain"),
      resolveLlmExpect({ headers: { "content-type": "application/json" } }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("headers");
  });

  test("首个 expect 失败立即返回", () => {
    // The output rule would match; the status mismatch is the failure expected to be reported.
    const { failure, matched } = runExpects(
      makeObservation(),
      resolveLlmExpect({
        output: [{ contains: "OK" }],
        status: [404],
      }),
    );
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("status");
  });

  test("APICallError 状态码 expect 通过", () => {
    // Error-style observation: HTTP 401 with no finish reason, output text or usage.
    const unauthorized = makeObservation({
      finishReason: null,
      http: { headers: {}, status: 401, statusText: "Unauthorized" },
      outputText: null,
      usage: null,
    });
    const { matched } = runExpects(unauthorized, resolveLlmExpect({ status: [401] }));
    expect(matched).toBe(true);
  });
});