refactor: expect 类型模型重构,Raw/Resolved 双层分离与断言基础设施内聚
- 重命名 ContentRules→ContentExpectations, KeyValueExpect→KeyedExpectations
- 新增 Raw/Resolved 双层模型:resolve 阶段物化为执行计划,store 持久化 Raw 快照
- HTTP body 按需读取:status/headers 失败或无 body expectation 时不读取 body
- 新增 displayValueExpectation() 解包 failure.expected 用户可读展示
- 修复 checkEarlyTimeout 独立 lte/lt 检查,修复 KeyedExpectations JSON Schema
- 新增 expect/value.ts(resolve/check/display)、keyed.ts、content.ts、headers.ts、status.ts
- 删除旧 normalize.ts/matcher.ts/validate-matcher.ts/key-value.ts
- 更新 DEVELOPMENT.md:expect 五层管线表、displayValueExpectation、1.7↔1.10 交叉引用
- 同步 13 个 main specs,归档 refactor-expect-type-model 变更(62/62 tasks)
This commit is contained in:
@@ -1,12 +1,32 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import type { LlmCheckObservation } from "../../../../../src/server/checker/runner/llm/types";
|
||||
import type {
|
||||
RawContentExpectations,
|
||||
RawKeyedExpectations,
|
||||
RawValueExpectation,
|
||||
} from "../../../../../src/server/checker/expect/types";
|
||||
import type { LlmCheckObservation, ResolvedLlmExpectConfig } from "../../../../../src/server/checker/runner/llm/types";
|
||||
|
||||
import { checkContentRules } from "../../../../../src/server/checker/expect/content";
|
||||
import { checkContentExpectations, resolveContentExpectations } from "../../../../../src/server/checker/expect/content";
|
||||
import { resolveKeyedExpectations } from "../../../../../src/server/checker/expect/keyed";
|
||||
import { resolveValueExpectation } from "../../../../../src/server/checker/expect/value";
|
||||
import { runExpects } from "../../../../../src/server/checker/runner/llm/expect";
|
||||
|
||||
function checkOutputRules(outputText: null | string, rules: Parameters<typeof checkContentRules>[1]) {
|
||||
return checkContentRules(outputText, rules, { path: "output", phase: "output" });
|
||||
/**
 * Raw (not yet resolved) LLM expectation input used by the tests in this file.
 *
 * Every field is optional; the `resolveLlmExpect` helper in this file turns a
 * value of this shape into a `ResolvedLlmExpectConfig`.
 */
interface RawLlmExpectInput {
  /** Raw expectation on the finish reason. */
  finishReason?: RawValueExpectation;
  /** Raw keyed expectations on the response headers. */
  headers?: RawKeyedExpectations;
  /** Raw content expectations on the output text. */
  output?: RawContentExpectations;
  /** Raw expectation on the raw finish reason. */
  rawFinishReason?: RawValueExpectation;
  /** Status entries, given as numbers or strings. */
  status?: (number | string)[];
  /** Stream expectations. */
  stream?: {
    completed?: boolean;
    firstTokenMs?: RawValueExpectation;
  };
  /** Token usage expectations. */
  usage?: {
    inputTokens?: RawValueExpectation;
    outputTokens?: RawValueExpectation;
    totalTokens?: RawValueExpectation;
  };
}
|
||||
|
||||
/**
 * Resolves raw content expectations and checks them against an output text.
 *
 * @param outputText - Output text to check; may be `null`.
 * @param rawRules - Raw content expectations, or `undefined` when none are given.
 * @returns Whatever `checkContentExpectations` returns for the resolved
 *   expectations, evaluated with path `"output"` and phase `"output"`.
 */
function checkOutputRules(outputText: null | string, rawRules: RawContentExpectations | undefined) {
  // Resolve first so the checker only ever sees the resolved form.
  const resolvedExpectations = resolveContentExpectations(rawRules);
  return checkContentExpectations(outputText, resolvedExpectations, { path: "output", phase: "output" });
}
|
||||
|
||||
function makeObservation(overrides?: Partial<LlmCheckObservation>): LlmCheckObservation {
|
||||
@@ -25,7 +45,31 @@ function makeObservation(overrides?: Partial<LlmCheckObservation>): LlmCheckObse
|
||||
};
|
||||
}
|
||||
|
||||
describe("LLM output rules", () => {
|
||||
/**
 * Test helper: builds a `ResolvedLlmExpectConfig` from a raw expectation input.
 *
 * - `undefined` input yields `undefined`.
 * - `status` falls back to `[200]` when not provided.
 * - `stream` and `usage` stay `undefined` unless the raw input provides them;
 *   when `stream` is provided, `completed` falls back to `true`.
 *
 * @param raw - Raw expectation input, or `undefined`.
 * @returns The resolved config, or `undefined` when `raw` is `undefined`.
 */
function resolveLlmExpect(raw: RawLlmExpectInput | undefined): ResolvedLlmExpectConfig | undefined {
  if (raw === undefined) {
    return undefined;
  }

  const { stream: rawStream, usage: rawUsage } = raw;

  // Resolver calls are kept in the same order as the keys of the returned object.
  const finishReason = resolveValueExpectation(raw.finishReason);
  const headers = resolveKeyedExpectations(raw.headers);
  const output = resolveContentExpectations(raw.output);
  const rawFinishReason = resolveValueExpectation(raw.rawFinishReason);
  const status = raw.status ?? [200];

  const stream = rawStream
    ? {
        completed: rawStream.completed ?? true,
        firstTokenMs: resolveValueExpectation(rawStream.firstTokenMs),
      }
    : undefined;

  const usage = rawUsage
    ? {
        inputTokens: resolveValueExpectation(rawUsage.inputTokens),
        outputTokens: resolveValueExpectation(rawUsage.outputTokens),
        totalTokens: resolveValueExpectation(rawUsage.totalTokens),
      }
    : undefined;

  return { finishReason, headers, output, rawFinishReason, status, stream, usage };
}
|
||||
|
||||
describe("LLM output expectations", () => {
|
||||
test("equals 严格匹配", () => {
|
||||
expect(checkOutputRules("OK", [{ equals: "OK" }]).matched).toBe(true);
|
||||
expect(checkOutputRules("OK\n", [{ equals: "OK" }]).matched).toBe(false);
|
||||
@@ -66,7 +110,7 @@ describe("LLM output rules", () => {
|
||||
expect(result.failure?.phase).toBe("output");
|
||||
});
|
||||
|
||||
test("undefined rules 返回通过", () => {
|
||||
test("undefined expectations 返回通过", () => {
|
||||
expect(checkOutputRules("anything", undefined).matched).toBe(true);
|
||||
expect(checkOutputRules(null, undefined).matched).toBe(true);
|
||||
});
|
||||
@@ -75,11 +119,14 @@ describe("LLM output rules", () => {
|
||||
describe("LLM runExpects", () => {
|
||||
test("全部 expect 通过", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, {
|
||||
finishReason: { equals: "stop" },
|
||||
output: [{ contains: "OK" }],
|
||||
status: [200],
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
finishReason: { equals: "stop" },
|
||||
output: [{ contains: "OK" }],
|
||||
status: [200],
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(true);
|
||||
expect(result.failure).toBeNull();
|
||||
});
|
||||
@@ -92,35 +139,35 @@ describe("LLM runExpects", () => {
|
||||
|
||||
test("status 不匹配失败", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, { status: [404] });
|
||||
const result = runExpects(observation, resolveLlmExpect({ status: [404] }));
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("status");
|
||||
});
|
||||
|
||||
test("finishReason 不匹配失败", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, { finishReason: { equals: "length" } });
|
||||
const result = runExpects(observation, resolveLlmExpect({ finishReason: { equals: "length" } }));
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("finishReason");
|
||||
});
|
||||
|
||||
test("rawFinishReason 不匹配失败", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, { rawFinishReason: { equals: "end_turn" } });
|
||||
const result = runExpects(observation, resolveLlmExpect({ rawFinishReason: { equals: "end_turn" } }));
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("rawFinishReason");
|
||||
});
|
||||
|
||||
test("usage 不匹配失败", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, { usage: { totalTokens: { gte: 100 } } });
|
||||
const result = runExpects(observation, resolveLlmExpect({ usage: { totalTokens: { gte: 100 } } }));
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("usage");
|
||||
});
|
||||
|
||||
test("usage 匹配通过", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, { usage: { totalTokens: { lte: 20 } } });
|
||||
const result = runExpects(observation, resolveLlmExpect({ usage: { totalTokens: { lte: 20 } } }));
|
||||
expect(result.matched).toBe(true);
|
||||
});
|
||||
|
||||
@@ -129,9 +176,12 @@ describe("LLM runExpects", () => {
|
||||
mode: "stream",
|
||||
stream: { completed: true, firstTokenMs: 500 },
|
||||
});
|
||||
const result = runExpects(observation, {
|
||||
stream: { completed: true },
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
stream: { completed: true },
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(true);
|
||||
});
|
||||
|
||||
@@ -140,9 +190,12 @@ describe("LLM runExpects", () => {
|
||||
mode: "stream",
|
||||
stream: { completed: true, firstTokenMs: 500 },
|
||||
});
|
||||
const result = runExpects(observation, {
|
||||
stream: { firstTokenMs: { lte: 1000 } },
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
stream: { firstTokenMs: { lte: 1000 } },
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(true);
|
||||
});
|
||||
|
||||
@@ -151,9 +204,12 @@ describe("LLM runExpects", () => {
|
||||
mode: "stream",
|
||||
stream: { completed: true, firstTokenMs: null },
|
||||
});
|
||||
const result = runExpects(observation, {
|
||||
stream: { firstTokenMs: { lte: 1000 } },
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
stream: { firstTokenMs: { lte: 1000 } },
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("stream");
|
||||
});
|
||||
@@ -162,9 +218,12 @@ describe("LLM runExpects", () => {
|
||||
const observation = makeObservation({
|
||||
http: { headers: { "content-type": "application/json" }, status: 200, statusText: "OK" },
|
||||
});
|
||||
const result = runExpects(observation, {
|
||||
headers: { "content-type": "application/json" },
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(true);
|
||||
});
|
||||
|
||||
@@ -172,19 +231,25 @@ describe("LLM runExpects", () => {
|
||||
const observation = makeObservation({
|
||||
http: { headers: { "content-type": "text/plain" }, status: 200, statusText: "OK" },
|
||||
});
|
||||
const result = runExpects(observation, {
|
||||
headers: { "content-type": "application/json" },
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
headers: { "content-type": "application/json" },
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("headers");
|
||||
});
|
||||
|
||||
test("首个 expect 失败立即返回", () => {
|
||||
const observation = makeObservation();
|
||||
const result = runExpects(observation, {
|
||||
output: [{ contains: "OK" }],
|
||||
status: [404],
|
||||
});
|
||||
const result = runExpects(
|
||||
observation,
|
||||
resolveLlmExpect({
|
||||
output: [{ contains: "OK" }],
|
||||
status: [404],
|
||||
}),
|
||||
);
|
||||
expect(result.matched).toBe(false);
|
||||
expect(result.failure?.phase).toBe("status");
|
||||
});
|
||||
@@ -196,7 +261,7 @@ describe("LLM runExpects", () => {
|
||||
outputText: null,
|
||||
usage: null,
|
||||
});
|
||||
const result = runExpects(observation, { status: [401] });
|
||||
const result = runExpects(observation, resolveLlmExpect({ status: [401] }));
|
||||
expect(result.matched).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user