import { describe, expect, test } from "bun:test";

import type {
  RawContentExpectations,
  RawKeyedExpectations,
  RawValueExpectation,
} from "../../../../../src/server/checker/expect/types";
import type { LlmCheckObservation, ResolvedLlmExpectConfig } from "../../../../../src/server/checker/runner/llm/types";

import { checkContentExpectations, resolveContentExpectations } from "../../../../../src/server/checker/expect/content";
import { resolveKeyedExpectations } from "../../../../../src/server/checker/expect/keyed";
import { resolveValueExpectation } from "../../../../../src/server/checker/expect/value";
import { runExpects } from "../../../../../src/server/checker/runner/llm/expect";

/**
 * Raw (unresolved) expect input accepted by the `resolveLlmExpect` test helper.
 * Every field is optional; `resolveLlmExpect` turns it into a
 * `ResolvedLlmExpectConfig` via the project's `resolve*` helpers.
 */
interface RawLlmExpectInput {
  finishReason?: RawValueExpectation;
  headers?: RawKeyedExpectations;
  output?: RawContentExpectations;
  rawFinishReason?: RawValueExpectation;
  // Accepted HTTP status codes. The element type is required: a bare `Array` is a type error.
  status?: Array<number>;
  stream?: { completed?: boolean; firstTokenMs?: RawValueExpectation };
  usage?: {
    inputTokens?: RawValueExpectation;
    outputTokens?: RawValueExpectation;
    totalTokens?: RawValueExpectation;
  };
}

/**
 * Resolves `rawRules` and checks `outputText` against them, tagging the check
 * with path "output" and phase "output".
 *
 * @param outputText - Text to check; `null` stands for "no output".
 * @param rawRules - Raw content rules, or `undefined` for no rules.
 * @returns Whatever `checkContentExpectations` returns for the resolved rules.
 */
function checkOutputRules(outputText: null | string, rawRules: RawContentExpectations | undefined) {
  const resolvedRules = resolveContentExpectations(rawRules);
  return checkContentExpectations(outputText, resolvedRules, {
    path: "output",
    phase: "output",
  });
}

/**
 * Builds a baseline observation (HTTP 200, output "OK", finish reason "stop",
 * usage 12/2/14 tokens, no stream data) and shallow-merges `overrides` on top.
 *
 * @param overrides - Top-level fields that replace the baseline values.
 * @returns The merged observation.
 */
function makeObservation(overrides?: Partial<LlmCheckObservation>): LlmCheckObservation {
  return {
    finishReason: "stop",
    http: { headers: {}, status: 200, statusText: "OK" },
    mode: "http",
    model: "gpt-4o-mini",
    outputText: "OK",
    provider: "openai",
    rawFinishReason: "stop",
    stream: null,
    usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 },
    warnings: [],
    // Spread last so caller-supplied fields win over the baseline.
    ...overrides,
  };
}

/**
 * Converts a raw expect input into a `ResolvedLlmExpectConfig`.
 *
 * Defaults applied here: `status` falls back to `[200]`, and when a `stream`
 * section is present its `completed` flag falls back to `true`. The `stream`
 * and `usage` sections stay `undefined` when they are absent from `raw`.
 *
 * @param raw - Raw expect input, or `undefined`.
 * @returns The resolved config, or `undefined` when `raw` is `undefined`.
 */
function resolveLlmExpect(raw: RawLlmExpectInput | undefined): ResolvedLlmExpectConfig | undefined {
  if (raw === undefined) return undefined;

  return {
    finishReason: resolveValueExpectation(raw.finishReason),
    headers: resolveKeyedExpectations(raw.headers),
    output: resolveContentExpectations(raw.output),
    rawFinishReason: resolveValueExpectation(raw.rawFinishReason),
    status: raw.status ?? [200],
    stream: raw.stream
      ? {
          completed: raw.stream.completed ?? true,
          firstTokenMs: resolveValueExpectation(raw.stream.firstTokenMs),
        }
      : undefined,
    usage: raw.usage
      ? {
          inputTokens: resolveValueExpectation(raw.usage.inputTokens),
          outputTokens: resolveValueExpectation(raw.usage.outputTokens),
          totalTokens: resolveValueExpectation(raw.usage.totalTokens),
        }
      : undefined,
  };
}

describe("LLM output expectations", () => {
  // `equals` is an exact comparison: a trailing newline or space must not match.
  test("equals 严格匹配", () => {
    expect(checkOutputRules("OK", [{ equals: "OK" }]).matched).toBe(true);
    expect(checkOutputRules("OK\n", [{ equals: "OK" }]).matched).toBe(false);
    expect(checkOutputRules("OK ", [{ equals: "OK" }]).matched).toBe(false);
  });

  // A null output never satisfies `equals`.
  test("equals null 输出失败", () => {
    expect(checkOutputRules(null, [{ equals: "OK" }]).matched).toBe(false);
  });

  // `contains` matches a substring and fails on a null output.
  test("contains 匹配", () => {
    expect(checkOutputRules("Hello World", [{ contains: "World" }]).matched).toBe(true);
    expect(checkOutputRules("Hello", [{ contains: "World" }]).matched).toBe(false);
    expect(checkOutputRules(null, [{ contains: "World" }]).matched).toBe(false);
  });

  // `regex` matches a pattern and fails on a null output.
  test("regex 匹配", () => {
    expect(checkOutputRules("status: ok", [{ regex: "^status:" }]).matched).toBe(true);
    expect(checkOutputRules("status: ok", [{ regex: "^error:" }]).matched).toBe(false);
    expect(checkOutputRules(null, [{ regex: "^status:" }]).matched).toBe(false);
  });

  // `json` rules address a value by path and apply a value expectation to it.
  test("json 匹配", () => {
    expect(checkOutputRules('{"status":"ok","code":200}', [{ json: { equals: "ok", path: "$.status" } }]).matched).toBe(
      true,
    );
    expect(checkOutputRules('{"status":"ok","code":200}', [{ json: { gte: 200, path: "$.code" } }]).matched).toBe(true);
    expect(checkOutputRules('{"status":"ok"}', [{ json: { exists: true, path: "$.code" } }]).matched).toBe(false);
  });

  // Output that is not valid JSON fails a `json` rule.
  test("json 非法 JSON 失败", () => {
    expect(checkOutputRules("not json", [{ json: { exists: true, path: "$.x" } }]).matched).toBe(false);
  });

  // The first rule fails, so the result is a failure even though the second rule would match.
  test("多规则按顺序快速失败", () => {
    const result = checkOutputRules("Hello World", [{ equals: "wrong" }, { contains: "World" }]);
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("output");
  });

  // With no rules at all, any output (including null) passes.
  test("undefined expectations 返回通过", () => {
    expect(checkOutputRules("anything", undefined).matched).toBe(true);
    expect(checkOutputRules(null, undefined).matched).toBe(true);
  });
});

describe("LLM runExpects", () => {
  // Status, finish reason and output all match the baseline observation.
  test("全部 expect 通过", () => {
    const observation = makeObservation();
    const result = runExpects(
      observation,
      resolveLlmExpect({
        finishReason: { equals: "stop" },
        output: [{ contains: "OK" }],
        status: [200],
      }),
    );
    expect(result.matched).toBe(true);
    expect(result.failure).toBeNull();
  });

  // With no expect config, the baseline HTTP 200 observation passes.
  test("默认 status=200 通过", () => {
    const observation = makeObservation();
    const result = runExpects(observation, undefined);
    expect(result.matched).toBe(true);
  });

  // Expecting 404 against a 200 observation fails in the "status" phase.
  test("status 不匹配失败", () => {
    const observation = makeObservation();
    const result = runExpects(observation, resolveLlmExpect({ status: [404] }));
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("status");
  });

  // Expecting "length" against finish reason "stop" fails in the "finishReason" phase.
  test("finishReason 不匹配失败", () => {
    const observation = makeObservation();
    const result = runExpects(observation, resolveLlmExpect({ finishReason: { equals: "length" } }));
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("finishReason");
  });

  // Expecting "end_turn" against raw finish reason "stop" fails in the "rawFinishReason" phase.
  test("rawFinishReason 不匹配失败", () => {
    const observation = makeObservation();
    const result = runExpects(observation, resolveLlmExpect({ rawFinishReason: { equals: "end_turn" } }));
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("rawFinishReason");
  });

  // The baseline has 14 total tokens, so `gte: 100` fails in the "usage" phase.
  test("usage 不匹配失败", () => {
    const observation = makeObservation();
    const result = runExpects(observation, resolveLlmExpect({ usage: { totalTokens: { gte: 100 } } }));
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("usage");
  });

  // The baseline has 14 total tokens, so `lte: 20` passes.
  test("usage 匹配通过", () => {
    const observation = makeObservation();
    const result = runExpects(observation, resolveLlmExpect({ usage: { totalTokens: { lte: 20 } } }));
    expect(result.matched).toBe(true);
  });

  // A completed stream satisfies `completed: true`.
  test("stream completed 匹配", () => {
    const observation = makeObservation({
      mode: "stream",
      stream: { completed: true, firstTokenMs: 500 },
    });
    const result = runExpects(
      observation,
      resolveLlmExpect({
        stream: { completed: true },
      }),
    );
    expect(result.matched).toBe(true);
  });

  // A first-token latency of 500 satisfies `lte: 1000`.
  test("stream firstTokenMs 匹配", () => {
    const observation = makeObservation({
      mode: "stream",
      stream: { completed: true, firstTokenMs: 500 },
    });
    const result = runExpects(
      observation,
      resolveLlmExpect({
        stream: { firstTokenMs: { lte: 1000 } },
      }),
    );
    expect(result.matched).toBe(true);
  });

  // A null first-token latency fails the expectation in the "stream" phase.
  test("stream firstTokenMs 缺失失败", () => {
    const observation = makeObservation({
      mode: "stream",
      stream: { completed: true, firstTokenMs: null },
    });
    const result = runExpects(
      observation,
      resolveLlmExpect({
        stream: { firstTokenMs: { lte: 1000 } },
      }),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("stream");
  });

  // The expected header value equals the observed one.
  test("headers 匹配通过", () => {
    const observation = makeObservation({
      http: { headers: { "content-type": "application/json" }, status: 200, statusText: "OK" },
    });
    const result = runExpects(
      observation,
      resolveLlmExpect({
        headers: { "content-type": "application/json" },
      }),
    );
    expect(result.matched).toBe(true);
  });

  // A differing header value fails in the "headers" phase.
  test("headers 不匹配失败", () => {
    const observation = makeObservation({
      http: { headers: { "content-type": "text/plain" }, status: 200, statusText: "OK" },
    });
    const result = runExpects(
      observation,
      resolveLlmExpect({
        headers: { "content-type": "application/json" },
      }),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("headers");
  });

  // The status mismatch is the reported failure even though the output rule would match.
  test("首个 expect 失败立即返回", () => {
    const observation = makeObservation();
    const result = runExpects(
      observation,
      resolveLlmExpect({
        output: [{ contains: "OK" }],
        status: [404],
      }),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("status");
  });

  // An error-style observation (401, no output, no usage) passes when 401 is the expected status.
  test("APICallError 状态码 expect 通过", () => {
    const observation = makeObservation({
      finishReason: null,
      http: { headers: {}, status: 401, statusText: "Unauthorized" },
      outputText: null,
      usage: null,
    });
    const result = runExpects(observation, resolveLlmExpect({ status: [401] }));
    expect(result.matched).toBe(true);
  });
});