- 重命名 ContentRules→ContentExpectations, KeyValueExpect→KeyedExpectations - 新增 Raw/Resolved 双层模型:resolve 阶段物化为执行计划,store 持久化 Raw 快照 - HTTP body 按需读取:status/headers 失败或无 body expectation 时不读取 body - 新增 displayValueExpectation() 解包 failure.expected 用户可读展示 - 修复 checkEarlyTimeout 独立 lte/lt 检查,修复 KeyedExpectations JSON Schema - 新增 expect/value.ts(resolve/check/display)、keyed.ts、content.ts、headers.ts、status.ts - 删除旧 normalize.ts/matcher.ts/validate-matcher.ts/key-value.ts - 更新 DEVELOPMENT.md:expect 五层管线表、displayValueExpectation、1.7↔1.10 交叉引用 - 同步 13 个 main specs,归档 refactor-expect-type-model 变更(62/62 tasks)
238 lines
8.4 KiB
TypeScript
238 lines
8.4 KiB
TypeScript
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
|
|
|
import type {
|
|
RawLlmExpectConfig,
|
|
ResolvedLlmExpectConfig,
|
|
ResolvedLlmTarget,
|
|
} from "../../../../../src/server/checker/runner/llm/types";
|
|
import type { CheckerContext } from "../../../../../src/server/checker/runner/types";
|
|
|
|
import { resolveContentExpectations } from "../../../../../src/server/checker/expect/content";
|
|
import { resolveKeyedExpectations } from "../../../../../src/server/checker/expect/keyed";
|
|
import { resolveValueExpectation } from "../../../../../src/server/checker/expect/value";
|
|
import { LlmChecker } from "../../../../../src/server/checker/runner/llm/execute";
|
|
|
|
// Fixed local port: the mock server binds to it and the default target URL points at it.
const MOCK_PORT = 18456;
|
|
// Handle to the mock HTTP server; assigned in beforeAll and stopped in afterAll.
let server: ReturnType<typeof Bun.serve>;
|
|
|
|
/**
 * Builds a checker context whose abort signal fires after `timeoutMs`.
 *
 * @param timeoutMs - Delay in milliseconds before the signal is aborted (defaults to 10000).
 * @returns A context object carrying only the abort signal.
 */
function makeCtx(timeoutMs = 10000): CheckerContext {
  const controller = new AbortController();
  // The timer is never cancelled: it fires exactly once, `timeoutMs` after creation.
  setTimeout(() => controller.abort(), timeoutMs);
  return { signal: controller.signal };
}
|
|
|
|
/**
 * Builds a resolved LLM target fixture pointing at the local mock server.
 *
 * When `rawExpect` is given, every raw expectation is passed through the matching
 * resolver (`resolveValueExpectation`, `resolveKeyedExpectations`,
 * `resolveContentExpectations`) to produce the `expect` field; the untouched raw
 * config is also attached as `rawExpect`. Without it, both fields are `undefined`.
 *
 * @param overrides - Fields merged over the default `llm` settings (later keys win).
 * @param rawExpect - Optional raw expectation config to resolve and attach.
 * @returns A target with id "test-llm", type "llm" and a 10000 ms timeout.
 */
function makeTarget(overrides?: Partial<ResolvedLlmTarget["llm"]>, rawExpect?: RawLlmExpectConfig): ResolvedLlmTarget {
  let resolvedExpect: ResolvedLlmExpectConfig | undefined;

  if (rawExpect) {
    const { stream, usage } = rawExpect;

    resolvedExpect = {
      durationMs: resolveValueExpectation(rawExpect.durationMs),
      finishReason: resolveValueExpectation(rawExpect.finishReason),
      headers: resolveKeyedExpectations(rawExpect.headers),
      output: resolveContentExpectations(rawExpect.output),
      rawFinishReason: resolveValueExpectation(rawExpect.rawFinishReason),
      // An omitted status expectation falls back to "200 only".
      status: rawExpect.status ?? [200],
      stream: stream
        ? {
            // A stream expectation without `completed` expects the stream to complete.
            completed: stream.completed ?? true,
            firstTokenMs: resolveValueExpectation(stream.firstTokenMs),
          }
        : undefined,
      usage: usage
        ? {
            inputTokens: resolveValueExpectation(usage.inputTokens),
            outputTokens: resolveValueExpectation(usage.outputTokens),
            totalTokens: resolveValueExpectation(usage.totalTokens),
          }
        : undefined,
    };
  }

  return {
    description: null,
    expect: resolvedExpect,
    group: "default",
    id: "test-llm",
    intervalMs: 30000,
    llm: {
      headers: {},
      ignoreSSL: false,
      key: "test-key",
      mode: "http",
      model: "gpt-4o-mini",
      options: { maxOutputTokens: 16, temperature: 0 },
      prompt: "Say OK",
      provider: "openai",
      providerOptions: {},
      url: `http://127.0.0.1:${MOCK_PORT}/v1`,
      // Spread last so callers can replace any of the defaults above.
      ...overrides,
    },
    name: null,
    rawExpect,
    timeoutMs: 10000,
    type: "llm",
  };
}
|
|
|
|
/**
 * Serializes a minimal OpenAI-style `chat.completion` payload for the mock server.
 *
 * The payload always contains a single assistant choice with `finish_reason: "stop"`.
 *
 * @param content - Text placed in the assistant message.
 * @param options - Optional overrides; `usage` replaces the default token counts
 *   (`completion_tokens: 2`, `prompt_tokens: 12`, `total_tokens: 14`).
 * @returns The JSON-encoded response body.
 */
function openaiResponse(
  content: string,
  options?: { usage?: { completion_tokens: number; prompt_tokens: number; total_tokens: number } },
): string {
  return JSON.stringify({
    choices: [{ finish_reason: "stop", index: 0, message: { content, role: "assistant" } }],
    // The OpenAI API reports `created` as a Unix timestamp in seconds, not milliseconds.
    created: Math.floor(Date.now() / 1000),
    id: "chatcmpl-test",
    model: "gpt-4o-mini",
    object: "chat.completion",
    usage: options?.usage ?? { completion_tokens: 2, prompt_tokens: 12, total_tokens: 14 },
  });
}
|
|
|
|
// Starts the mock OpenAI-compatible server on MOCK_PORT before any test runs.
// Routes are checked in order: rate_limit -> server_error -> no_content -> bad API key -> default 200.
beforeAll(() => {
  // Wraps an already-serialized JSON body in a Response with the given status.
  const jsonResponse = (body: string, status: number): Response =>
    new Response(body, { headers: { "Content-Type": "application/json" }, status });

  server = Bun.serve({
    fetch(req) {
      const { pathname } = new URL(req.url);
      const authHeader = req.headers.get("Authorization");

      if (pathname === "/v1/rate_limit/chat/completions") {
        return jsonResponse(
          JSON.stringify({ error: { message: "Rate limit exceeded", type: "rate_limit_error" } }),
          429,
        );
      }

      if (pathname === "/v1/server_error/chat/completions") {
        return jsonResponse(
          JSON.stringify({ error: { message: "Internal server error", type: "server_error" } }),
          500,
        );
      }

      if (pathname === "/v1/no_content/chat/completions") {
        // Successful completion with empty assistant text and zero completion tokens.
        return jsonResponse(
          openaiResponse("", { usage: { completion_tokens: 0, prompt_tokens: 5, total_tokens: 5 } }),
          200,
        );
      }

      // Any other path answers 401 when called with the known-bad key.
      if (authHeader === "Bearer bad-key") {
        return jsonResponse(
          JSON.stringify({
            error: { message: "Invalid API key", param: null, type: "invalid_request_error" },
          }),
          401,
        );
      }

      return jsonResponse(openaiResponse("OK"), 200);
    },
    port: MOCK_PORT,
  });
});
|
|
|
|
// Stops the mock server once the suite has finished.
// The result of stop() is intentionally discarded rather than awaited.
afterAll(() => {
  void server.stop();
});
|
|
|
|
// Single checker instance reused by the tests below.
const checker = new LlmChecker();
|
|
|
|
// Non-streaming execution tests: each case runs the checker against the local mock
// server and asserts on `matched`, the failure phase, or the observation.
describe("LlmChecker execute - 非流式", () => {
  test("成功调用返回 matched=true", async () => {
    const result = await checker.execute(makeTarget(), makeCtx());
    expect(result.matched).toBe(true);
    expect(result.failure).toBeNull();
    expect(result.observation).toMatchObject({ provider: "openai" });
    expect(result.observation).toMatchObject({ mode: "http" });
    expect(result.observation).toMatchObject({ http: { status: 200 } });
    expect(result.observation).toMatchObject({ finishReason: "stop" });
  });

  test("status expect 不匹配", async () => {
    // The default route answers 200, so expecting 404 must fail.
    const result = await checker.execute(makeTarget(undefined, { status: [404] }), makeCtx());
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("status");
  });

  test("output equals 不匹配", async () => {
    // The default route returns the text "OK".
    const result = await checker.execute(makeTarget(undefined, { output: [{ equals: "WRONG" }] }), makeCtx());
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("output");
  });

  test("output contains 通过", async () => {
    const result = await checker.execute(makeTarget(undefined, { output: [{ contains: "O" }] }), makeCtx());
    expect(result.matched).toBe(true);
  });

  test("finishReason expect 不匹配", async () => {
    // The mock always reports finish_reason "stop".
    const result = await checker.execute(makeTarget(undefined, { finishReason: { equals: "length" } }), makeCtx());
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("finishReason");
  });

  test("401 错误可通过 status expect 捕获", async () => {
    // The mock answers 401 for the "Bearer bad-key" Authorization header.
    const result = await checker.execute(makeTarget({ key: "bad-key" }, { status: [401] }), makeCtx());
    expect(result.matched).toBe(true);
  });

  test("429 错误可通过 status expect 捕获", async () => {
    const result = await checker.execute(
      makeTarget({ url: `http://127.0.0.1:${MOCK_PORT}/v1/rate_limit` }, { status: [429] }),
      makeCtx(),
    );
    expect(result.matched).toBe(true);
  });

  test("500 错误返回 status failure", async () => {
    // No expect config is passed here; the test asserts the 500 is reported in the status phase.
    const result = await checker.execute(
      makeTarget({ url: `http://127.0.0.1:${MOCK_PORT}/v1/server_error` }),
      makeCtx(),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("status");
  });

  test("连接失败返回 request failure", async () => {
    // Port 19999 is not the mock server's port; the test expects the request itself to fail.
    const result = await checker.execute(makeTarget({ url: "http://127.0.0.1:19999/v1" }), makeCtx(5000));
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("request");
  });

  test("observation 包含 output 长度和 usage", async () => {
    const result = await checker.execute(makeTarget(), makeCtx());
    expect(result.observation).toHaveProperty("outputPreview");
    expect(result.observation).toHaveProperty("outputLength");
    expect(result.observation).toHaveProperty("usage");
  });

  test("无文本输出且配置 output expect 失败", async () => {
    // The no_content route returns an empty assistant message.
    const result = await checker.execute(
      makeTarget({ url: `http://127.0.0.1:${MOCK_PORT}/v1/no_content` }, { output: [{ equals: "OK" }] }),
      makeCtx(),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("output");
  });

  test("无 expect 默认 status=200 通过", async () => {
    const result = await checker.execute(makeTarget(), makeCtx());
    expect(result.matched).toBe(true);
  });

  test("headers 断言通过", async () => {
    // Every mock response carries Content-Type: application/json.
    const result = await checker.execute(
      makeTarget(undefined, { headers: { "content-type": "application/json" } }),
      makeCtx(),
    );
    expect(result.matched).toBe(true);
  });

  test("headers 断言失败", async () => {
    const result = await checker.execute(
      makeTarget(undefined, { headers: { "content-type": "text/plain" } }),
      makeCtx(),
    );
    expect(result.matched).toBe(false);
    expect(result.failure?.phase).toBe("headers");
  });
});
|