feat: 新增 LLM checker 支持大模型服务应用层拨测
基于 AI SDK v6 实现 openai/openai-responses/anthropic 三类 provider 的 http/stream 模式调用；支持 output/finishReason/usage/stream 等完整 expect 断言链路；新增 9 个源文件和 5 个测试文件，共 78 个测试；更新 README/DEVELOPMENT/probes.example.yaml 和 probe-config.schema.json。
This commit is contained in:
209
tests/server/checker/runner/llm/execute.test.ts
Normal file
209
tests/server/checker/runner/llm/execute.test.ts
Normal file
@@ -0,0 +1,209 @@
|
||||
import { afterAll, beforeAll, describe, expect, test } from "bun:test";
|
||||
|
||||
import type { ResolvedLlmTarget } from "../../../../../src/server/checker/runner/llm/types";
|
||||
import type { CheckerContext } from "../../../../../src/server/checker/runner/types";
|
||||
|
||||
import { LlmChecker } from "../../../../../src/server/checker/runner/llm/execute";
|
||||
|
||||
const MOCK_PORT = 18456;
|
||||
let server: ReturnType<typeof Bun.serve>;
|
||||
|
||||
/**
 * Builds a minimal checker context whose signal aborts after `timeoutMs`.
 *
 * Uses `AbortSignal.timeout` instead of an `AbortController` driven by a bare
 * `setTimeout`, so no timer handle is created here that would otherwise stay
 * scheduled (and never be cleared) after a test finishes early.
 *
 * NOTE(review): a signal from `AbortSignal.timeout` aborts with a
 * `TimeoutError` reason rather than the default `AbortError` — confirm the
 * checker does not branch on the abort reason.
 *
 * @param timeoutMs - Milliseconds until the signal aborts (defaults to 10000).
 * @returns A context object carrying only the abort signal.
 */
function makeCtx(timeoutMs = 10000): CheckerContext {
  return { signal: AbortSignal.timeout(timeoutMs) };
}
|
||||
|
||||
/**
 * Creates a resolved LLM target for the tests in this file.
 *
 * The default `llm` section describes an OpenAI-style, non-streaming request
 * aimed at the local mock server on `MOCK_PORT`.
 *
 * @param overrides - Fields merged over the default `llm` section.
 * @param expectOverrides - Used verbatim as the target's `expect` section
 *   (left `undefined` when omitted).
 * @returns The assembled target.
 */
function makeTarget(
  overrides?: Partial<ResolvedLlmTarget["llm"]>,
  expectOverrides?: Partial<ResolvedLlmTarget["expect"]>,
): ResolvedLlmTarget {
  // Defaults first, caller-supplied fields last so they win.
  const llm: ResolvedLlmTarget["llm"] = {
    headers: {},
    ignoreSSL: false,
    key: "test-key",
    mode: "http",
    model: "gpt-4o-mini",
    options: { maxOutputTokens: 16, temperature: 0 },
    prompt: "Say OK",
    provider: "openai",
    providerOptions: {},
    url: `http://127.0.0.1:${MOCK_PORT}/v1`,
    ...overrides,
  };

  return {
    description: null,
    expect: expectOverrides,
    group: "default",
    id: "test-llm",
    intervalMs: 30000,
    llm,
    name: null,
    timeoutMs: 10000,
    type: "llm",
  };
}
|
||||
|
||||
/**
 * Serialises a minimal OpenAI `chat.completion` response body.
 *
 * @param content - Assistant message text placed in the single choice.
 * @param options - Optional overrides; `usage` replaces the default token
 *   counts (2 completion / 12 prompt / 14 total).
 * @returns The response body as a JSON string.
 */
function openaiResponse(
  content: string,
  options?: { usage?: { completion_tokens: number; prompt_tokens: number; total_tokens: number } },
): string {
  return JSON.stringify({
    choices: [{ finish_reason: "stop", index: 0, message: { content, role: "assistant" } }],
    // The OpenAI API reports `created` as a Unix timestamp in seconds, while
    // Date.now() yields milliseconds.
    created: Math.floor(Date.now() / 1000),
    id: "chatcmpl-test",
    model: "gpt-4o-mini",
    object: "chat.completion",
    usage: options?.usage ?? { completion_tokens: 2, prompt_tokens: 12, total_tokens: 14 },
  });
}
|
||||
|
||||
// Starts the mock OpenAI-compatible server used by every test in this file.
// Routing, in priority order:
//   /v1/rate_limit/chat/completions   -> 429 with a rate_limit_error body
//   /v1/server_error/chat/completions -> 500 with a server_error body
//   /v1/no_content/chat/completions   -> 200 completion with empty content
//   Authorization: Bearer bad-key     -> 401 with an invalid_request_error body
//   anything else                     -> 200 completion whose content is "OK"
beforeAll(() => {
  // Wraps an already-serialised JSON body in a Response with the JSON content type.
  const jsonReply = (body: string, status: number): Response =>
    new Response(body, { headers: { "Content-Type": "application/json" }, status });

  // Builds an OpenAI-style `{ error: ... }` response.
  const errorReply = (status: number, error: Record<string, unknown>): Response =>
    jsonReply(JSON.stringify({ error }), status);

  server = Bun.serve({
    fetch(req) {
      const { pathname } = new URL(req.url);

      // Path-based routes are checked before the credential check.
      switch (pathname) {
        case "/v1/rate_limit/chat/completions":
          return errorReply(429, { message: "Rate limit exceeded", type: "rate_limit_error" });
        case "/v1/server_error/chat/completions":
          return errorReply(500, { message: "Internal server error", type: "server_error" });
        case "/v1/no_content/chat/completions":
          return jsonReply(
            openaiResponse("", { usage: { completion_tokens: 0, prompt_tokens: 5, total_tokens: 5 } }),
            200,
          );
        default:
          break;
      }

      if (req.headers.get("Authorization") === "Bearer bad-key") {
        return errorReply(401, { message: "Invalid API key", param: null, type: "invalid_request_error" });
      }

      return jsonReply(openaiResponse("OK"), 200);
    },
    port: MOCK_PORT,
  });
});
|
||||
|
||||
// Shuts the mock server down once the suite has finished. The stop call is
// awaited (rather than discarded with `void`) so the test runner waits for
// shutdown to complete and any rejection surfaces as a hook failure.
afterAll(async () => {
  await server.stop();
});
|
||||
|
||||
const checker = new LlmChecker();
|
||||
|
||||
// Non-streaming scenarios for LlmChecker.execute, run against the mock server
// listening on MOCK_PORT.
describe("LlmChecker execute - 非流式", () => {
  // URL under the mock server's /v1 root; the suffix selects a canned route.
  const mockUrl = (suffix = ""): string => `http://127.0.0.1:${MOCK_PORT}/v1${suffix}`;

  // Runs the checker on a target built from makeTarget's arguments, using a
  // fresh context with the default timeout.
  const probe = (...targetArgs: Parameters<typeof makeTarget>) =>
    checker.execute(makeTarget(...targetArgs), makeCtx());

  test("成功调用返回 matched=true", async () => {
    const { failure, matched, statusDetail } = await probe();
    expect(matched).toBe(true);
    expect(failure).toBeNull();
    for (const fragment of ["openai", "http", "200", "finish=stop"]) {
      expect(statusDetail).toContain(fragment);
    }
  });

  test("status expect 不匹配", async () => {
    const outcome = await probe(undefined, { status: [404] });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("status");
  });

  test("output equals 不匹配", async () => {
    const outcome = await probe(undefined, { output: [{ equals: "WRONG" }] });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("output");
  });

  test("output contains 通过", async () => {
    const outcome = await probe(undefined, { output: [{ contains: "O" }] });
    expect(outcome.matched).toBe(true);
  });

  test("finishReason expect 不匹配", async () => {
    const outcome = await probe(undefined, { finishReason: "length" });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("finishReason");
  });

  test("401 错误可通过 status expect 捕获", async () => {
    const outcome = await probe({ key: "bad-key" }, { status: [401] });
    expect(outcome.matched).toBe(true);
  });

  test("429 错误可通过 status expect 捕获", async () => {
    const outcome = await probe({ url: mockUrl("/rate_limit") }, { status: [429] });
    expect(outcome.matched).toBe(true);
  });

  test("500 错误返回 status failure", async () => {
    const outcome = await probe({ url: mockUrl("/server_error") });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("status");
  });

  test("连接失败返回 request failure", async () => {
    // Targets a port the mock server is not bound to, with a shorter timeout.
    const unreachable = makeTarget({ url: "http://127.0.0.1:19999/v1" });
    const outcome = await checker.execute(unreachable, makeCtx(5000));
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("request");
  });

  test("statusDetail 包含 output 长度和 usage", async () => {
    const { statusDetail } = await probe();
    for (const fragment of ["output=", "chars", "usage=", "tokens"]) {
      expect(statusDetail).toContain(fragment);
    }
  });

  test("无文本输出且配置 output expect 失败", async () => {
    const outcome = await probe({ url: mockUrl("/no_content") }, { output: [{ equals: "OK" }] });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("output");
  });

  test("无 expect 默认 status=200 通过", async () => {
    const outcome = await probe();
    expect(outcome.matched).toBe(true);
  });

  test("headers 断言通过", async () => {
    const outcome = await probe(undefined, { headers: { "content-type": "application/json" } });
    expect(outcome.matched).toBe(true);
  });

  test("headers 断言失败", async () => {
    const outcome = await probe(undefined, { headers: { "content-type": "text/plain" } });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("headers");
  });
});
|
||||
198
tests/server/checker/runner/llm/output-expect.test.ts
Normal file
198
tests/server/checker/runner/llm/output-expect.test.ts
Normal file
@@ -0,0 +1,198 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import type { LlmCheckObservation } from "../../../../../src/server/checker/runner/llm/types";
|
||||
|
||||
import { runExpects } from "../../../../../src/server/checker/runner/llm/expect";
|
||||
import { checkOutputRules } from "../../../../../src/server/checker/runner/llm/output";
|
||||
|
||||
/**
 * Produces an observation describing a successful non-streaming OpenAI call
 * (HTTP 200, output "OK", finish reason "stop", 14 total tokens).
 *
 * @param overrides - Fields that replace the defaults above.
 * @returns The merged observation.
 */
function makeObservation(overrides?: Partial<LlmCheckObservation>): LlmCheckObservation {
  const baseline: LlmCheckObservation = {
    finishReason: "stop",
    http: { headers: {}, status: 200, statusText: "OK" },
    mode: "http",
    model: "gpt-4o-mini",
    outputText: "OK",
    provider: "openai",
    rawFinishReason: "stop",
    stream: null,
    usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 },
    warnings: [],
  };

  return { ...baseline, ...overrides };
}
|
||||
|
||||
// Unit tests for checkOutputRules: equals / contains / regex / json rules,
// null output, rule ordering and the "no rules" case.
describe("LLM output rules", () => {
  type OutputArg = Parameters<typeof checkOutputRules>[0];
  type RulesArg = Parameters<typeof checkOutputRules>[1];

  // Shorthand returning only the `matched` flag of a rule evaluation.
  const matches = (output: OutputArg, rules: RulesArg) => checkOutputRules(output, rules).matched;

  test("equals 严格匹配", () => {
    // Trailing whitespace or newlines must break an exact match.
    expect(matches("OK", [{ equals: "OK" }])).toBe(true);
    expect(matches("OK\n", [{ equals: "OK" }])).toBe(false);
    expect(matches("OK ", [{ equals: "OK" }])).toBe(false);
  });

  test("equals null 输出失败", () => {
    expect(matches(null, [{ equals: "OK" }])).toBe(false);
  });

  test("contains 匹配", () => {
    expect(matches("Hello World", [{ contains: "World" }])).toBe(true);
    expect(matches("Hello", [{ contains: "World" }])).toBe(false);
    expect(matches(null, [{ contains: "World" }])).toBe(false);
  });

  test("regex 匹配", () => {
    expect(matches("status: ok", [{ regex: "^status:" }])).toBe(true);
    expect(matches("status: ok", [{ regex: "^error:" }])).toBe(false);
    expect(matches(null, [{ regex: "^status:" }])).toBe(false);
  });

  test("json 匹配", () => {
    const fullPayload = '{"status":"ok","code":200}';
    expect(matches(fullPayload, [{ json: { equals: "ok", path: "$.status" } }])).toBe(true);
    expect(matches(fullPayload, [{ json: { gte: 200, path: "$.code" } }])).toBe(true);
    expect(matches('{"status":"ok"}', [{ json: { exists: true, path: "$.code" } }])).toBe(false);
  });

  test("json 非法 JSON 失败", () => {
    expect(matches("not json", [{ json: { exists: true, path: "$.x" } }])).toBe(false);
  });

  test("多规则按顺序快速失败", () => {
    // The first rule fails even though the second one would pass.
    const { failure, matched } = checkOutputRules("Hello World", [{ equals: "wrong" }, { contains: "World" }]);
    expect(matched).toBe(false);
    expect(failure?.phase).toBe("output");
  });

  test("undefined rules 返回通过", () => {
    expect(matches("anything", undefined)).toBe(true);
    expect(matches(null, undefined)).toBe(true);
  });
});
|
||||
|
||||
// Unit tests for runExpects: each expectation phase is exercised against an
// observation built by makeObservation.
describe("LLM runExpects", () => {
  // Observation of a completed stream with the given time-to-first-token.
  const streamed = (firstTokenMs: number | null) =>
    makeObservation({ mode: "stream", stream: { completed: true, firstTokenMs } });

  // Observation whose HTTP response carries the given content-type header.
  const withContentType = (contentType: string) =>
    makeObservation({ http: { headers: { "content-type": contentType }, status: 200, statusText: "OK" } });

  test("全部 expect 通过", () => {
    const outcome = runExpects(makeObservation(), {
      finishReason: "stop",
      output: [{ contains: "OK" }],
      status: [200],
    });
    expect(outcome.matched).toBe(true);
    expect(outcome.failure).toBeNull();
  });

  test("默认 status=200 通过", () => {
    const outcome = runExpects(makeObservation(), undefined);
    expect(outcome.matched).toBe(true);
  });

  test("status 不匹配失败", () => {
    const outcome = runExpects(makeObservation(), { status: [404] });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("status");
  });

  test("finishReason 不匹配失败", () => {
    const outcome = runExpects(makeObservation(), { finishReason: "length" });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("finishReason");
  });

  test("rawFinishReason 不匹配失败", () => {
    const outcome = runExpects(makeObservation(), { rawFinishReason: "end_turn" });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("rawFinishReason");
  });

  test("usage 不匹配失败", () => {
    const outcome = runExpects(makeObservation(), { usage: { totalTokens: { gte: 100 } } });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("usage");
  });

  test("usage 匹配通过", () => {
    const outcome = runExpects(makeObservation(), { usage: { totalTokens: { lte: 20 } } });
    expect(outcome.matched).toBe(true);
  });

  test("stream completed 匹配", () => {
    const outcome = runExpects(streamed(500), { stream: { completed: true } });
    expect(outcome.matched).toBe(true);
  });

  test("stream firstTokenMs 匹配", () => {
    const outcome = runExpects(streamed(500), { stream: { firstTokenMs: { lte: 1000 } } });
    expect(outcome.matched).toBe(true);
  });

  test("stream firstTokenMs 缺失失败", () => {
    const outcome = runExpects(streamed(null), { stream: { firstTokenMs: { lte: 1000 } } });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("stream");
  });

  test("headers 匹配通过", () => {
    const outcome = runExpects(withContentType("application/json"), {
      headers: { "content-type": "application/json" },
    });
    expect(outcome.matched).toBe(true);
  });

  test("headers 不匹配失败", () => {
    const outcome = runExpects(withContentType("text/plain"), {
      headers: { "content-type": "application/json" },
    });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("headers");
  });

  test("首个 expect 失败立即返回", () => {
    // The output rule would pass, yet the reported failure is the status phase.
    const outcome = runExpects(makeObservation(), {
      output: [{ contains: "OK" }],
      status: [404],
    });
    expect(outcome.matched).toBe(false);
    expect(outcome.failure?.phase).toBe("status");
  });

  test("APICallError 状态码 expect 通过", () => {
    const unauthorized = makeObservation({
      finishReason: null,
      http: { headers: {}, status: 401, statusText: "Unauthorized" },
      outputText: null,
      usage: null,
    });
    const outcome = runExpects(unauthorized, { status: [401] });
    expect(outcome.matched).toBe(true);
  });
});
|
||||
258
tests/server/checker/runner/llm/provider-observation.test.ts
Normal file
258
tests/server/checker/runner/llm/provider-observation.test.ts
Normal file
@@ -0,0 +1,258 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import {
|
||||
buildObservationFromApiCallError,
|
||||
buildObservationFromGenerateText,
|
||||
buildObservationFromStreamText,
|
||||
} from "../../../../../src/server/checker/runner/llm/observation";
|
||||
import { createProviderModel } from "../../../../../src/server/checker/runner/llm/provider";
|
||||
|
||||
// Smoke tests for createProviderModel across the three supported providers.
// NOTE(review): apart from the first test, these only assert that a model
// object is returned; the request path / header behaviour named in the test
// titles is not verified here.
describe("LLM provider factory", () => {
  type ProviderConfig = Parameters<typeof createProviderModel>[0];

  // Fresh OpenAI-flavoured config per call so tests never share object state.
  const openaiConfig = (overrides?: Partial<ProviderConfig>): ProviderConfig => ({
    headers: {},
    ignoreSSL: false,
    key: "test-key",
    mode: "http",
    model: "gpt-4o-mini",
    options: {},
    prompt: "test",
    provider: "openai",
    providerOptions: {},
    url: "https://api.openai.com/v1",
    ...overrides,
  });

  // Same shape, pointed at the Anthropic endpoint and model.
  const anthropicConfig = (overrides?: Partial<ProviderConfig>): ProviderConfig =>
    openaiConfig({
      model: "claude-3-5-haiku-20241022",
      provider: "anthropic",
      url: "https://api.anthropic.com/v1",
      ...overrides,
    });

  test("createProviderModel 返回 model 和 http 初始为 null", () => {
    const created = createProviderModel(openaiConfig());
    expect(created.http).toBeNull();
    expect(created.model).toBeDefined();
  });

  test("openai provider 使用 chat 路径", () => {
    const created = createProviderModel(openaiConfig());
    expect(created.model).toBeDefined();
  });

  test("openai-responses provider 使用 responses 路径", () => {
    const created = createProviderModel(openaiConfig({ provider: "openai-responses" }));
    expect(created.model).toBeDefined();
  });

  test("anthropic provider 使用 messages 路径", () => {
    const created = createProviderModel(anthropicConfig());
    expect(created.model).toBeDefined();
  });

  test("anthropic authToken 映射到 Authorization header", () => {
    const created = createProviderModel(anthropicConfig({ authToken: "my-bearer-token", key: "" }));
    expect(created.model).toBeDefined();
  });
});
|
||||
|
||||
// Tests for buildObservationFromGenerateText (non-streaming results).
describe("LLM observation - generateText", () => {
  type GenerateResult = Parameters<typeof buildObservationFromGenerateText>[3];
  type HttpMeta = Parameters<typeof buildObservationFromGenerateText>[4];

  // Every case uses the same provider / model / mode triple.
  const observe = (result: GenerateResult, http: HttpMeta) =>
    buildObservationFromGenerateText("openai", "gpt-4o-mini", "http", result, http);

  test("构建非流式 observation", () => {
    const observation = observe(
      {
        finishReason: "stop",
        rawFinishReason: "stop",
        text: "OK",
        usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 },
      },
      { headers: { "content-type": "application/json" }, status: 200, statusText: "OK" },
    );

    expect(observation.provider).toBe("openai");
    expect(observation.model).toBe("gpt-4o-mini");
    expect(observation.mode).toBe("http");
    expect(observation.outputText).toBe("OK");
    expect(observation.finishReason).toBe("stop");
    expect(observation.rawFinishReason).toBe("stop");
    expect(observation.usage).toEqual({ inputTokens: 12, outputTokens: 2, totalTokens: 14 });
    expect(observation.stream).toBeNull();
    expect(observation.http?.status).toBe(200);
  });

  test("rawFinishReason 为 undefined 时转为 null", () => {
    const observation = observe(
      {
        finishReason: "stop",
        rawFinishReason: undefined,
        text: "OK",
        usage: { inputTokens: 5, outputTokens: 1 },
      },
      null,
    );
    expect(observation.rawFinishReason).toBeNull();
  });

  test("usage totalTokens 缺失时自动计算", () => {
    // 10 input + 3 output tokens, with no totalTokens supplied.
    const observation = observe(
      {
        finishReason: "stop",
        rawFinishReason: "stop",
        text: "OK",
        usage: { inputTokens: 10, outputTokens: 3 },
      },
      null,
    );
    expect(observation.usage?.totalTokens).toBe(13);
  });
});
|
||||
|
||||
// Tests for buildObservationFromApiCallError.
describe("LLM observation - APICallError", () => {
  test("带 statusCode 的 APICallError 构建 http metadata", async () => {
    // The SDK is loaded lazily inside the test, as in the original.
    const sdk = await import("ai");
    const unauthorized = new sdk.APICallError({
      message: "Unauthorized",
      requestBodyValues: {},
      responseBody: '{"error":{"message":"Invalid API key"}}',
      responseHeaders: { "content-type": "application/json" },
      statusCode: 401,
      url: "https://api.openai.com/v1/chat/completions",
    });

    const { finishReason, http, outputText, usage } = buildObservationFromApiCallError(
      unauthorized,
      "openai",
      "gpt-4o-mini",
      "http",
    );

    // Status and headers are carried over; no model output is recorded.
    expect(http?.status).toBe(401);
    expect(http?.headers).toEqual({ "content-type": "application/json" });
    expect(outputText).toBeNull();
    expect(finishReason).toBeNull();
    expect(usage).toBeNull();
  });
});
|
||||
|
||||
// Tests for buildObservationFromStreamText, driven by hand-built stream parts.
describe("LLM observation - streamText", () => {
  // Pause inserted before the first non-empty delta in the firstTokenMs test.
  const PAUSE_MS = 30;

  /**
   * Replays `parts` as an async stream.
   *
   * @param parts - Stream parts yielded in order.
   * @param pauseBeforeIndex - Index of the part to delay (-1 disables the pause).
   * @param pauseMs - Length of that delay in milliseconds.
   */
  async function* replay<T>(parts: readonly T[], pauseBeforeIndex = -1, pauseMs = 0): AsyncGenerator<T> {
    for (let index = 0; index < parts.length; index += 1) {
      if (index === pauseBeforeIndex) {
        await new Promise<void>((resolve) => setTimeout(resolve, pauseMs));
      }
      yield await Promise.resolve(parts[index] as T);
    }
  }

  test("消费 fullStream 构建流式 observation", async () => {
    const parts = [
      { textDelta: "Hello", type: "text-delta" },
      { textDelta: " world", type: "text-delta" },
      {
        finishReason: "stop",
        rawFinishReason: "stop",
        totalUsage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
        type: "finish",
        usage: { inputTokens: 10, outputTokens: 5 },
      },
    ];

    const observation = await buildObservationFromStreamText(
      "openai",
      "gpt-4o-mini",
      "stream",
      replay(parts),
      { headers: {}, status: 200, statusText: "OK" },
      performance.now() - 100,
    );

    expect(observation.outputText).toBe("Hello world");
    expect(observation.stream?.completed).toBe(true);
    expect(observation.stream?.firstTokenMs).not.toBeNull();
    expect(observation.finishReason).toBe("stop");
    expect(observation.rawFinishReason).toBe("stop");
    expect(observation.usage?.totalTokens).toBe(15);
  });

  test("空 text-delta 不触发 firstTokenMs", async () => {
    const parts = [
      { textDelta: "", type: "text-delta" },
      { textDelta: "OK", type: "text-delta" },
      { finishReason: "stop", type: "finish", usage: { inputTokens: 5, outputTokens: 1 } },
    ];

    // The empty delta arrives immediately and the first real token only after
    // PAUSE_MS, which makes the two candidate timestamps distinguishable.
    const observation = await buildObservationFromStreamText(
      "openai",
      "gpt-4o-mini",
      "stream",
      replay(parts, 1, PAUSE_MS),
      null,
      performance.now(),
    );

    const firstTokenMs = observation.stream?.firstTokenMs;
    expect(firstTokenMs).not.toBeNull();
    // Assumes the last argument above is the start timestamp and firstTokenMs
    // is elapsed milliseconds since it — TODO confirm against the implementation.
    // Half the pause is used as the bound to tolerate timer jitter; a value
    // near zero would mean the empty delta was counted as the first token.
    expect(firstTokenMs ?? 0).toBeGreaterThanOrEqual(PAUSE_MS / 2);
    expect(observation.outputText).toBe("OK");
  });

  test("error part 添加到 warnings", async () => {
    const parts = [
      { error: new Error("stream broken"), type: "error" },
      { finishReason: "error", type: "finish", usage: { inputTokens: 5, outputTokens: 0 } },
    ];

    const observation = await buildObservationFromStreamText(
      "openai",
      "gpt-4o-mini",
      "stream",
      replay(parts),
      null,
      performance.now(),
    );

    expect(observation.warnings).toContain("stream broken");
  });
});
|
||||
32
tests/server/checker/runner/llm/registry.test.ts
Normal file
32
tests/server/checker/runner/llm/registry.test.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import { checkerRegistry } from "../../../../../src/server/checker/runner";
|
||||
|
||||
// Verifies that the "llm" checker is registered and exposes the expected
// registry surface (type, configKey, schemas, validate).
describe("LLM registry integration", () => {
  test("registry 包含 llm 类型", () => {
    expect(checkerRegistry.supportedTypes).toContain("llm");
  });

  test("llm checker 可获取", () => {
    const checker = checkerRegistry.tryGet("llm");
    expect(checker).toBeDefined();
    // Optional chaining instead of `!`: if the checker were missing, these
    // report an assertion failure rather than throwing a TypeError.
    expect(checker?.type).toBe("llm");
    expect(checker?.configKey).toBe("llm");
  });

  test("llm checker schemas 有效", () => {
    const { schemas } = checkerRegistry.get("llm");
    expect(schemas.config).toBeDefined();
    expect(schemas.defaults).toBeDefined();
    expect(schemas.expect).toBeDefined();
  });

  test("llm checker validate 方法可用", () => {
    // An empty configuration must produce no validation issues.
    const issues = checkerRegistry.get("llm").validate({
      defaults: {},
      targets: [],
    });
    expect(issues).toHaveLength(0);
  });
});
|
||||
384
tests/server/checker/runner/llm/schema-validate-resolve.test.ts
Normal file
384
tests/server/checker/runner/llm/schema-validate-resolve.test.ts
Normal file
@@ -0,0 +1,384 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import type { ResolvedLlmTarget } from "../../../../../src/server/checker/runner/llm/types";
|
||||
import type { ResolveContext } from "../../../../../src/server/checker/runner/types";
|
||||
import type { RawTargetConfig } from "../../../../../src/server/checker/types";
|
||||
|
||||
import { checkerRegistry } from "../../../../../src/server/checker/runner";
|
||||
import { validateLlmConfig } from "../../../../../src/server/checker/runner/llm/validate";
|
||||
|
||||
/**
 * Shape that `parseSerializedConfig` casts parsed JSON to.
 * NOTE(review): presumably the serialised form of a resolved `llm` section —
 * confirm against the tests that consume it.
 */
interface SerializedConfig {
  /** Extra request headers. */
  headers: Record<string, string>;
  /** Whether TLS verification is skipped. */
  ignoreSSL: boolean;
  /** API key value. */
  key: string;
  /** Invocation mode, kept as a plain string here. */
  mode: string;
  /** Model identifier. */
  model: string;
  /** Generation options. */
  options: Record<string, unknown>;
  /** Prompt text. */
  prompt: string;
  /** Provider name, kept as a plain string here. */
  provider: string;
  /** Provider-specific options. */
  providerOptions: Record<string, unknown>;
  /** Base URL of the provider endpoint. */
  url: string;
}
|
||||
|
||||
/**
 * Narrows a checker's resolved target to `ResolvedLlmTarget`.
 * This is an unchecked cast; no runtime validation is performed.
 */
function asLlm(resolved: ReturnType<ReturnType<typeof checkerRegistry.get>["resolve"]>): ResolvedLlmTarget {
  const llmTarget = resolved as ResolvedLlmTarget;
  return llmTarget;
}
|
||||
|
||||
/**
 * Creates a raw (unresolved) LLM target config for validation tests.
 *
 * @param overrides - Top-level fields that replace the defaults; supplying
 *   `llm` replaces the whole default `llm` section rather than merging into it.
 * @returns The merged raw target.
 */
function makeRawTarget(overrides?: Partial<RawTargetConfig>): RawTargetConfig {
  const defaults: RawTargetConfig = {
    id: "test-llm",
    llm: {
      model: "gpt-4o-mini",
      prompt: "Say OK",
      provider: "openai",
      url: "https://api.openai.com/v1",
    },
    type: "llm",
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Creates a resolve context with a `/tmp` config directory, empty defaults,
 * a 30000 ms default interval and a 10000 ms default timeout.
 *
 * @param overrides - Fields that replace the defaults above.
 * @returns The merged context.
 */
function makeResolveContext(overrides?: Partial<ResolveContext>): ResolveContext {
  const baseline: ResolveContext = {
    configDir: "/tmp",
    defaultIntervalMs: 30000,
    defaults: {},
    defaultTimeoutMs: 10000,
  };

  return { ...baseline, ...overrides };
}
|
||||
|
||||
/**
 * Parses a JSON string and casts the result to `SerializedConfig`.
 * The cast is unchecked: the parsed value is not validated against the shape.
 *
 * @param json - JSON text to parse.
 * @returns The parsed value, typed as `SerializedConfig`.
 * @throws SyntaxError if `json` is not valid JSON.
 */
function parseSerializedConfig(json: string): SerializedConfig {
  const parsed: unknown = JSON.parse(json);
  return parsed as SerializedConfig;
}
|
||||
|
||||
// Checks that the "llm" checker is registered and exposes exactly the
// config / defaults / expect schemas.
describe("LlmChecker schema", () => {
  const checker = checkerRegistry.tryGet("llm");

  test("llm checker 注册到 registry", () => {
    expect(checker).toBeDefined();
    expect(checker?.type).toBe("llm");
    expect(checker?.configKey).toBe("llm");
  });

  test("schemas 包含 config、defaults、expect", () => {
    expect(checker).toBeDefined();
    // Optional chaining instead of `!`, matching the test above: a missing
    // checker yields an assertion failure instead of a TypeError.
    const schemaKeys = Object.keys(checker?.schemas ?? {}).sort();
    // The expected list is written in sorted order already.
    expect(schemaKeys).toEqual(["config", "defaults", "expect"]);
  });
});
|
||||
|
||||
describe("LlmChecker validate", () => {
|
||||
test("合法 LLM target 无校验问题", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [makeRawTarget()],
|
||||
});
|
||||
expect(issues).toHaveLength(0);
|
||||
});
|
||||
|
||||
test("provider 非法报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { model: "m", prompt: "p", provider: "gemini", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.length).toBeGreaterThan(0);
|
||||
expect(issues.some((i) => i.code === "invalid-type" && i.path.includes("provider"))).toBe(true);
|
||||
});
|
||||
|
||||
test("url 非法报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { model: "m", prompt: "p", provider: "openai", url: "ftp://bad" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.length).toBeGreaterThan(0);
|
||||
expect(issues.some((i) => i.code === "invalid-url")).toBe(true);
|
||||
});
|
||||
|
||||
test("model 为空报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { model: "", prompt: "p", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.length).toBeGreaterThan(0);
|
||||
expect(issues.some((i) => i.path.includes("model"))).toBe(true);
|
||||
});
|
||||
|
||||
test("prompt 为空报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { model: "m", prompt: "", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.length).toBeGreaterThan(0);
|
||||
expect(issues.some((i) => i.path.includes("prompt"))).toBe(true);
|
||||
});
|
||||
|
||||
test("mode 非法报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { mode: "batch", model: "m", prompt: "p", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.length).toBeGreaterThan(0);
|
||||
expect(issues.some((i) => i.path.includes("mode"))).toBe(true);
|
||||
});
|
||||
|
||||
test("openai provider 不允许 authToken", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { authToken: "tok", model: "m", prompt: "p", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.some((i) => i.code === "invalid-auth")).toBe(true);
|
||||
});
|
||||
|
||||
test("anthropic 同时配置 key 和 authToken 报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { authToken: "tok", key: "k", model: "m", prompt: "p", provider: "anthropic", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.some((i) => i.code === "auth-conflict")).toBe(true);
|
||||
});
|
||||
|
||||
test("ignoreSSL 非布尔值报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { ignoreSSL: "yes", model: "m", prompt: "p", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.some((i) => i.path.includes("ignoreSSL"))).toBe(true);
|
||||
});
|
||||
|
||||
test("options.maxOutputTokens 非正整数报错", () => {
|
||||
const issues = validateLlmConfig({
|
||||
defaults: {},
|
||||
targets: [
|
||||
makeRawTarget({
|
||||
llm: { model: "m", options: { maxOutputTokens: -1 }, prompt: "p", provider: "openai", url: "https://x" },
|
||||
}),
|
||||
],
|
||||
});
|
||||
expect(issues.some((i) => i.path.includes("maxOutputTokens"))).toBe(true);
|
||||
});
|
||||
|
||||
// An options.stopSequences array containing a number ([123]) must yield an
// issue whose path mentions "stopSequences".
test("options.stopSequences 非字符串数组报错", () => {
  const target = makeRawTarget({
    llm: { model: "m", options: { stopSequences: [123] }, prompt: "p", provider: "openai", url: "https://x" },
  });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  const mentionsField = found.some((issue) => issue.path.includes("stopSequences"));
  expect(mentionsField).toBe(true);
});
|
||||
|
||||
// An empty rule object inside expect.output must yield an issue with code
// "missing-body-rule".
test("expect.output 缺少规则类型报错", () => {
  const target = makeRawTarget({ expect: { output: [{}] } });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  const hasMissingRule = found.some((issue) => issue.code === "missing-body-rule");
  expect(hasMissingRule).toBe(true);
});
|
||||
|
||||
// A single expect.output rule that sets both `contains` and `equals` must
// yield an issue with code "multiple-body-rules".
test("expect.output 同时多种规则类型报错", () => {
  const target = makeRawTarget({ expect: { output: [{ contains: "y", equals: "x" }] } });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  const hasMultipleRules = found.some((issue) => issue.code === "multiple-body-rules");
  expect(hasMultipleRules).toBe(true);
});
|
||||
|
||||
// The nested-quantifier pattern "(a+)+" in expect.output must yield an issue
// with code "unsafe-regex".
test("expect.output regex ReDoS 报错", () => {
  const target = makeRawTarget({ expect: { output: [{ regex: "(a+)+" }] } });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  const flaggedUnsafe = found.some((issue) => issue.code === "unsafe-regex");
  expect(flaggedUnsafe).toBe(true);
});
|
||||
|
||||
// Declaring expect.stream while llm.mode is "http" must yield an issue whose
// message contains "stream mode".
test("expect.stream 在 mode:http 下报错", () => {
  const target = makeRawTarget({
    expect: { stream: { completed: true } },
    llm: { mode: "http", model: "m", prompt: "p", provider: "openai", url: "https://x" },
  });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  const mentionsStreamMode = found.some((issue) => issue.message.includes("stream mode"));
  expect(mentionsStreamMode).toBe(true);
});
|
||||
|
||||
// Declaring expect.stream while llm.mode is "stream" must produce no
// validation issues at all.
test("expect.stream 在 mode:stream 下合法", () => {
  const target = makeRawTarget({
    expect: { stream: { completed: true } },
    llm: { mode: "stream", model: "m", prompt: "p", provider: "openai", url: "https://x" },
  });
  const found = validateLlmConfig({ defaults: {}, targets: [target] });

  expect(found).toHaveLength(0);
});
|
||||
|
||||
// A defaults.llm section with headers, ignoreSSL, mode and options, combined
// with a default raw target, must produce no validation issues.
test("defaults.llm 合法配置", () => {
  const found = validateLlmConfig({
    defaults: {
      llm: { headers: { "X-Custom": "val" }, ignoreSSL: false, mode: "http", options: { maxOutputTokens: 32 } },
    },
    targets: [makeRawTarget()],
  });

  expect(found).toHaveLength(0);
});
|
||||
|
||||
// Setting defaults.llm.mode to "batch" must yield an issue whose path
// contains "defaults.llm.mode".
test("defaults.llm mode 非法报错", () => {
  const found = validateLlmConfig({
    defaults: { llm: { mode: "batch" } },
    targets: [makeRawTarget()],
  });

  const mentionsDefaultsMode = found.some((issue) => issue.path.includes("defaults.llm.mode"));
  expect(mentionsDefaultsMode).toBe(true);
});
|
||||
});
|
||||
|
||||
// Suite for the checker registered under "llm": default filling in resolve(),
// merging of defaults.llm with target.llm, and the output of serialize().
describe("LlmChecker resolve", () => {
  // The non-null assertion assumes the "llm" checker is registered before
  // this suite is collected.
  const llmChecker = checkerRegistry.tryGet("llm")!;

  // With no overrides, resolve() is expected to fill in the defaults asserted below.
  test("最简 target 填充默认值", () => {
    const result = asLlm(llmChecker.resolve(makeRawTarget(), makeResolveContext()));

    expect(result.type).toBe("llm");

    const { llm } = result;
    expect(llm.mode).toBe("http");
    expect(llm.key).toBe("");
    expect(llm.ignoreSSL).toBe(false);
    expect(llm.options.maxOutputTokens).toBe(16);
    expect(llm.options.temperature).toBe(0);

    expect(result.group).toBe("default");
    expect(result.intervalMs).toBe(30000);
    expect(result.timeoutMs).toBe(10000);
  });

  // Fields absent on the target come from defaults.llm; headers from both
  // sides end up combined in the resolved target.
  test("defaults.llm 与 target.llm 浅合并", () => {
    const rawTarget = makeRawTarget({
      llm: {
        headers: { Authorization: "Bearer test" },
        model: "gpt-4o-mini",
        prompt: "Say OK",
        provider: "openai",
        url: "https://api.openai.com/v1",
      },
    });
    const resolveCtx = makeResolveContext({
      defaults: {
        llm: {
          headers: { "X-Custom": "default" },
          ignoreSSL: true,
          mode: "stream",
          options: { maxOutputTokens: 64, temperature: 0.5 },
        },
      },
    });

    const { llm } = asLlm(llmChecker.resolve(rawTarget, resolveCtx));

    expect(llm.mode).toBe("stream");
    expect(llm.ignoreSSL).toBe(true);
    expect(llm.headers).toEqual({ Authorization: "Bearer test", "X-Custom": "default" });
    expect(llm.options.maxOutputTokens).toBe(64);
    expect(llm.options.temperature).toBe(0.5);
  });

  // Values set explicitly on the target win over the same keys in defaults.llm.
  test("target 字段覆盖 defaults", () => {
    const rawTarget = makeRawTarget({
      llm: {
        ignoreSSL: false,
        mode: "http",
        model: "gpt-4o-mini",
        options: { maxOutputTokens: 8 },
        prompt: "Say OK",
        provider: "openai",
        url: "https://api.openai.com/v1",
      },
    });
    const resolveCtx = makeResolveContext({
      defaults: {
        llm: {
          ignoreSSL: true,
          mode: "stream",
          options: { maxOutputTokens: 64 },
        },
      },
    });

    const { llm } = asLlm(llmChecker.resolve(rawTarget, resolveCtx));

    expect(llm.mode).toBe("http");
    expect(llm.ignoreSSL).toBe(false);
    expect(llm.options.maxOutputTokens).toBe(8);
  });

  // serialize() exposes a "provider:model @ url" target string and a config
  // payload readable through parseSerializedConfig.
  test("serialize 返回正确格式", () => {
    const result = asLlm(llmChecker.resolve(makeRawTarget(), makeResolveContext()));
    const output = llmChecker.serialize(result);

    expect(output.target).toBe("openai:gpt-4o-mini @ https://api.openai.com/v1");

    const parsed = parseSerializedConfig(output.config);
    expect(parsed.provider).toBe("openai");
    expect(parsed.key).toBe("");
    expect(parsed.model).toBe("gpt-4o-mini");
  });

  // A non-empty key must appear as "***" in the serialized config.
  test("serialize 隐藏 key", () => {
    const rawTarget = makeRawTarget({
      llm: { key: "sk-secret-key", model: "m", prompt: "p", provider: "openai", url: "https://x" },
    });
    const result = asLlm(llmChecker.resolve(rawTarget, makeResolveContext()));
    const output = llmChecker.serialize(result);

    const parsed = parseSerializedConfig(output.config);
    expect(parsed.key).toBe("***");
  });

  // providerOptions merges at the top-level key only: the target's "openai"
  // entry replaces the one from defaults instead of being deep-merged.
  test("providerOptions 浅合并", () => {
    const rawTarget = makeRawTarget({
      llm: {
        model: "m",
        prompt: "p",
        provider: "openai",
        providerOptions: { openai: { store: true } },
        url: "https://x",
      },
    });
    const resolveCtx = makeResolveContext({
      defaults: {
        llm: {
          providerOptions: { openai: { user: "default-user" } },
        },
      },
    });

    const result = asLlm(llmChecker.resolve(rawTarget, resolveCtx));

    expect(result.llm.providerOptions).toEqual({ openai: { store: true } });
  });
});
|
||||
Reference in New Issue
Block a user