feat: 新增 LLM checker 支持大模型服务应用层拨测
基于 AI SDK v6 实现 openai/openai-responses/anthropic 三类 provider 的 http/stream 模式调用；支持 output/finishReason/usage/stream 等完整 expect 断言链路；新增 9 个源文件和 5 个测试文件（共 78 个测试）；更新 README、DEVELOPMENT、probes.example.yaml 和 probe-config.schema.json。
This commit is contained in:
168
src/server/checker/runner/llm/expect.ts
Normal file
168
src/server/checker/runner/llm/expect.ts
Normal file
@@ -0,0 +1,168 @@
|
||||
import type { ExpectResult } from "../../expect/types";
|
||||
import type { LlmCheckObservation, LlmExpectConfig } from "./types";
|
||||
|
||||
import { checkDuration } from "../../expect/duration";
|
||||
import { mismatchFailure } from "../../expect/failure";
|
||||
import { applyOperator } from "../../expect/operator";
|
||||
import { checkHeaders, checkStatus } from "../http/expect";
|
||||
import { checkOutputRules } from "./output";
|
||||
|
||||
/**
 * Validates the streaming part of an LLM check against `expect.stream`.
 *
 * Passes immediately when the observation carries no stream data or when no
 * stream expectation is configured. Otherwise the checks run in this order and
 * the first mismatch is returned:
 *
 * 1. `stream.completed` must equal the expected flag (defaults to `true`).
 * 2. When a `firstTokenMs` rule is configured, the observed value must be
 *    present (`null` is reported as "missing") and satisfy `applyOperator`.
 *
 * @param observation - Result of the LLM call; only `stream` is read here.
 * @param expect - Expectation config; only `stream` is read here.
 * @returns A passing result, or the first stream mismatch found.
 */
export function checkStreamExpect(observation: LlmCheckObservation, expect: LlmExpectConfig): ExpectResult {
  const observed = observation.stream;
  const wanted = expect.stream;
  if (!observed || !wanted) return { failure: null, matched: true };

  const expectedCompleted = wanted.completed ?? true;
  if (observed.completed !== expectedCompleted) {
    return {
      failure: mismatchFailure(
        "stream",
        "stream.completed",
        expectedCompleted,
        observed.completed,
        "stream.completed mismatch",
      ),
      matched: false,
    };
  }

  const firstTokenRule = wanted.firstTokenMs;
  // Explicit nullish test rather than truthiness, so a configured rule is
  // never skipped just because it happens to be a falsy value.
  if (firstTokenRule != null) {
    if (observed.firstTokenMs === null) {
      // A rule was configured but no first-token latency was recorded.
      return {
        failure: mismatchFailure(
          "stream",
          "stream.firstTokenMs",
          firstTokenRule,
          null,
          "stream.firstTokenMs missing",
        ),
        matched: false,
      };
    }
    if (!applyOperator(observed.firstTokenMs, firstTokenRule)) {
      return {
        failure: mismatchFailure(
          "stream",
          "stream.firstTokenMs",
          firstTokenRule,
          observed.firstTokenMs,
          "stream.firstTokenMs mismatch",
        ),
        matched: false,
      };
    }
  }

  return { failure: null, matched: true };
}
|
||||
|
||||
/**
 * Runs the full expectation chain for one LLM check and returns the first
 * failure, or a passing result when every configured expectation holds.
 *
 * Order of checks:
 * 1. HTTP status (expected list defaults to `[200]`; a missing HTTP
 *    observation is checked as status `0`). With no `expect` config this is
 *    the only check performed.
 * 2. Response headers, when both an HTTP observation and `expect.headers` exist.
 * 3. Stream expectations, when the observation has stream data.
 * 4. Output rules via `checkOutputRules`.
 * 5. `finishReason`, then `rawFinishReason` (strict equality).
 * 6. Token usage. If usage rules are configured but the observation has no
 *    usage data, the check fails with "usage missing" instead of silently
 *    passing an assertion that could not be evaluated.
 *
 * @param observation - Everything recorded from the LLM call.
 * @param expect - Expectation config, or `undefined` for the default status check only.
 * @returns The first failing result, or `{ failure: null, matched: true }`.
 */
export function runExpects(observation: LlmCheckObservation, expect: LlmExpectConfig | undefined): ExpectResult {
  const http = observation.http;

  // The status check applies with or without an expect config; only the
  // allowed list differs.
  const statusResult = checkStatus(http?.status ?? 0, expect?.status ?? [200]);
  if (!statusResult.matched) return statusResult;
  if (!expect) return { failure: null, matched: true };

  if (http && expect.headers) {
    const headersResult = checkHeaders(http.headers, expect.headers);
    if (!headersResult.matched) return headersResult;
  }

  if (observation.stream !== null) {
    const streamResult = checkStreamExpect(observation, expect);
    if (!streamResult.matched) return streamResult;
  }

  const outputResult = checkOutputRules(observation.outputText, expect.output);
  if (!outputResult.matched) return outputResult;

  // finishReason is checked before rawFinishReason; both are exact matches.
  for (const field of ["finishReason", "rawFinishReason"] as const) {
    const wanted = expect[field];
    if (wanted !== undefined && observation[field] !== wanted) {
      return {
        failure: mismatchFailure(field, field, wanted, observation[field], `${field} mismatch`),
        matched: false,
      };
    }
  }

  if (expect.usage) {
    if (observation.usage) {
      const usageResult = checkUsageExpect(observation.usage, expect.usage);
      if (!usageResult.matched) return usageResult;
    } else if (Object.values(expect.usage).some((rule) => rule !== undefined)) {
      // At least one usage rule is configured but there is nothing to
      // evaluate it against: report it rather than passing by default.
      return {
        failure: mismatchFailure("usage", "usage", expect.usage, null, "usage missing"),
        matched: false,
      };
    }
  }

  return { failure: null, matched: true };
}
|
||||
|
||||
/**
 * Checks observed token usage against the configured usage rules.
 *
 * Each of `inputTokens`, `outputTokens` and `totalTokens` is evaluated with
 * `applyOperator` only when a rule for it is defined. Fields are visited in
 * that fixed order and the first failing field is reported.
 *
 * @param usage - Observed token counts.
 * @param expectUsage - Optional rule per token counter; `undefined` skips the field.
 * @returns A passing result, or a `usage.<field> mismatch` failure.
 */
function checkUsageExpect(
  usage: { inputTokens: number; outputTokens: number; totalTokens: number },
  expectUsage: { inputTokens?: unknown; outputTokens?: unknown; totalTokens?: unknown },
): ExpectResult {
  // Order matters: it decides which failure is surfaced when several fail.
  const fields = ["inputTokens", "outputTokens", "totalTokens"] as const;

  for (const field of fields) {
    const rule = expectUsage[field];
    if (rule === undefined) continue;

    const actual = usage[field];
    // Rules arrive as `unknown`; narrow to whatever applyOperator accepts.
    if (!applyOperator(actual, rule as Parameters<typeof applyOperator>[1])) {
      return {
        failure: mismatchFailure("usage", `usage.${field}`, rule, actual, `usage.${field} mismatch`),
        matched: false,
      };
    }
  }

  return { failure: null, matched: true };
}
|
||||
|
||||
// Re-export the duration check imported above from "../../expect/duration" so it is also available from this module.
export { checkDuration };
|
||||
Reference in New Issue
Block a user