feat: 新增 LLM checker 支持大模型服务应用层拨测
基于 AI SDK v6 实现 openai/openai-responses/anthropic 三类 provider 的 http/stream 模式调用；支持 output/finishReason/usage/stream 等完整 expect 断言链路；新增 9 个源文件和 5 个测试文件（共 78 个测试）；更新 README/DEVELOPMENT/probes.example.yaml 和 probe-config.schema.json。
This commit is contained in:
83
src/server/checker/runner/llm/output.ts
Normal file
83
src/server/checker/runner/llm/output.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import type { ExpectResult } from "../../expect/types";
|
||||
import type { OutputRule } from "./types";
|
||||
|
||||
import { mismatchFailure } from "../../expect/failure";
|
||||
import { applyOperator, evaluateJsonPath } from "../../expect/operator";
|
||||
|
||||
/**
 * Runs every output rule against the model output, in order.
 *
 * Evaluation stops at the first rule that does not match and that rule's
 * result (including its failure) is returned unchanged. A missing or empty
 * rule list counts as a match.
 *
 * @param outputText - Text produced by the model, or `null` when there is none.
 * @param rules - Rules to evaluate; may be `undefined`.
 * @returns The first non-matching result, otherwise a matched result with a `null` failure.
 */
export function checkOutputRules(outputText: null | string, rules: OutputRule[] | undefined): ExpectResult {
  // An absent rule list is treated like an empty one: the loop body never runs.
  for (const candidate of rules ?? []) {
    const outcome = checkSingleOutputRule(outputText, candidate);
    if (!outcome.matched) {
      return outcome;
    }
  }

  return { failure: null, matched: true };
}
|
||||
|
||||
/**
 * Evaluates a single output rule against the model output.
 *
 * The rule variant is picked by which key is present on `rule`:
 * - `equals`: the output must be strictly equal to the given string.
 * - `contains`: the output must include the given substring.
 * - `regex`: the output must match the given pattern source.
 * - `json`: the output must parse as JSON, and the value found at
 *   `rule.json.path` must satisfy the operator described by `rule.json`.
 *
 * A `null` output fails every recognised variant. A rule with none of the
 * keys above is reported as matched.
 *
 * @param outputText - Text produced by the model, or `null` when there is none.
 * @param rule - The rule to evaluate.
 * @returns A matched result with a `null` failure, or an unmatched result
 *   carrying the failure built by `mismatchFailure`.
 */
function checkSingleOutputRule(outputText: null | string, rule: OutputRule): ExpectResult {
  if ("equals" in rule) {
    if (outputText === null || outputText !== rule.equals) {
      return {
        failure: mismatchFailure("output", "output", rule.equals, outputText, "output equals mismatch"),
        matched: false,
      };
    }
    return { failure: null, matched: true };
  }

  if ("contains" in rule) {
    // Optional chaining yields undefined for a null output, which is reported as a mismatch.
    if (!outputText?.includes(rule.contains)) {
      return {
        failure: mismatchFailure(
          "output",
          "output",
          `contains: ${rule.contains}`,
          outputText,
          "output contains mismatch",
        ),
        matched: false,
      };
    }
    return { failure: null, matched: true };
  }

  if ("regex" in rule) {
    const expectedPattern = `match: ${rule.regex}`;

    if (outputText === null) {
      return {
        failure: mismatchFailure("output", "output", expectedPattern, outputText, "output regex mismatch"),
        matched: false,
      };
    }

    // The RegExp constructor throws SyntaxError on a malformed pattern. Report
    // that as a failed expectation, the same way unparsable JSON is handled
    // in the json branch, instead of letting the exception escape the checker.
    let pattern: RegExp;
    try {
      pattern = new RegExp(rule.regex);
    } catch {
      return {
        failure: mismatchFailure("output", "output", expectedPattern, outputText, "output regex is invalid"),
        matched: false,
      };
    }

    if (!pattern.test(outputText)) {
      return {
        failure: mismatchFailure("output", "output", expectedPattern, outputText, "output regex mismatch"),
        matched: false,
      };
    }
    return { failure: null, matched: true };
  }

  if ("json" in rule) {
    if (outputText === null) {
      return {
        failure: mismatchFailure("output", "output", "valid JSON", null, "output is null, cannot parse JSON"),
        matched: false,
      };
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(outputText);
    } catch {
      return {
        failure: mismatchFailure("output", "output", "valid JSON", outputText, "output is not valid JSON"),
        matched: false,
      };
    }

    // Resolve the configured path inside the parsed document, then let the
    // shared operator helper decide whether the resolved value is acceptable.
    const value = evaluateJsonPath(parsed, rule.json.path);
    if (!applyOperator(value, rule.json)) {
      return {
        failure: mismatchFailure("output", "output", rule.json, value, "output json mismatch"),
        matched: false,
      };
    }
    return { failure: null, matched: true };
  }

  // No recognised rule key is present: nothing to check, so report a match.
  return { failure: null, matched: true };
}
|
||||
Reference in New Issue
Block a user