refactor: 统一 expect 断言体系,引入共享 ValueMatcher/ContentRules/KeyValueExpect 模型
- 引入共享 ValueMatcher(equals/contains/regex/exists/empty/gt/gte/lt/lte)
- 引入共享 ContentRules 数组(direct/json/css/xpath 提取器)
- 引入共享 KeyValueExpect(动态键值断言,字面量等价 equals)
- maxDurationMs → durationMs: ValueMatcher(所有 checker)
- match → regex(固定无 flags)
- Ping max* → packetLossPercent/avgLatencyMs/maxLatencyMs(ValueMatcher)
- LLM finishReason/rawFinishReason → ValueMatcher
- DB 新增 result: ContentRules
- TCP banner → ContentRules 数组
- 删除旧模块:operator.ts、validate-operator.ts、duration.ts、body.ts、text.ts、output.ts
- 更新全部 checker schema/validate/expect/execute
- 更新 probe-config.schema.json、probes.example.yaml
- 更新 README.md、DEVELOPMENT.md(含 expect 字段选择规范)
- 同步 10 个 delta specs 到主 specs,归档 change
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
import type { ExpectResult } from "../../expect/types";
|
||||
import type { LlmCheckObservation, LlmExpectConfig } from "./types";
|
||||
import type { LlmCheckObservation, LlmExpectConfig, LlmUsageExpect } from "./types";
|
||||
|
||||
import { checkDuration } from "../../expect/duration";
|
||||
import { checkContentRules } from "../../expect/content";
|
||||
import { mismatchFailure } from "../../expect/failure";
|
||||
import { applyOperator } from "../../expect/operator";
|
||||
import { checkValueMatcher } from "../../expect/matcher";
|
||||
import { checkHeaders, checkStatus } from "../http/expect";
|
||||
import { checkOutputRules } from "./output";
|
||||
|
||||
export function checkStreamExpect(observation: LlmCheckObservation, expect: LlmExpectConfig): ExpectResult {
|
||||
if (!observation.stream || !expect.stream) return { failure: null, matched: true };
|
||||
@@ -25,18 +24,11 @@ export function checkStreamExpect(observation: LlmCheckObservation, expect: LlmE
|
||||
}
|
||||
|
||||
if (expect.stream.firstTokenMs && observation.stream.firstTokenMs !== null) {
|
||||
if (!applyOperator(observation.stream.firstTokenMs, expect.stream.firstTokenMs)) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"stream",
|
||||
"stream.firstTokenMs",
|
||||
expect.stream.firstTokenMs,
|
||||
observation.stream.firstTokenMs,
|
||||
"stream.firstTokenMs mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
return checkValueMatcher(observation.stream.firstTokenMs, expect.stream.firstTokenMs, {
|
||||
message: "stream.firstTokenMs mismatch",
|
||||
path: "stream.firstTokenMs",
|
||||
phase: "stream",
|
||||
});
|
||||
} else if (expect.stream.firstTokenMs && observation.stream.firstTokenMs === null) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
@@ -75,37 +67,25 @@ export function runExpects(observation: LlmCheckObservation, expect: LlmExpectCo
|
||||
if (!streamResult.matched) return streamResult;
|
||||
}
|
||||
|
||||
const outputResult = checkOutputRules(observation.outputText, expect.output);
|
||||
const outputResult = checkContentRules(observation.outputText, expect.output, { path: "output", phase: "output" });
|
||||
if (!outputResult.matched) return outputResult;
|
||||
|
||||
if (expect.finishReason !== undefined) {
|
||||
if (observation.finishReason !== expect.finishReason) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"finishReason",
|
||||
"finishReason",
|
||||
expect.finishReason,
|
||||
observation.finishReason,
|
||||
"finishReason mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
const result = checkValueMatcher(observation.finishReason, expect.finishReason, {
|
||||
message: "finishReason mismatch",
|
||||
path: "finishReason",
|
||||
phase: "finishReason",
|
||||
});
|
||||
if (!result.matched) return result;
|
||||
}
|
||||
|
||||
if (expect.rawFinishReason !== undefined) {
|
||||
if (observation.rawFinishReason !== expect.rawFinishReason) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"rawFinishReason",
|
||||
"rawFinishReason",
|
||||
expect.rawFinishReason,
|
||||
observation.rawFinishReason,
|
||||
"rawFinishReason mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
const result = checkValueMatcher(observation.rawFinishReason, expect.rawFinishReason, {
|
||||
message: "rawFinishReason mismatch",
|
||||
path: "rawFinishReason",
|
||||
phase: "rawFinishReason",
|
||||
});
|
||||
if (!result.matched) return result;
|
||||
}
|
||||
|
||||
if (expect.usage && observation.usage) {
|
||||
@@ -118,51 +98,31 @@ export function runExpects(observation: LlmCheckObservation, expect: LlmExpectCo
|
||||
|
||||
function checkUsageExpect(
|
||||
usage: { inputTokens: number; outputTokens: number; totalTokens: number },
|
||||
expectUsage: { inputTokens?: unknown; outputTokens?: unknown; totalTokens?: unknown },
|
||||
expectUsage: LlmUsageExpect,
|
||||
): ExpectResult {
|
||||
if (expectUsage.inputTokens !== undefined) {
|
||||
if (!applyOperator(usage.inputTokens, expectUsage.inputTokens as Parameters<typeof applyOperator>[1])) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"usage",
|
||||
"usage.inputTokens",
|
||||
expectUsage.inputTokens,
|
||||
usage.inputTokens,
|
||||
"usage.inputTokens mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
const result = checkValueMatcher(usage.inputTokens, expectUsage.inputTokens, {
|
||||
message: "usage.inputTokens mismatch",
|
||||
path: "usage.inputTokens",
|
||||
phase: "usage",
|
||||
});
|
||||
if (!result.matched) return result;
|
||||
}
|
||||
if (expectUsage.outputTokens !== undefined) {
|
||||
if (!applyOperator(usage.outputTokens, expectUsage.outputTokens as Parameters<typeof applyOperator>[1])) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"usage",
|
||||
"usage.outputTokens",
|
||||
expectUsage.outputTokens,
|
||||
usage.outputTokens,
|
||||
"usage.outputTokens mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
const result = checkValueMatcher(usage.outputTokens, expectUsage.outputTokens, {
|
||||
message: "usage.outputTokens mismatch",
|
||||
path: "usage.outputTokens",
|
||||
phase: "usage",
|
||||
});
|
||||
if (!result.matched) return result;
|
||||
}
|
||||
if (expectUsage.totalTokens !== undefined) {
|
||||
if (!applyOperator(usage.totalTokens, expectUsage.totalTokens as Parameters<typeof applyOperator>[1])) {
|
||||
return {
|
||||
failure: mismatchFailure(
|
||||
"usage",
|
||||
"usage.totalTokens",
|
||||
expectUsage.totalTokens,
|
||||
usage.totalTokens,
|
||||
"usage.totalTokens mismatch",
|
||||
),
|
||||
matched: false,
|
||||
};
|
||||
}
|
||||
const result = checkValueMatcher(usage.totalTokens, expectUsage.totalTokens, {
|
||||
message: "usage.totalTokens mismatch",
|
||||
path: "usage.totalTokens",
|
||||
phase: "usage",
|
||||
});
|
||||
if (!result.matched) return result;
|
||||
}
|
||||
return { failure: null, matched: true };
|
||||
}
|
||||
|
||||
export { checkDuration };
|
||||
|
||||
Reference in New Issue
Block a user