diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index b784ef8..e073bc4 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -63,6 +63,7 @@ src/ tcp/ TCP Checker(自包含模块,含 types/schema/execute/expect/validate) icmp/ Ping Checker(自包含模块,含 types/schema/execute/expect/validate/parse) udp/ UDP Checker(自包含模块,含 types/schema/execute/expect/validate/encoding) + llm/ LLM Checker(自包含模块,含 types/schema/execute/expect/validate/output/provider/observation) shared/ api.ts 前后端共享 TypeScript 类型 web/ React 前端 Dashboard(通过 Bun HTML import 集成) diff --git a/README.md b/README.md index acbf5d1..6cef01d 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,11 @@ --- -DiAL 是一个自托管的拨测监控工具,支持 **HTTP**、**命令行**、**数据库**、**TCP**、**UDP** 和 **Ping** 多种拨测类型。通过 YAML 配置文件定义拨测目标,后端定时并发执行拨测并将结果持久化到本地 SQLite,前端 Dashboard 展示各目标的实时状态、可用率和耗时趋势。 +DiAL 是一个自托管的拨测监控工具,支持 **HTTP**、**命令行**、**数据库**、**TCP**、**UDP**、**Ping** 和 **LLM** 多种拨测类型。通过 YAML 配置文件定义拨测目标,后端定时并发执行拨测并将结果持久化到本地 SQLite,前端 Dashboard 展示各目标的实时状态、可用率和耗时趋势。 **功能亮点:** -- 多种拨测类型:HTTP(GET/POST/PUT 等)、Cmd(命令行执行)、DB(PostgreSQL/MySQL/SQLite)、TCP(端口可达性 + Banner 探测)、UDP(自定义 payload 请求-响应)、Ping(ICMP 存活、延迟、丢包率) +- 多种拨测类型:HTTP(GET/POST/PUT 等)、Cmd(命令行执行)、DB(PostgreSQL/MySQL/SQLite)、TCP(端口可达性 + Banner 探测)、UDP(自定义 payload 请求-响应)、Ping(ICMP 存活、延迟、丢包率)、LLM(大模型服务应用层健康检查) - 丰富的校验规则:状态码、响应头、JSONPath、CSS 选择器、XPath、正则匹配、数值比较等 - 响应式 Dashboard:实时状态、可用率统计、耗时趋势图、手动/自动刷新 - 多主题支持:系统、明亮、黑暗三种主题模式 @@ -218,7 +218,7 @@ targets: | `id` | 目标唯一标识,最长 30 字符,支持字母数字、下划线、连字符,不参与变量替换 | 是 | | `name` | 展示名称,最长 30 字符,支持变量替换,可省略或显式 null;前端展示时 null 回退到 `id` | 否 | | `description` | 目标描述,最长 500 字符,支持变量替换,可省略或显式 null,允许空字符串 | 否 | -| `type` | 目标类型:`http`、`cmd`、`db`、`tcp`、`ping` | 是 | +| `type` | 目标类型:`http`、`cmd`、`db`、`tcp`、`udp`、`ping`、`llm` | 是 | | `group` | 分组名称 | 否,默认 `"default"` | | `interval` | 覆盖全局拨测间隔 | 否 | | `timeout` | 覆盖全局超时时间 | 否 | @@ -270,6 +270,26 @@ targets: Ping checker 通过系统 `ping` 命令执行 ICMP 探测,支持 Linux、macOS 和 Windows 输出解析。 +**LLM 类型** (`type: llm`) + +| 字段 | 说明 | +| --------------------- | 
----------------------------------------------------------- | +| `llm.provider` | 模型提供方:`openai`、`openai-responses`、`anthropic` | +| `llm.url` | API base URL | +| `llm.model` | 模型名称 | +| `llm.prompt` | 单轮 prompt | +| `llm.mode` | 调用模式:`http`(默认,非流式)或 `stream`(流式) | +| `llm.key` | API key(默认空字符串,支持 `${VAR}` 变量替换) | +| `llm.authToken` | Bearer token(仅 `anthropic` provider,与 `key` 互斥) | +| `llm.headers` | 附加请求头(与 `defaults.llm.headers` 合并) | +| `llm.ignoreSSL` | 忽略 HTTPS 证书校验,默认 `false` | +| `llm.options` | 生成选项(与 `defaults.llm.options` 合并) | +| `llm.providerOptions` | Provider 专属选项(与 `defaults.llm.providerOptions` 合并) | + +`llm.options` 支持 `maxOutputTokens`(默认 `16`)、`temperature`(默认 `0`)、`topP`、`topK`、`presencePenalty`、`frequencyPenalty`、`stopSequences`、`seed`。 + +`defaults.llm` 支持 `mode`、`headers`、`ignoreSSL`、`options`、`providerOptions`,不支持 `provider`、`url`、`model`、`key`、`authToken`、`prompt`。 + #### expect — 期望校验 | 字段 | 适用类型 | 说明 | @@ -278,6 +298,11 @@ Ping checker 通过系统 `ping` 命令执行 ICMP 探测,支持 Linux、macOS | `exitCode` | Cmd | 可接受的退出码列表;未指定时不校验 | | `headers` | HTTP | 响应头校验 | | `maxDurationMs` | 全部 | 最大耗时阈值(毫秒) | +| `output` | LLM | 模型输出校验(数组:`equals`/`contains`/`regex`/`json`) | +| `finishReason` | LLM | 期望的 finish reason 字符串 | +| `rawFinishReason` | LLM | 期望的原始 finish reason 字符串 | +| `usage` | LLM | Token usage 校验(`inputTokens`/`outputTokens`/`totalTokens`) | +| `stream` | LLM | 流式断言(`completed`、`firstTokenMs`,仅 `mode: stream`) | | `body` | HTTP | 响应体校验(数组,可组合使用,见下方) | | `stdout` / `stderr` | Cmd | 输出校验(数组,每项一个操作符对象) | | `rowCount` | DB | 查询返回行数校验(操作符对象) | diff --git a/bun.lock b/bun.lock index 08312b3..707db9c 100644 --- a/bun.lock +++ b/bun.lock @@ -5,10 +5,13 @@ "": { "name": "gateway-checker", "dependencies": { + "@ai-sdk/anthropic": "^3", + "@ai-sdk/openai": "^3", "@number-flow/react": "^0.6.0", "@sinclair/typebox": "^0.34.49", "@tanstack/react-query": "^5.100.10", "@xmldom/xmldom": "^0.9.10", + "ai": "^6", "ajv": "^8.20.0", "cheerio": "^1.2.0", "es-toolkit": 
"^1.46.1", @@ -49,6 +52,16 @@ }, }, "packages": { + "@ai-sdk/anthropic": ["@ai-sdk/anthropic@3.0.78", "https://registry.npmmirror.com/@ai-sdk/anthropic/-/anthropic-3.0.78.tgz", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-0OY12G20cUt6iU6htpEA1491Oz++NVxZxlmWGX4B7rSbeZ5pnDmOu6YtW9BKzdZlNx5Gn23i6WMxyZFoMKNcgA=="], + + "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.115", "https://registry.npmmirror.com/@ai-sdk/gateway/-/gateway-3.0.115.tgz", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@vercel/oidc": "3.2.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-xonmGfN9pt54WdKqMzWe68BRYS3rsYvraBzioyA0gfNcecHs8Ir5qk/X8grJSyZ95hghjWiOphrK6bAc11E6SA=="], + + "@ai-sdk/openai": ["@ai-sdk/openai@3.0.64", "https://registry.npmmirror.com/@ai-sdk/openai/-/openai-3.0.64.tgz", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-epO4iS6QwktaY2PF6uBcPnDTJ3BxPOfsGS7/OEtBe3GtNj7C8h8gMDVtIe5K8W16HNDbn0tbR4dcQfpfs+XVFg=="], + + "@ai-sdk/provider": ["@ai-sdk/provider@3.0.10", "https://registry.npmmirror.com/@ai-sdk/provider/-/provider-3.0.10.tgz", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="], + + "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.27", "https://registry.npmmirror.com/@ai-sdk/provider-utils/-/provider-utils-4.0.27.tgz", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="], + "@asamuzakjp/css-color": ["@asamuzakjp/css-color@5.1.11", 
"https://registry.npmmirror.com/@asamuzakjp/css-color/-/css-color-5.1.11.tgz", { "dependencies": { "@asamuzakjp/generational-cache": "^1.0.1", "@csstools/css-calc": "^3.2.0", "@csstools/css-color-parser": "^4.1.0", "@csstools/css-parser-algorithms": "^4.0.0", "@csstools/css-tokenizer": "^4.0.0" } }, "sha512-KVw6qIiCTUQhByfTd78h2yD1/00waTmm9uy/R7Ck/ctUyAPj+AEDLkQIdJW0T8+qGgj3j5bpNKK7Q3G+LedJWg=="], "@asamuzakjp/dom-selector": ["@asamuzakjp/dom-selector@7.1.1", "https://registry.npmmirror.com/@asamuzakjp/dom-selector/-/dom-selector-7.1.1.tgz", { "dependencies": { "@asamuzakjp/generational-cache": "^1.0.1", "@asamuzakjp/nwsapi": "^2.3.9", "bidi-js": "^1.0.3", "css-tree": "^3.2.1", "is-potential-custom-element-name": "^1.0.1" } }, "sha512-67RZDnYRc8H/8MLDgQCDE//zoqVFwajkepHZgmXrbwybzXOEwOWGPYGmALYl9J2DOLfFPPs6kKCqmbzV895hTQ=="], @@ -189,6 +202,8 @@ "@number-flow/react": ["@number-flow/react@0.6.0", "https://registry.npmmirror.com/@number-flow/react/-/react-0.6.0.tgz", { "dependencies": { "esm-env": "^1.1.4", "number-flow": "0.6.0" }, "peerDependencies": { "react": "^18 || ^19", "react-dom": "^18 || ^19" } }, "sha512-77Yfc9+zkV2UDSP8phhZzxJGuwxi/Tt1TikmipL+1r3e9GFKEYDZ1XwInj67NoSt3OnOB0KLvvcl3lfPZgBHVQ=="], + "@opentelemetry/api": ["@opentelemetry/api@1.9.1", "https://registry.npmmirror.com/@opentelemetry/api/-/api-1.9.1.tgz", {}, "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q=="], + "@oxc-project/types": ["@oxc-project/types@0.130.0", "https://registry.npmmirror.com/@oxc-project/types/-/types-0.130.0.tgz", {}, "sha512-ibD2usx9JRu7f5pu2tMKMI4cpA4NgXJQoYRP4pQ7Pxmn1l6k/53qWtQWZayhYy3X4QZkt90Ot+mJEaeXouio6Q=="], "@pkgr/core": ["@pkgr/core@0.2.9", "https://registry.npmmirror.com/@pkgr/core/-/core-0.2.9.tgz", {}, "sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA=="], @@ -359,6 +374,8 @@ "@unrs/resolver-binding-win32-x64-msvc": ["@unrs/resolver-binding-win32-x64-msvc@1.11.1", 
"https://registry.npmmirror.com/@unrs/resolver-binding-win32-x64-msvc/-/resolver-binding-win32-x64-msvc-1.11.1.tgz", { "os": "win32", "cpu": "x64" }, "sha512-lrW200hZdbfRtztbygyaq/6jP6AKE8qQN2KvPcJ+x7wiD038YtnYtZ82IMNJ69GJibV7bwL3y9FgK+5w/pYt6g=="], + "@vercel/oidc": ["@vercel/oidc@3.2.0", "https://registry.npmmirror.com/@vercel/oidc/-/oidc-3.2.0.tgz", {}, "sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug=="], + "@vitejs/plugin-react": ["@vitejs/plugin-react@6.0.2", "https://registry.npmmirror.com/@vitejs/plugin-react/-/plugin-react-6.0.2.tgz", { "dependencies": { "@rolldown/pluginutils": "^1.0.0" }, "peerDependencies": { "@rolldown/plugin-babel": "^0.1.7 || ^0.2.0", "babel-plugin-react-compiler": "^1.0.0", "vite": "^8.0.0" }, "optionalPeers": ["@rolldown/plugin-babel", "babel-plugin-react-compiler"] }, "sha512-DlSMqo4WhThw4vB8Mpn0Woe9J+Jfq1geJ61AKW0QEgLzGMNwtIMdxbDUzLxcun8W7NbJO0e2Jg/Nxm3cCSVzzg=="], "@xmldom/xmldom": ["@xmldom/xmldom@0.9.10", "https://registry.npmmirror.com/@xmldom/xmldom/-/xmldom-0.9.10.tgz", {}, "sha512-A9gOqLdi6cV4ibazAjcQufGj0B1y/vDqYrcuP6d/6x8P27gRS8643Dj9o1dEKtB6O7fwxb2FgBmJS2mX7gpvdw=="], @@ -367,6 +384,8 @@ "acorn-jsx": ["acorn-jsx@5.3.2", "https://registry.npmmirror.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="], + "ai": ["ai@6.0.184", "https://registry.npmmirror.com/ai/-/ai-6.0.184.tgz", { "dependencies": { "@ai-sdk/gateway": "3.0.115", "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@opentelemetry/api": "^1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-j//zHkKvj5ra27l8izHco8cj1g1Pr7vx1ZK+hrzrkHvndgIRmdfZKOb6+RAPpvbk42qGIsuYvlYbGlVAu3erNQ=="], + "ajv": ["ajv@8.20.0", "https://registry.npmmirror.com/ajv/-/ajv-8.20.0.tgz", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", 
"json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA=="], "ansi-escapes": ["ansi-escapes@7.3.0", "https://registry.npmmirror.com/ansi-escapes/-/ansi-escapes-7.3.0.tgz", { "dependencies": { "environment": "^1.0.0" } }, "sha512-BvU8nYgGQBxcmMuEeUEmNTvrMVjJNSH7RgW24vXexN4Ven6qCvy4TntnvlnwnMLTVlcRQQdbRY8NKnaIoeWDNg=="], @@ -601,6 +620,8 @@ "eventemitter3": ["eventemitter3@5.0.4", "https://registry.npmmirror.com/eventemitter3/-/eventemitter3-5.0.4.tgz", {}, "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw=="], + "eventsource-parser": ["eventsource-parser@3.0.8", "https://registry.npmmirror.com/eventsource-parser/-/eventsource-parser-3.0.8.tgz", {}, "sha512-70QWGkr4snxr0OXLRWsFLeRBIRPuQOvt4s8QYjmUlmlkyTZkRqS7EDVRZtzU3TiyDbXSzaOeF0XUKy8PchzukQ=="], + "fast-deep-equal": ["fast-deep-equal@3.1.3", "https://registry.npmmirror.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="], "fast-diff": ["fast-diff@1.3.0", "https://registry.npmmirror.com/fast-diff/-/fast-diff-1.3.0.tgz", {}, "sha512-VxPP4NqbUjj6MaAOafWeUn2cXWLcCtljklUtZf0Ind4XQ+QPtmA0b18zZy0jIQx+ExRVCR/ZQpBmik5lXshNsw=="], @@ -775,6 +796,8 @@ "json-parse-even-better-errors": ["json-parse-even-better-errors@2.3.1", "https://registry.npmmirror.com/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", {}, "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="], + "json-schema": ["json-schema@0.4.0", "https://registry.npmmirror.com/json-schema/-/json-schema-0.4.0.tgz", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], + "json-schema-traverse": ["json-schema-traverse@1.0.0", 
"https://registry.npmmirror.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], "json-stable-stringify-without-jsonify": ["json-stable-stringify-without-jsonify@1.0.1", "https://registry.npmmirror.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", {}, "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw=="], diff --git a/openspec/specs/llm-checker/spec.md b/openspec/specs/llm-checker/spec.md new file mode 100644 index 0000000..dc4af1d --- /dev/null +++ b/openspec/specs/llm-checker/spec.md @@ -0,0 +1,223 @@ +## Purpose + +定义 LLM checker 的配置模型、Provider/Mode 支持、执行观测、Expect 断言、失败 Phase 和测试行为。 + +## Requirements + +### Requirement: LLM Checker 注册与模块结构 +系统 SHALL 提供 `type: llm` checker,用于大模型服务的应用层拨测。LLM checker MUST 位于 `src/server/checker/runner/llm/` 自包含目录,并通过 `src/server/checker/runner/index.ts` 注册到 `CheckerRegistry`。LLM checker SHALL 复用现有 checker 抽象、配置 schema 组装、启动期语义校验、引擎调度、存储序列化和共享 expect 基础设施。 + +#### Scenario: 注册 LLM checker +- **WHEN** 系统初始化默认 checker registry +- **THEN** registry SHALL 包含 `llm` 类型,且 `/api/meta` 返回的 `checkerTypes` SHALL 包含 `llm` + +#### Scenario: LLM checker 目录自包含 +- **WHEN** 开发者查看 `src/server/checker/runner/llm/` 目录 +- **THEN** 该目录 SHALL 包含 LLM checker 的类型、schema、语义校验、provider 创建、observation 构建、expect 断言、执行逻辑和模块入口 + +#### Scenario: 不扩展存储和 API 结构 +- **WHEN** LLM checker 写入检查结果 +- **THEN** 系统 SHALL 使用现有 `CheckResult`、`targets`、`check_results` 和 Dashboard API 结构,不新增 LLM 专用存储列或 Dashboard 指标字段 + +### Requirement: LLM Provider 与调用模式 +LLM checker SHALL 支持 `openai`、`openai-responses`、`anthropic` 三类 provider。`mode: http` SHALL 调用 AI SDK `generateText`。`mode: stream` SHALL 调用 AI SDK `streamText`。所有模型调用 MUST 将 `maxRetries` 固定为 `0`,并 MUST 使用引擎注入的 `ctx.signal` 响应超时和取消。 + +#### Scenario: OpenAI Chat Completions provider +- **WHEN** target 配置 `llm.provider: openai` +- 
**THEN** LLM checker SHALL 使用 `@ai-sdk/openai` 的 `openai.chat(model)` 创建模型调用对象 + +#### Scenario: OpenAI Responses provider +- **WHEN** target 配置 `llm.provider: openai-responses` +- **THEN** LLM checker SHALL 使用 `@ai-sdk/openai` 的 `openai.responses(model)` 创建模型调用对象 + +#### Scenario: Anthropic provider +- **WHEN** target 配置 `llm.provider: anthropic` +- **THEN** LLM checker SHALL 使用 `@ai-sdk/anthropic` 的 `anthropic.messages(model)` 创建模型调用对象 + +#### Scenario: 非流式调用模式 +- **WHEN** target 配置 `llm.mode: http` 或省略 `llm.mode` +- **THEN** LLM checker SHALL 调用 `generateText` 并从返回结果构建非流式 observation + +#### Scenario: 流式调用模式 +- **WHEN** target 配置 `llm.mode: stream` +- **THEN** LLM checker SHALL 调用 `streamText` 并消费 `fullStream` 构建流式 observation + +#### Scenario: 超时取消传递给 SDK +- **WHEN** 引擎注入的 `ctx.signal` 被 abort +- **THEN** LLM checker SHALL 将该 signal 传递给 AI SDK 调用并将取消或超时结果记录为检查失败 + +### Requirement: LLM 配置解析与默认值 +LLM checker SHALL 解析 `llm.provider`、`llm.url`、`llm.model`、`llm.prompt`、`llm.mode`、`llm.key`、`llm.authToken`、`llm.headers`、`llm.ignoreSSL`、`llm.options` 和 `llm.providerOptions`。`llm.options` SHALL 支持 `maxOutputTokens`(默认 `16`)、`temperature`(默认 `0`)、`topP`、`topK`、`presencePenalty`、`frequencyPenalty`、`stopSequences`(字符串数组)和 `seed`。`llm.mode` 默认值 SHALL 为 `http`,`llm.key` 默认值 SHALL 为空字符串,`llm.ignoreSSL` 默认值 SHALL 为 `false`。LLM checker MUST NOT 隐式读取 AI SDK 默认环境变量。 + +#### Scenario: 最简 LLM target 解析 +- **WHEN** 系统读取只包含 `type: llm` 以及 `llm.provider`、`llm.url`、`llm.model`、`llm.prompt` 的 target +- **THEN** 系统 SHALL 解析为 LLM target,并填充 `mode=http`、`key=""`、`ignoreSSL=false`、`options.maxOutputTokens=16`、`options.temperature=0` + +#### Scenario: headers 默认值合并 +- **WHEN** `defaults.llm.headers` 和 target `llm.headers` 同时配置同名 header +- **THEN** LLM checker SHALL 按原始 header key 浅合并 headers,并由 target `llm.headers` 覆盖 defaults 中同名 key + +#### Scenario: options 默认值合并 +- **WHEN** `defaults.llm.options` 和 target `llm.options` 同时配置同名 option +- **THEN** LLM checker SHALL 浅合并 options,并由 target 
`llm.options` 覆盖 defaults 中同名字段 + +#### Scenario: providerOptions 默认值合并 +- **WHEN** `defaults.llm.providerOptions` 和 target `llm.providerOptions` 同时配置同名 provider namespace +- **THEN** LLM checker SHALL 按 provider namespace 浅合并 providerOptions,并由 target namespace 覆盖 defaults 中同名 namespace + +#### Scenario: Anthropic Bearer token +- **WHEN** target 配置 `llm.provider: anthropic` 和非空 `llm.authToken` +- **THEN** LLM checker SHALL 将 `authToken` 映射到 Anthropic SDK 的 Bearer token 认证字段 + +#### Scenario: key 不隐式读取环境变量 +- **WHEN** target 未配置 `llm.key` +- **THEN** LLM checker SHALL 将 SDK provider 的 api key 设置为空字符串,而不是隐式读取 SDK 默认环境变量 + +### Requirement: LLM HTTP Metadata 与 TLS +LLM checker SHALL 通过 AI SDK provider 的 custom fetch 注入 observing fetch。observing fetch SHALL 调用 Bun `fetch`,在不消费 response body 的前提下记录 HTTP status、statusText 和 headers。`llm.ignoreSSL: true` 时,observing fetch SHALL 仅对当前 target 的 provider 请求使用 Bun `tls.rejectUnauthorized=false`。 + +#### Scenario: 捕获 HTTP metadata +- **WHEN** AI SDK provider 发起模型 HTTP 请求并收到响应 +- **THEN** observing fetch SHALL 记录 status code 和响应 headers,供 `expect.status` 与 `expect.headers` 使用 + +#### Scenario: 不消费响应体 +- **WHEN** observing fetch 捕获 HTTP metadata +- **THEN** observing fetch SHALL 返回原始 response 给 AI SDK,不提前读取或克隆消费 body + +#### Scenario: 忽略证书校验 +- **WHEN** target 配置 `llm.ignoreSSL: true` +- **THEN** observing fetch SHALL 对当前 target 的 provider 请求设置 `tls.rejectUnauthorized=false` + +### Requirement: LLM Observation +LLM checker SHALL 在 SDK 调用结果和 expect 断言之间构建 `LlmCheckObservation`。observation SHALL 包含 provider、model、mode、outputText、finishReason、rawFinishReason、usage、stream、http 和 warnings 中可观测的字段。`mode: http` 的 `outputText` SHALL 来自 `generateText.text`。`mode: stream` 的 `outputText` SHALL 来自 `fullStream` 中 `text-delta` 的原始文本聚合。 + +#### Scenario: 非流式 observation +- **WHEN** `generateText` 调用成功 +- **THEN** LLM checker SHALL 从 SDK result 中提取 outputText、finishReason、rawFinishReason、usage、response headers 和 HTTP metadata + +#### Scenario: 
流式 observation +- **WHEN** `streamText` 调用成功且 stream 正常完成 +- **THEN** LLM checker SHALL 从 `fullStream` 聚合 outputText,并记录 stream.completed、firstTokenMs、finishReason、rawFinishReason、usage 和 HTTP metadata + +#### Scenario: APICallError observation +- **WHEN** AI SDK 抛出带 statusCode 或 responseHeaders 的 `APICallError` +- **THEN** LLM checker SHALL 构建包含可用 HTTP metadata 的 observation,并继续执行可执行的 status、headers 和 duration 断言 + +#### Scenario: 无 HTTP metadata 的 SDK 错误 +- **WHEN** AI SDK 抛出不带 statusCode 和 responseHeaders 的错误 +- **THEN** LLM checker SHALL 返回 `phase: "request"` 的 error failure + +### Requirement: LLM Expect 断言 +LLM checker SHALL 支持 `expect.status`、`expect.headers`、`expect.output`、`expect.finishReason`、`expect.rawFinishReason`、`expect.usage.inputTokens`、`expect.usage.outputTokens`、`expect.usage.totalTokens`、`expect.stream.completed`、`expect.stream.firstTokenMs` 和 `expect.maxDurationMs`。`expect.status` 和 `expect.headers` 的运行期断言 SHALL 复用 `src/server/checker/runner/http/expect.ts` 中的 `checkStatus` 和 `checkHeaders` 函数。LLM checker MUST 按固定顺序快速失败,非流式顺序为 status、headers、output、finishReason、rawFinishReason、usage、duration;流式顺序为 status、headers、stream.completed、stream.firstTokenMs、output、finishReason、rawFinishReason、usage、duration。 + +#### Scenario: 默认 status 断言 +- **WHEN** LLM target 未配置 `expect.status` +- **THEN** LLM checker SHALL 使用默认 `status: [200]` 语义 + +#### Scenario: expect headers 通过 +- **WHEN** observing fetch 捕获的响应 headers 满足 `expect.headers` 配置 +- **THEN** LLM checker SHALL 判定 headers 断言通过 + +#### Scenario: expect headers 不匹配 +- **WHEN** observing fetch 捕获的响应 headers 不满足 `expect.headers` 中的某项配置 +- **THEN** LLM checker SHALL 返回 `phase: "headers"` 的 mismatch failure + +#### Scenario: 全部 expect 通过 +- **WHEN** LLM checker 构建出的 observation 满足所有已配置 expect +- **THEN** 检查结果 SHALL 为 `matched=true` 且 `failure=null` + +#### Scenario: 首个 expect 失败 +- **WHEN** 多个 LLM expect 中某个较早顺序的断言失败 +- **THEN** LLM checker SHALL 立即返回该断言对应的 mismatch failure,不继续执行后续断言 + +#### Scenario: 
期望认证失败状态 +- **WHEN** AI SDK 抛出带 HTTP status 401 的 `APICallError`,且 target 仅配置 `expect.status: [401]` +- **THEN** LLM checker SHALL 判定本次检查为 `matched=true` + +#### Scenario: APICallError 缺失模型输出 +- **WHEN** AI SDK 抛出带 HTTP status 的 `APICallError`,且 target 同时配置需要模型结果的 `expect.output` +- **THEN** LLM checker SHALL 因 `outputText` 缺失返回 `phase: "output"` 的 mismatch failure + +### Requirement: LLM Output 规则 +LLM checker SHALL 支持 `expect.output` 有序规则数组,每个规则 MUST 仅包含 `equals`、`contains`、`regex` 或 `json` 中的一种。`equals` SHALL 对原始输出字符串做严格相等比较。`contains` SHALL 判断原始输出是否包含子串。`regex` SHALL 对原始输出执行正则匹配。`json` SHALL 将原始输出解析为 JSON,并用现有 JSONPath 子集和 operator 校验提取值。 + +#### Scenario: 原始输出严格相等 +- **WHEN** `outputText` 为 `"OK\n"` 且 target 配置 `expect.output: [{ equals: "OK" }]` +- **THEN** LLM checker SHALL 判定 output 断言失败,因为 equals 不自动 trim + +#### Scenario: output contains 通过 +- **WHEN** `outputText` 包含配置的子串 +- **THEN** LLM checker SHALL 判定该 output contains 规则通过 + +#### Scenario: output regex 通过 +- **WHEN** `outputText` 匹配配置的合法正则 +- **THEN** LLM checker SHALL 判定该 output regex 规则通过 + +#### Scenario: output JSONPath 通过 +- **WHEN** `outputText` 是 JSON 字符串且 JSONPath 提取值满足 operator +- **THEN** LLM checker SHALL 判定该 output json 规则通过 + +#### Scenario: output 规则按顺序快速失败 +- **WHEN** `expect.output` 包含多个规则且第一条规则失败 +- **THEN** LLM checker SHALL 返回第一条失败规则的 mismatch failure,不继续校验后续 output 规则 + +### Requirement: LLM Stream 断言 +LLM checker SHALL 仅允许 `mode: stream` 使用 `expect.stream`。`expect.stream.completed` 未配置时,LLM checker SHALL 在 stream observation 路径使用默认 `true` 语义。`expect.stream.firstTokenMs` SHALL 仅统计第一个非空 `text-delta` 事件耗时,不统计 reasoning、tool call 或 source 事件。 + +#### Scenario: stream completed 默认值 +- **WHEN** target 配置 `llm.mode: stream` 且未配置 `expect.stream.completed` +- **THEN** LLM checker SHALL 要求 SDK stream 正常完成 + +#### Scenario: stream error +- **WHEN** `fullStream` 产生 error part +- **THEN** LLM checker SHALL 返回 `phase: "stream"` 的 failure + +#### Scenario: firstTokenMs 达标 +- **WHEN** target 配置 
`expect.stream.firstTokenMs` 且首个非空 text delta 耗时满足 operator +- **THEN** LLM checker SHALL 判定 firstTokenMs 断言通过 + +#### Scenario: firstTokenMs 缺失 +- **WHEN** target 配置 `expect.stream.firstTokenMs` 但 stream 未产生非空 text delta +- **THEN** LLM checker SHALL 返回 `phase: "stream"` 的 mismatch failure + +#### Scenario: APICallError 不被默认 completed 阻断 +- **WHEN** `mode: stream` 的 SDK 调用在 stream 启动前抛出带 HTTP status 的 `APICallError` +- **THEN** 默认 `stream.completed=true` SHALL NOT 阻断基于 status 和 headers 的 APICallError 状态探测 + +### Requirement: LLM Failure Phase 与状态摘要 +LLM checker SHALL 使用 `request`、`status`、`headers`、`stream`、`output`、`finishReason`、`rawFinishReason`、`usage`、`duration` 作为第一版 failure phase。成功结果的 `statusDetail` SHALL 简短描述 provider、mode、HTTP status、finish reason、raw finish reason、first token、输出长度和 token usage 中可用的信息。`statusDetail` MUST NOT 写入完整 prompt、完整输出或 key。 + +#### Scenario: request failure +- **WHEN** 模型请求因网络错误、认证调用异常、AbortSignal 或无 HTTP metadata 的 SDK 错误失败 +- **THEN** LLM checker SHALL 返回 `phase: "request"` 的 error failure + +#### Scenario: output mismatch failure +- **WHEN** 模型输出不满足 `expect.output` +- **THEN** LLM checker SHALL 返回 `phase: "output"` 的 mismatch failure + +#### Scenario: 非流式成功摘要 +- **WHEN** `provider: openai` 的非流式检查成功 +- **THEN** `statusDetail` SHALL 使用类似 `LLM openai http 200 finish=stop, output=2 chars, usage=12/2 tokens` 的简短格式 + +#### Scenario: 流式成功摘要 +- **WHEN** `provider: anthropic` 的流式检查成功且存在 raw finish reason +- **THEN** `statusDetail` SHALL 使用类似 `LLM anthropic stream 200 finish=stop raw=end_turn, firstToken=624ms, output=2 chars` 的简短格式 + +#### Scenario: serialize 展示文本 +- **WHEN** store 同步 LLM target +- **THEN** LLM checker `serialize()` SHALL 返回类似 `openai:gpt-4o-mini @ https://api.openai.com/v1` 的 target 展示文本和 resolved config JSON + +### Requirement: LLM Checker 测试策略 +LLM checker 的自动化测试 MUST NOT 访问真实外部模型服务。测试 SHALL 使用本地 mock HTTP/SSE 服务模拟 OpenAI Chat Completions、OpenAI Responses 和 Anthropic Messages 的成功、错误和流式响应。测试 SHALL 覆盖 schema、语义校验、defaults 
合并、变量替换、provider factory、observation、expect、execute、registry 注册、配置加载和 JSON Schema 导出。 + +#### Scenario: 本地 mock provider 测试成功路径 +- **WHEN** 测试运行 LLM checker 的 OpenAI、OpenAI Responses 和 Anthropic 成功路径 +- **THEN** 测试 SHALL 使用本地 mock 服务返回 provider 响应,不依赖外部网络或真实 API key + +#### Scenario: 本地 mock provider 测试错误路径 +- **WHEN** 测试运行 401、429、500、超时、stream error、stream abort、缺 usage 或无文本输出路径 +- **THEN** 测试 SHALL 断言 LLM checker 返回符合 spec 的 matched、failure phase、actual 和 statusDetail + +#### Scenario: 质量检查覆盖 LLM checker +- **WHEN** 实现完成后执行质量检查 +- **THEN** `bun run schema:check`、`bun run check` SHALL 通过 diff --git a/openspec/specs/probe-config/spec.md b/openspec/specs/probe-config/spec.md index 6cfa078..40ee57a 100644 --- a/openspec/specs/probe-config/spec.md +++ b/openspec/specs/probe-config/spec.md @@ -5,9 +5,9 @@ ## Requirements ### Requirement: YAML 配置文件格式 -系统 SHALL 支持通过 YAML 配置文件定义全部运行参数,包括 server 配置、runtime 配置、可选的 variables 段、checker 默认值和 typed target 列表(含可选 group 字段)。target MUST 使用 `id` 字段作为唯一标识符,MUST 使用 `type` 字段声明 checker 类型,SHALL 支持可选的 `name` 字段作为展示名称元信息,SHALL 支持可选的 `description` 字段作为目标说明。`name` 和 `description` 均 SHALL 允许省略或显式配置为 `null`;省略或显式 null 时解析结果 SHALL 保留为 null。HTTP 领域字段 MUST 放在 `http` 分组,cmd 领域字段 MUST 放在 `cmd` 分组,db 领域字段 MUST 放在 `db` 分组,tcp 领域字段 MUST 放在 `tcp` 分组,ping 领域字段 MUST 放在 `ping` 分组,udp 领域字段 MUST 放在 `udp` 分组。HTTP target 的 `http` 分组 SHALL 支持可选的 `ignoreSSL`(布尔值)和 `maxRedirects`(非负整数)字段。Db target 的 `db` 分组 SHALL 支持 `url`(必填)和 `query`(可选)字段。Tcp target 的 `tcp` 分组 SHALL 支持 `host`(必填)、`port`(必填)、`readBanner`(可选)、`bannerReadTimeout`(可选)和 `maxBannerBytes`(可选)字段。Ping target 的 `ping` 分组 SHALL 支持 `host`(必填)、`count`(可选,默认 3)和 `packetSize`(可选,默认 56)字段。Udp target 的 `udp` 分组 SHALL 支持 `host`(必填)、`port`(必填)、`payload`(可选,默认空字符串)、`encoding`(可选,默认 `text`)、`responseEncoding`(可选,默认 `text`)和 `maxResponseBytes`(可选,默认 4096)字段。 +系统 SHALL 支持通过 YAML 配置文件定义全部运行参数,包括 server 配置、runtime 配置、可选的 variables 段、checker 默认值和 typed target 列表(含可选 group 字段)。target MUST 使用 `id` 字段作为唯一标识符,MUST 使用 
`type` 字段声明 checker 类型,SHALL 支持可选的 `name` 字段作为展示名称元信息,SHALL 支持可选的 `description` 字段作为目标说明。`name` 和 `description` 均 SHALL 允许省略或显式配置为 `null`;省略或显式 null 时解析结果 SHALL 保留为 null。HTTP 领域字段 MUST 放在 `http` 分组,cmd 领域字段 MUST 放在 `cmd` 分组,db 领域字段 MUST 放在 `db` 分组,tcp 领域字段 MUST 放在 `tcp` 分组,ping 领域字段 MUST 放在 `ping` 分组,udp 领域字段 MUST 放在 `udp` 分组,LLM 领域字段 MUST 放在 `llm` 分组。HTTP target 的 `http` 分组 SHALL 支持可选的 `ignoreSSL`(布尔值)和 `maxRedirects`(非负整数)字段。Db target 的 `db` 分组 SHALL 支持 `url`(必填)和 `query`(可选)字段。Tcp target 的 `tcp` 分组 SHALL 支持 `host`(必填)、`port`(必填)、`readBanner`(可选)、`bannerReadTimeout`(可选)和 `maxBannerBytes`(可选)字段。Ping target 的 `ping` 分组 SHALL 支持 `host`(必填)、`count`(可选,默认 3)和 `packetSize`(可选,默认 56)字段。Udp target 的 `udp` 分组 SHALL 支持 `host`(必填)、`port`(必填)、`payload`(可选,默认空字符串)、`encoding`(可选,默认 `text`)、`responseEncoding`(可选,默认 `text`)和 `maxResponseBytes`(可选,默认 4096)字段。LLM target 的 `llm` 分组 SHALL 支持 `provider`(必填)、`url`(必填)、`model`(必填)、`prompt`(必填)、`mode`(可选,默认 `http`)、`key`(可选,默认空字符串)、`authToken`(可选)、`headers`(可选)、`ignoreSSL`(可选,默认 `false`)、`options`(可选)和 `providerOptions`(可选)字段。 -`defaults.http` 分组 SHALL 仅支持 `headers`(可选)和 `maxBodyBytes`(可选)字段。`defaults.http` 分组 MUST NOT 支持 `method` 字段。`defaults.tcp` 分组 SHALL 仅支持 `bannerReadTimeout`(可选)和 `maxBannerBytes`(可选)字段。`defaults.udp` 分组 SHALL 仅支持 `encoding`(可选)、`responseEncoding`(可选)和 `maxResponseBytes`(可选)字段。 +`defaults.http` 分组 SHALL 仅支持 `headers`(可选)和 `maxBodyBytes`(可选)字段。`defaults.http` 分组 MUST NOT 支持 `method` 字段。`defaults.tcp` 分组 SHALL 仅支持 `bannerReadTimeout`(可选)和 `maxBannerBytes`(可选)字段。`defaults.udp` 分组 SHALL 仅支持 `encoding`(可选)、`responseEncoding`(可选)和 `maxResponseBytes`(可选)字段。`defaults.llm` 分组 SHALL 仅支持 `mode`(可选)、`headers`(可选)、`ignoreSSL`(可选)、`options`(可选)和 `providerOptions`(可选)字段。 #### Scenario: 完整配置文件解析 - **WHEN** 系统启动并读取包含 server、runtime、variables、defaults、targets(含 id、group 字段)的 YAML 配置文件 @@ -69,6 +69,14 @@ - **WHEN** YAML 配置中 defaults.udp 设置 `encoding`、`responseEncoding` 和 `maxResponseBytes` - **THEN** 未显式覆盖对应字段的 udp target SHALL 使用 
defaults.udp 中的值 +#### Scenario: 最简 llm 配置文件解析 +- **WHEN** 系统读取只包含一个 `type: llm` target(含 `id`、`llm.provider`、`llm.url`、`llm.model` 和 `llm.prompt`)的 YAML 配置文件 +- **THEN** 系统 SHALL 使用内置默认值填充未指定字段(interval=30s, timeout=10s, group="default", llm.mode="http", llm.key="", llm.ignoreSSL=false, llm.options.maxOutputTokens=16, llm.options.temperature=0),并保留 name=null、description=null + +#### Scenario: defaults.llm 配置默认值 +- **WHEN** YAML 配置中 defaults.llm 设置 `mode`、`headers`、`ignoreSSL`、`options` 或 `providerOptions` +- **THEN** 未显式覆盖对应字段的 llm target SHALL 使用 defaults.llm 中的值 + ### Requirement: CLI 参数 系统 SHALL 通过单一命令行参数接受 YAML 配置文件路径。 @@ -293,7 +301,7 @@ - **THEN** 系统 SHALL 调用 `Bun.YAML.parse()` 将内容解析为配置对象 ### Requirement: expect 配置增强 -系统 SHALL 支持 typed target 的领域专用 expect 配置,包括 HTTP 的 `status`(支持精确数字和范围模式)、`headers`、`body`,cmd 的 `exitCode`、`stdout`、`stderr`,tcp 的 `connected`、`banner`,ping 的 `alive`、`maxPacketLoss`、`maxAvgLatencyMs`、`maxMaxLatencyMs`,以及 udp 的 `responded`、`response`、`responseSize`、`sourceHost`、`sourcePort` 和 `maxDurationMs`。内容类 expect MUST 使用数组表达配置顺序。 +系统 SHALL 支持 typed target 的领域专用 expect 配置,包括 HTTP 的 `status`(支持精确数字和范围模式)、`headers`、`body`,cmd 的 `exitCode`、`stdout`、`stderr`,tcp 的 `connected`、`banner`,ping 的 `alive`、`maxPacketLoss`、`maxAvgLatencyMs`、`maxMaxLatencyMs`,udp 的 `responded`、`response`、`responseSize`、`sourceHost`、`sourcePort` 和 `maxDurationMs`,以及 llm 的 `status`、`headers`、`output`、`finishReason`、`rawFinishReason`、`usage`、`stream` 和 `maxDurationMs`。内容类 expect MUST 使用数组表达配置顺序。 #### Scenario: 解析 HTTP expect 配置 - **WHEN** YAML 配置文件中 HTTP target 的 expect 包含 status、headers、body 规则数组及内部方法 @@ -343,6 +351,14 @@ - **WHEN** udp target 未配置任何 expect 规则 - **THEN** 系统 SHALL 正常处理,expect 字段为 undefined,执行时使用默认 responded=true 语义 +#### Scenario: 解析 llm expect 配置 +- **WHEN** YAML 配置文件中 llm target 的 expect 包含 status、headers、output、finishReason、rawFinishReason、usage、stream 和 maxDurationMs +- **THEN** 系统 SHALL 正确解析并存储为 llm target 的 expect 字段,并保留 output 规则数组顺序 + +#### 
Scenario: 不配置 llm expect +- **WHEN** llm target 未配置任何 expect 规则 +- **THEN** 系统 SHALL 正常处理,expect 字段为 undefined,执行时使用默认 status=[200] 语义 + ### Requirement: 数据保留配置字段 配置 schema 的 `runtime` 段 SHALL 支持 `retention` 字段,类型为字符串,格式为 `<数字><单位>`(单位:`d` 天、`h` 小时、`m` 分钟),用于指定历史数据保留时长。 @@ -469,3 +485,82 @@ #### Scenario: defaults.tcp 未知字段失败 - **WHEN** YAML 中 defaults.tcp 包含 `host` 或其他非默认字段 - **THEN** 系统 SHALL 以配置错误退出,提示 defaults.tcp 包含未知字段 + +### Requirement: LLM 配置校验 +系统 SHALL 在启动期对 llm checker 的配置契约和语义执行严格校验。LLM target 的 `llm` 分组 SHALL 只允许 `provider`、`url`、`model`、`prompt`、`mode`、`key`、`authToken`、`headers`、`ignoreSSL`、`options` 和 `providerOptions` 字段。`defaults.llm` 分组 SHALL 只允许 `mode`、`headers`、`ignoreSSL`、`options` 和 `providerOptions` 字段。LLM expect SHALL 只允许 `status`、`headers`、`output`、`finishReason`、`rawFinishReason`、`usage`、`stream` 和 `maxDurationMs` 字段。未知字段、非法 provider、非法 URL、非法 mode、非法认证组合、非法 options、非法 output 规则和 `mode: http` 下配置 `expect.stream` MUST 导致启动期配置错误。 + +#### Scenario: llm provider 非法 +- **WHEN** YAML 中 llm target 的 `llm.provider` 不是 `openai`、`openai-responses` 或 `anthropic` +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.provider 不合法 + +#### Scenario: llm url 非法 +- **WHEN** YAML 中 llm target 的 `llm.url` 不是 `http://` 或 `https://` URL +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.url 格式不合法 + +#### Scenario: llm model 为空 +- **WHEN** YAML 中 llm target 的 `llm.model` 不是非空字符串 +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.model 必须为非空字符串 + +#### Scenario: llm prompt 为空 +- **WHEN** YAML 中 llm target 的 `llm.prompt` 不是非空字符串 +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.prompt 必须为非空字符串 + +#### Scenario: llm mode 非法 +- **WHEN** YAML 中 llm target 或 defaults.llm 的 `mode` 不是 `http` 或 `stream` +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.mode 不合法 + +#### Scenario: llm headers 类型非法 +- **WHEN** YAML 中 llm target 或 defaults.llm 的 `headers` 不是对象,或任一 header 值不是字符串 +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.headers 格式错误 + +#### Scenario: llm ignoreSSL 类型非法 +- **WHEN** YAML 中 llm target 或 defaults.llm 的 `ignoreSSL` 不是布尔值 +- **THEN** 系统 SHALL 
以配置错误退出,提示 llm.ignoreSSL 必须为布尔值 + +#### Scenario: llm authToken provider 非法 +- **WHEN** YAML 中 `provider: openai` 或 `provider: openai-responses` 的 llm target 配置 `authToken` +- **THEN** 系统 SHALL 以配置错误退出,提示 authToken 仅支持 anthropic provider + +#### Scenario: Anthropic key 与 authToken 冲突 +- **WHEN** YAML 中 `provider: anthropic` 的 llm target 同时配置非空 `key` 和非空 `authToken` +- **THEN** 系统 SHALL 以配置错误退出,提示 key 与 authToken 不能同时配置 + +#### Scenario: llm options 非法 +- **WHEN** YAML 中 llm target 或 defaults.llm 的 `options.maxOutputTokens` 不是正整数,`options.temperature`/`topP`/`topK`/`presencePenalty`/`frequencyPenalty`/`seed` 类型不合法,或 `options.stopSequences` 不是字符串数组 +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.options 格式错误 + +#### Scenario: llm providerOptions 非法 +- **WHEN** YAML 中 llm target 或 defaults.llm 的 `providerOptions` 不是 JSON object +- **THEN** 系统 SHALL 以配置错误退出,提示 llm.providerOptions 格式错误 + +#### Scenario: llm 禁止字段失败 +- **WHEN** YAML 中 llm target 配置 `api`、`providerName`、`baseURL`、`apiKey`、`messages`、`maxRetries`、`request`、`maxBodyBytes` 或 `maxStreamBytes` +- **THEN** 系统 SHALL 以配置错误退出,提示 llm 分组包含未知字段 + +#### Scenario: llm output 规则缺少支持字段 +- **WHEN** YAML 中 llm target 的 `expect.output` 数组项未包含 equals、contains、regex、json 任一支持字段 +- **THEN** 系统 SHALL 以配置错误退出,提示 output rule 缺少支持的规则类型 + +#### Scenario: llm output 规则同时配置多个支持字段 +- **WHEN** YAML 中 llm target 的同一条 output rule 同时包含 equals、contains、regex、json 中的多个支持字段 +- **THEN** 系统 SHALL 以配置错误退出,提示每条 output rule 只能配置一种规则类型 + +#### Scenario: llm output regex 非法 +- **WHEN** YAML 中 llm target 的 output regex 规则不是字符串、不是可编译正则表达式或存在 ReDoS 风险 +- **THEN** 系统 SHALL 以配置错误退出,提示该 output regex 不合法 + +#### Scenario: llm output json path 非法 +- **WHEN** YAML 中 llm target 的 output json 规则缺少 path,或 path 不符合系统支持的 JSONPath 子集 +- **THEN** 系统 SHALL 以配置错误退出,提示该 output json path 不合法 + +#### Scenario: llm expect usage 非法 +- **WHEN** YAML 中 llm target 的 `expect.usage.inputTokens`、`expect.usage.outputTokens` 或 `expect.usage.totalTokens` 不是合法 operator 对象 +- **THEN** 系统 SHALL 
以配置错误退出,提示 expect.usage 格式错误 + +#### Scenario: llm expect stream 仅允许 stream mode +- **WHEN** YAML 中 llm target 配置 `llm.mode: http` 且配置 `expect.stream` +- **THEN** 系统 SHALL 以配置错误退出,提示 expect.stream 仅支持 stream mode + +#### Scenario: llm expect stream firstTokenMs 非法 +- **WHEN** YAML 中 llm target 的 `expect.stream.firstTokenMs` 不是合法 operator 对象 +- **THEN** 系统 SHALL 以配置错误退出,提示 expect.stream.firstTokenMs 格式错误 diff --git a/package.json b/package.json index bd7e4d9..2e94a0b 100644 --- a/package.json +++ b/package.json @@ -46,10 +46,13 @@ "vite": "^8.0.13" }, "dependencies": { + "@ai-sdk/anthropic": "^3", + "@ai-sdk/openai": "^3", "@number-flow/react": "^0.6.0", "@sinclair/typebox": "^0.34.49", "@tanstack/react-query": "^5.100.10", "@xmldom/xmldom": "^0.9.10", + "ai": "^6", "ajv": "^8.20.0", "cheerio": "^1.2.0", "es-toolkit": "^1.46.1", diff --git a/probe-config.schema.json b/probe-config.schema.json index e3fb2e9..687776c 100644 --- a/probe-config.schema.json +++ b/probe-config.schema.json @@ -137,6 +137,77 @@ ] } } + }, + "llm": { + "additionalProperties": false, + "type": "object", + "properties": { + "headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "ignoreSSL": { + "type": "boolean" + }, + "mode": { + "anyOf": [ + { + "const": "http", + "type": "string" + }, + { + "const": "stream", + "type": "string" + } + ] + }, + "options": { + "additionalProperties": false, + "type": "object", + "properties": { + "frequencyPenalty": { + "type": "number" + }, + "maxOutputTokens": { + "minimum": 1, + "type": "integer" + }, + "presencePenalty": { + "type": "number" + }, + "seed": { + "type": "number" + }, + "stopSequences": { + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "type": "number" + }, + "topK": { + "type": "number" + }, + "topP": { + "type": "number" + } + } + }, + "providerOptions": { + "type": "object", + "patternProperties": { + "^(.*)$": { + "additionalProperties": true, + "type": "object", + 
"properties": {} + } + } + } + } } } }, @@ -1650,6 +1721,558 @@ } } } + }, + { + "additionalProperties": false, + "type": "object", + "required": [ + "id", + "type", + "llm" + ], + "properties": { + "description": { + "anyOf": [ + { + "type": "null" + }, + { + "maxLength": 500, + "type": "string" + } + ] + }, + "expect": { + "additionalProperties": false, + "type": "object", + "properties": { + "finishReason": { + "type": "string" + }, + "headers": { + "additionalProperties": { + "anyOf": [ + { + "type": "string" + }, + { + "additionalProperties": false, + "minProperties": 1, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "items": {}, + "type": "array" + }, + { + "additionalProperties": {}, + "type": "object" + } + ] + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + } + ] + }, + "type": "object" + }, + "maxDurationMs": { + "minimum": 0, + "type": "number" + }, + "output": { + "type": "array", + "items": { + "additionalProperties": false, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "equals": { + "type": "string" + }, + "json": { + "additionalProperties": false, + "type": "object", + "required": [ + "path" + ], + "properties": { + "path": { + "type": "string" + }, + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "type": "number" + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + }, + "regex": { + "type": "string" + } + } + } + }, + 
"rawFinishReason": { + "type": "string" + }, + "status": { + "type": "array", + "items": { + "anyOf": [ + { + "maximum": 599, + "minimum": 100, + "type": "integer" + }, + { + "pattern": "^[1-5]xx$", + "type": "string" + } + ] + } + }, + "stream": { + "additionalProperties": false, + "type": "object", + "properties": { + "completed": { + "type": "boolean" + }, + "firstTokenMs": { + "additionalProperties": false, + "minProperties": 1, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "items": {}, + "type": "array" + }, + { + "additionalProperties": {}, + "type": "object" + } + ] + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + } + } + }, + "usage": { + "additionalProperties": false, + "type": "object", + "properties": { + "inputTokens": { + "additionalProperties": false, + "minProperties": 1, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "items": {}, + "type": "array" + }, + { + "additionalProperties": {}, + "type": "object" + } + ] + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + }, + "outputTokens": { + "additionalProperties": false, + "minProperties": 1, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "anyOf": [ + { + 
"type": "string" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "items": {}, + "type": "array" + }, + { + "additionalProperties": {}, + "type": "object" + } + ] + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + }, + "totalTokens": { + "additionalProperties": false, + "minProperties": 1, + "type": "object", + "properties": { + "contains": { + "type": "string" + }, + "empty": { + "type": "boolean" + }, + "equals": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "items": {}, + "type": "array" + }, + { + "additionalProperties": {}, + "type": "object" + } + ] + }, + "exists": { + "type": "boolean" + }, + "gt": { + "type": "number" + }, + "gte": { + "type": "number" + }, + "lt": { + "type": "number" + }, + "lte": { + "type": "number" + }, + "match": { + "type": "string" + } + } + } + } + } + } + }, + "group": { + "type": "string" + }, + "id": { + "maxLength": 30, + "minLength": 1, + "type": "string" + }, + "interval": { + "type": "string" + }, + "name": { + "anyOf": [ + { + "type": "null" + }, + { + "maxLength": 30, + "minLength": 1, + "type": "string" + } + ] + }, + "timeout": { + "type": "string" + }, + "type": { + "const": "llm", + "type": "string" + }, + "llm": { + "additionalProperties": false, + "type": "object", + "required": [ + "model", + "prompt", + "provider", + "url" + ], + "properties": { + "authToken": { + "type": "string" + }, + "headers": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "ignoreSSL": { + "type": "boolean" + }, + "key": { + "type": "string" + }, + "mode": { + "anyOf": [ + { + "const": "http", + "type": "string" + }, + { + "const": "stream", + "type": "string" + } + ] + }, + "model": { + "minLength": 1, + 
"type": "string" + }, + "options": { + "additionalProperties": false, + "type": "object", + "properties": { + "frequencyPenalty": { + "type": "number" + }, + "maxOutputTokens": { + "minimum": 1, + "type": "integer" + }, + "presencePenalty": { + "type": "number" + }, + "seed": { + "type": "number" + }, + "stopSequences": { + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "type": "number" + }, + "topK": { + "type": "number" + }, + "topP": { + "type": "number" + } + } + }, + "prompt": { + "minLength": 1, + "type": "string" + }, + "provider": { + "anyOf": [ + { + "const": "openai", + "type": "string" + }, + { + "const": "openai-responses", + "type": "string" + }, + { + "const": "anthropic", + "type": "string" + } + ] + }, + "providerOptions": { + "type": "object", + "patternProperties": { + "^(.*)$": { + "additionalProperties": true, + "type": "object", + "properties": {} + } + } + }, + "url": { + "minLength": 1, + "type": "string" + } + } + } + } } ] } diff --git a/probes.example.yaml b/probes.example.yaml index 0b5fa21..4c40e95 100644 --- a/probes.example.yaml +++ b/probes.example.yaml @@ -228,3 +228,41 @@ targets: payload: "<14>health check" expect: responded: false + + - id: "llm-openai-probe" + name: "OpenAI Chat Completions 健康检查" + type: llm + group: "AI 服务" + llm: + provider: openai + url: "https://api.openai.com/v1" + model: "gpt-4o-mini" + prompt: "Say OK" + key: "${OPENAI_API_KEY}" + expect: + status: + - 200 + finishReason: "stop" + output: + - contains: "OK" + + - id: "llm-anthropic-stream-probe" + name: "Anthropic 流式健康检查" + type: llm + group: "AI 服务" + llm: + provider: anthropic + url: "https://api.anthropic.com/v1" + model: "claude-3-5-haiku-20241022" + prompt: "Say OK" + key: "${ANTHROPIC_API_KEY}" + mode: stream + expect: + status: + - 200 + stream: + completed: true + firstTokenMs: + lte: 5000 + finishReason: "stop" + maxDurationMs: 15000 diff --git a/src/server/checker/runner/index.ts 
b/src/server/checker/runner/index.ts index 3508cea..2f49089 100644 --- a/src/server/checker/runner/index.ts +++ b/src/server/checker/runner/index.ts @@ -2,6 +2,7 @@ import { CommandChecker } from "./cmd"; import { DbChecker } from "./db"; import { HttpChecker } from "./http"; import { IcmpChecker } from "./icmp"; +import { LlmChecker } from "./llm"; import { CheckerRegistry } from "./registry"; import { TcpChecker } from "./tcp"; import { UdpChecker } from "./udp"; @@ -13,6 +14,7 @@ const checkers = [ new TcpChecker(), new IcmpChecker(), new UdpChecker(), + new LlmChecker(), ]; export function createDefaultCheckerRegistry(): CheckerRegistry { diff --git a/src/server/checker/runner/llm/execute.ts b/src/server/checker/runner/llm/execute.ts new file mode 100644 index 0000000..bd0ab9b --- /dev/null +++ b/src/server/checker/runner/llm/execute.ts @@ -0,0 +1,311 @@ +import type { JSONObject } from "@ai-sdk/provider"; + +import { APICallError, generateText, streamText } from "ai"; +import { isError } from "es-toolkit"; + +import type { CheckResult, RawTargetConfig } from "../../types"; +import type { CheckerContext, CheckerDefinition, CheckerValidationInput, ResolveContext } from "../types"; +import type { LlmCheckObservation, LlmExpectConfig, LlmTargetConfig, ResolvedLlmTarget } from "./types"; + +import { checkDuration } from "../../expect/duration"; +import { errorFailure } from "../../expect/failure"; +import { runExpects } from "./expect"; +import { + buildObservationFromApiCallError, + buildObservationFromGenerateText, + buildObservationFromStreamText, +} from "./observation"; +import { createProviderModel } from "./provider"; +import { llmCheckerSchemas } from "./schema"; +import { validateLlmConfig } from "./validate"; + +export class LlmChecker implements CheckerDefinition { + readonly configKey = "llm"; + readonly schemas = llmCheckerSchemas; + readonly type = "llm"; + + async execute(t: ResolvedLlmTarget, ctx: CheckerContext): Promise { + const timestamp = new 
Date().toISOString(); + const expect = t.expect; + const start = performance.now(); + + try { + const { http: httpMeta, model } = createProviderModel(t.llm); + + if (t.llm.mode === "stream") { + return await this.executeStream(t, model, httpMeta, expect, ctx, timestamp, start); + } + + return await this.executeHttp(t, model, httpMeta, expect, ctx, timestamp, start); + } catch (error) { + const durationMs = Math.round(performance.now() - start); + + if (error instanceof APICallError) { + const observation = buildObservationFromApiCallError(error, t.llm.provider, t.llm.model, t.llm.mode); + + if (observation.http === null) { + return { + durationMs, + failure: errorFailure("request", "request", error.message), + matched: false, + statusDetail: null, + targetId: t.id, + timestamp, + }; + } + + const durationResult = checkDuration(durationMs, expect?.maxDurationMs); + const expectResult = runExpects(observation, expect); + const failure = expectResult.failure ?? durationResult.failure; + + return { + durationMs, + failure, + matched: failure === null, + statusDetail: buildStatusDetail(observation), + targetId: t.id, + timestamp, + }; + } + + const isTimeout = ctx.signal.aborted || (error instanceof DOMException && error.name === "AbortError"); + return { + durationMs, + failure: errorFailure( + "request", + "request", + isTimeout ? `请求超时 (${t.timeoutMs}ms)` : isError(error) ? error.message : String(error), + ), + matched: false, + statusDetail: null, + targetId: t.id, + timestamp, + }; + } + } + + resolve(target: RawTargetConfig, context: ResolveContext): ResolvedLlmTarget { + const t = target as RawTargetConfig & { llm: LlmTargetConfig; type: "llm" }; + const llmDefaults = context.defaults["llm"] as + | undefined + | { + headers?: Record; + ignoreSSL?: boolean; + mode?: string; + options?: Record; + providerOptions?: Record>; + }; + + const resolvedConfig = { + authToken: t.llm.authToken, + headers: { ...(llmDefaults?.headers ?? {}), ...(t.llm.headers ?? 
{}) }, + ignoreSSL: t.llm.ignoreSSL ?? llmDefaults?.ignoreSSL ?? false, + key: t.llm.key ?? "", + mode: (t.llm.mode ?? llmDefaults?.mode ?? "http") as "http" | "stream", + model: t.llm.model, + options: { + frequencyPenalty: + t.llm.options?.frequencyPenalty ?? (llmDefaults?.options?.["frequencyPenalty"] as number | undefined), + maxOutputTokens: + t.llm.options?.maxOutputTokens ?? (llmDefaults?.options?.["maxOutputTokens"] as number | undefined) ?? 16, + presencePenalty: + t.llm.options?.presencePenalty ?? (llmDefaults?.options?.["presencePenalty"] as number | undefined), + seed: t.llm.options?.seed ?? (llmDefaults?.options?.["seed"] as number | undefined), + stopSequences: + t.llm.options?.stopSequences ?? (llmDefaults?.options?.["stopSequences"] as string[] | undefined), + temperature: t.llm.options?.temperature ?? (llmDefaults?.options?.["temperature"] as number | undefined) ?? 0, + topK: t.llm.options?.topK ?? (llmDefaults?.options?.["topK"] as number | undefined), + topP: t.llm.options?.topP ?? (llmDefaults?.options?.["topP"] as number | undefined), + }, + prompt: t.llm.prompt, + provider: t.llm.provider, + providerOptions: { + ...((llmDefaults?.providerOptions ?? {}) as Record), + ...(t.llm.providerOptions ?? {}), + }, + url: t.llm.url, + }; + + return { + description: (target.description as null | string) ?? null, + expect: target.expect as LlmExpectConfig | undefined, + group: target.group ?? "default", + id: t.id, + intervalMs: context.defaultIntervalMs, + llm: resolvedConfig, + name: (target.name as null | string) ?? null, + timeoutMs: context.defaultTimeoutMs, + type: "llm", + } satisfies ResolvedLlmTarget; + } + + serialize(t: ResolvedLlmTarget): { config: string; target: string } { + return { + config: JSON.stringify({ + headers: t.llm.headers, + ignoreSSL: t.llm.ignoreSSL, + key: t.llm.key ? 
"***" : "", + mode: t.llm.mode, + model: t.llm.model, + options: t.llm.options, + prompt: t.llm.prompt, + provider: t.llm.provider, + providerOptions: t.llm.providerOptions, + url: t.llm.url, + }), + target: `${t.llm.provider}:${t.llm.model} @ ${t.llm.url}`, + }; + } + + validate(input: CheckerValidationInput) { + return validateLlmConfig(input); + } + + private async executeHttp( + t: ResolvedLlmTarget, + model: ReturnType["model"], + httpMeta: null | { headers: Record; status: number; statusText: string }, + expect: LlmExpectConfig | undefined, + ctx: CheckerContext, + timestamp: string, + start: number, + ): Promise { + const result = await generateText({ + abortSignal: ctx.signal, + maxRetries: 0, + model, + prompt: t.llm.prompt, + providerOptions: t.llm.providerOptions, + ...buildSdkOptions(t.llm), + }); + + const respHeaders = result.response?.headers; + const http = httpMeta ?? { + headers: respHeaders ? Object.fromEntries(Object.entries(respHeaders)) : {}, + status: 200, + statusText: "", + }; + + const observation = buildObservationFromGenerateText( + t.llm.provider, + t.llm.model, + t.llm.mode, + { + finishReason: result.finishReason, + rawFinishReason: result.rawFinishReason, + text: result.text, + usage: { + inputTokens: result.usage.inputTokens ?? 0, + outputTokens: result.usage.outputTokens ?? 0, + totalTokens: result.usage.totalTokens, + }, + warnings: result.warnings?.map((w) => + w.type === "unsupported" + ? `unsupported: ${w.feature}` + : ((w as Record)["message"] ?? JSON.stringify(w)), + ), + }, + http, + ); + + const durationMs = Math.round(performance.now() - start); + const durationResult = checkDuration(durationMs, expect?.maxDurationMs); + const expectResult = runExpects(observation, expect); + const failure = expectResult.failure ?? 
durationResult.failure; + + return { + durationMs, + failure, + matched: failure === null, + statusDetail: buildStatusDetail(observation), + targetId: t.id, + timestamp, + }; + } + + private async executeStream( + t: ResolvedLlmTarget, + model: ReturnType["model"], + httpMeta: null | { headers: Record; status: number; statusText: string }, + expect: LlmExpectConfig | undefined, + ctx: CheckerContext, + timestamp: string, + start: number, + ): Promise { + const stream = streamText({ + abortSignal: ctx.signal, + maxRetries: 0, + model, + prompt: t.llm.prompt, + providerOptions: t.llm.providerOptions, + ...buildSdkOptions(t.llm), + }); + + const observation = await buildObservationFromStreamText( + t.llm.provider, + t.llm.model, + t.llm.mode, + stream.fullStream, + httpMeta, + start, + ); + + const durationMs = Math.round(performance.now() - start); + const durationResult = checkDuration(durationMs, expect?.maxDurationMs); + const expectResult = runExpects(observation, expect); + const failure = expectResult.failure ?? 
durationResult.failure; + + return { + durationMs, + failure, + matched: failure === null, + statusDetail: buildStatusDetail(observation), + targetId: t.id, + timestamp, + }; + } +} + +function buildSdkOptions(config: ResolvedLlmTarget["llm"]): Record { + const options: Record = {}; + const opts = config.options; + if (opts.maxOutputTokens !== undefined) options["maxOutputTokens"] = opts.maxOutputTokens; + if (opts.temperature !== undefined) options["temperature"] = opts.temperature; + if (opts.topP !== undefined) options["topP"] = opts.topP; + if (opts.topK !== undefined) options["topK"] = opts.topK; + if (opts.presencePenalty !== undefined) options["presencePenalty"] = opts.presencePenalty; + if (opts.frequencyPenalty !== undefined) options["frequencyPenalty"] = opts.frequencyPenalty; + if (opts.stopSequences !== undefined) options["stopSequences"] = opts.stopSequences; + if (opts.seed !== undefined) options["seed"] = opts.seed; + return options; +} + +function buildStatusDetail(observation: LlmCheckObservation): string { + const parts: string[] = [`LLM ${observation.provider} ${observation.mode}`]; + + if (observation.http) { + parts.push(String(observation.http.status)); + } + + if (observation.finishReason) { + parts.push(`finish=${observation.finishReason}`); + } + + if (observation.rawFinishReason) { + parts.push(`raw=${observation.rawFinishReason}`); + } + + if (observation.stream?.firstTokenMs != null) { + parts.push(`firstToken=${observation.stream.firstTokenMs}ms`); + } + + if (observation.outputText !== null) { + parts.push(`output=${observation.outputText.length} chars`); + } + + if (observation.usage) { + parts.push(`usage=${observation.usage.inputTokens}/${observation.usage.outputTokens} tokens`); + } + + return parts.join(", "); +} diff --git a/src/server/checker/runner/llm/expect.ts b/src/server/checker/runner/llm/expect.ts new file mode 100644 index 0000000..2c23522 --- /dev/null +++ b/src/server/checker/runner/llm/expect.ts @@ -0,0 +1,168 @@ 
+import type { ExpectResult } from "../../expect/types"; +import type { LlmCheckObservation, LlmExpectConfig } from "./types"; + +import { checkDuration } from "../../expect/duration"; +import { mismatchFailure } from "../../expect/failure"; +import { applyOperator } from "../../expect/operator"; +import { checkHeaders, checkStatus } from "../http/expect"; +import { checkOutputRules } from "./output"; + +export function checkStreamExpect(observation: LlmCheckObservation, expect: LlmExpectConfig): ExpectResult { + if (!observation.stream || !expect.stream) return { failure: null, matched: true }; + + const expectedCompleted = expect.stream.completed ?? true; + if (observation.stream.completed !== expectedCompleted) { + return { + failure: mismatchFailure( + "stream", + "stream.completed", + expectedCompleted, + observation.stream.completed, + "stream.completed mismatch", + ), + matched: false, + }; + } + + if (expect.stream.firstTokenMs && observation.stream.firstTokenMs !== null) { + if (!applyOperator(observation.stream.firstTokenMs, expect.stream.firstTokenMs)) { + return { + failure: mismatchFailure( + "stream", + "stream.firstTokenMs", + expect.stream.firstTokenMs, + observation.stream.firstTokenMs, + "stream.firstTokenMs mismatch", + ), + matched: false, + }; + } + } else if (expect.stream.firstTokenMs && observation.stream.firstTokenMs === null) { + return { + failure: mismatchFailure( + "stream", + "stream.firstTokenMs", + expect.stream.firstTokenMs, + null, + "stream.firstTokenMs missing", + ), + matched: false, + }; + } + + return { failure: null, matched: true }; +} + +export function runExpects(observation: LlmCheckObservation, expect: LlmExpectConfig | undefined): ExpectResult { + if (!expect) { + const defaultStatus = checkStatus(observation.http?.status ?? 0, [200]); + if (!defaultStatus.matched) return defaultStatus; + return { failure: null, matched: true }; + } + + const http = observation.http; + + const statusResult = checkStatus(http?.status ?? 
0, expect.status ?? [200]); + if (!statusResult.matched) return statusResult; + + if (http && expect.headers) { + const headersResult = checkHeaders(http.headers, expect.headers); + if (!headersResult.matched) return headersResult; + } + + if (observation.stream !== null) { + const streamResult = checkStreamExpect(observation, expect); + if (!streamResult.matched) return streamResult; + } + + const outputResult = checkOutputRules(observation.outputText, expect.output); + if (!outputResult.matched) return outputResult; + + if (expect.finishReason !== undefined) { + if (observation.finishReason !== expect.finishReason) { + return { + failure: mismatchFailure( + "finishReason", + "finishReason", + expect.finishReason, + observation.finishReason, + "finishReason mismatch", + ), + matched: false, + }; + } + } + + if (expect.rawFinishReason !== undefined) { + if (observation.rawFinishReason !== expect.rawFinishReason) { + return { + failure: mismatchFailure( + "rawFinishReason", + "rawFinishReason", + expect.rawFinishReason, + observation.rawFinishReason, + "rawFinishReason mismatch", + ), + matched: false, + }; + } + } + + if (expect.usage && observation.usage) { + const usageResult = checkUsageExpect(observation.usage, expect.usage); + if (!usageResult.matched) return usageResult; + } + + return { failure: null, matched: true }; +} + +function checkUsageExpect( + usage: { inputTokens: number; outputTokens: number; totalTokens: number }, + expectUsage: { inputTokens?: unknown; outputTokens?: unknown; totalTokens?: unknown }, +): ExpectResult { + if (expectUsage.inputTokens !== undefined) { + if (!applyOperator(usage.inputTokens, expectUsage.inputTokens as Parameters[1])) { + return { + failure: mismatchFailure( + "usage", + "usage.inputTokens", + expectUsage.inputTokens, + usage.inputTokens, + "usage.inputTokens mismatch", + ), + matched: false, + }; + } + } + if (expectUsage.outputTokens !== undefined) { + if (!applyOperator(usage.outputTokens, expectUsage.outputTokens 
as Parameters[1])) { + return { + failure: mismatchFailure( + "usage", + "usage.outputTokens", + expectUsage.outputTokens, + usage.outputTokens, + "usage.outputTokens mismatch", + ), + matched: false, + }; + } + } + if (expectUsage.totalTokens !== undefined) { + if (!applyOperator(usage.totalTokens, expectUsage.totalTokens as Parameters[1])) { + return { + failure: mismatchFailure( + "usage", + "usage.totalTokens", + expectUsage.totalTokens, + usage.totalTokens, + "usage.totalTokens mismatch", + ), + matched: false, + }; + } + } + return { failure: null, matched: true }; +} + +export { checkDuration }; diff --git a/src/server/checker/runner/llm/index.ts b/src/server/checker/runner/llm/index.ts new file mode 100644 index 0000000..1aa24dd --- /dev/null +++ b/src/server/checker/runner/llm/index.ts @@ -0,0 +1 @@ +export { LlmChecker } from "./execute"; diff --git a/src/server/checker/runner/llm/observation.ts b/src/server/checker/runner/llm/observation.ts new file mode 100644 index 0000000..345c1a9 --- /dev/null +++ b/src/server/checker/runner/llm/observation.ts @@ -0,0 +1,131 @@ +import type { APICallError } from "ai"; + +import type { + LlmCheckObservation, + LlmHttpMetadata, + LlmMode, + LlmProvider, + LlmStreamObservation, + LlmUsageObservation, +} from "./types"; + +export function buildObservationFromApiCallError( + error: APICallError, + provider: LlmProvider, + model: string, + mode: LlmMode, +): LlmCheckObservation { + const http: LlmHttpMetadata | null = + error.statusCode !== undefined + ? { + headers: error.responseHeaders ?? 
{}, + status: error.statusCode, + statusText: "", + } + : null; + + return { + finishReason: null, + http, + mode, + model, + outputText: null, + provider, + rawFinishReason: null, + stream: null, + usage: null, + warnings: [], + }; +} + +export function buildObservationFromGenerateText( + provider: LlmProvider, + model: string, + mode: LlmMode, + result: { + finishReason: string; + rawFinishReason: null | string | undefined; + text: string; + usage: { inputTokens: number; outputTokens: number; totalTokens?: number | undefined }; + warnings?: string[]; + }, + http: LlmHttpMetadata | null, +): LlmCheckObservation { + return { + finishReason: result.finishReason, + http, + mode, + model, + outputText: result.text, + provider, + rawFinishReason: result.rawFinishReason ?? null, + stream: null, + usage: { + inputTokens: result.usage.inputTokens ?? 0, + outputTokens: result.usage.outputTokens ?? 0, + totalTokens: result.usage.totalTokens ?? (result.usage.inputTokens ?? 0) + (result.usage.outputTokens ?? 0), + }, + warnings: result.warnings ?? [], + }; +} + +export async function buildObservationFromStreamText( + provider: LlmProvider, + model: string, + mode: LlmMode, + fullStream: AsyncIterable, + http: LlmHttpMetadata | null, + startMs: number, +): Promise { + let outputText = ""; + let firstTokenMs: null | number = null; + let completed = false; + let finishReason: null | string = null; + let rawFinishReason: null | string = null; + let usage: LlmUsageObservation | null = null; + const warnings: string[] = []; + + for await (const part of fullStream) { + const p = part as Record; + const type = p["type"] as string; + + if (type === "text-delta") { + const delta = p["textDelta"] as string; + if (delta !== "") { + firstTokenMs ??= Math.round(performance.now() - startMs); + outputText += delta; + } + } else if (type === "finish") { + completed = true; + finishReason = (p["finishReason"] as string) ?? 
null; + rawFinishReason = (p["rawFinishReason"] as string | undefined) ?? null; + + const totalUsage = p["totalUsage"] as + | undefined + | { inputTokens: number; outputTokens: number; totalTokens: number }; + const partUsage = p["usage"] as undefined | { inputTokens: number; outputTokens: number }; + + usage = { + inputTokens: totalUsage?.inputTokens ?? partUsage?.inputTokens ?? 0, + outputTokens: totalUsage?.outputTokens ?? partUsage?.outputTokens ?? 0, + totalTokens: totalUsage?.totalTokens ?? (partUsage?.inputTokens ?? 0) + (partUsage?.outputTokens ?? 0), + }; + } else if (type === "error") { + const err = p["error"] as Error | undefined; + warnings.push(err?.message ?? "stream error"); + } + } + + return { + finishReason, + http, + mode, + model, + outputText: outputText || null, + provider, + rawFinishReason, + stream: { completed, firstTokenMs } satisfies LlmStreamObservation, + usage, + warnings, + }; +} diff --git a/src/server/checker/runner/llm/output.ts b/src/server/checker/runner/llm/output.ts new file mode 100644 index 0000000..bc32a33 --- /dev/null +++ b/src/server/checker/runner/llm/output.ts @@ -0,0 +1,83 @@ +import type { ExpectResult } from "../../expect/types"; +import type { OutputRule } from "./types"; + +import { mismatchFailure } from "../../expect/failure"; +import { applyOperator, evaluateJsonPath } from "../../expect/operator"; + +export function checkOutputRules(outputText: null | string, rules: OutputRule[] | undefined): ExpectResult { + if (!rules || rules.length === 0) return { failure: null, matched: true }; + + for (const rule of rules) { + const result = checkSingleOutputRule(outputText, rule); + if (!result.matched) return result; + } + + return { failure: null, matched: true }; +} + +function checkSingleOutputRule(outputText: null | string, rule: OutputRule): ExpectResult { + if ("equals" in rule) { + if (outputText === null || outputText !== rule.equals) { + return { + failure: mismatchFailure("output", "output", rule.equals, 
outputText, "output equals mismatch"), + matched: false, + }; + } + return { failure: null, matched: true }; + } + + if ("contains" in rule) { + if (!outputText?.includes(rule.contains)) { + return { + failure: mismatchFailure( + "output", + "output", + `contains: ${rule.contains}`, + outputText, + "output contains mismatch", + ), + matched: false, + }; + } + return { failure: null, matched: true }; + } + + if ("regex" in rule) { + if (outputText === null || !new RegExp(rule.regex).test(outputText)) { + return { + failure: mismatchFailure("output", "output", `match: ${rule.regex}`, outputText, "output regex mismatch"), + matched: false, + }; + } + return { failure: null, matched: true }; + } + + if ("json" in rule) { + if (outputText === null) { + return { + failure: mismatchFailure("output", "output", "valid JSON", null, "output is null, cannot parse JSON"), + matched: false, + }; + } + let parsed: unknown; + try { + parsed = JSON.parse(outputText); + } catch { + return { + failure: mismatchFailure("output", "output", "valid JSON", outputText, "output is not valid JSON"), + matched: false, + }; + } + + const value = evaluateJsonPath(parsed, rule.json.path); + if (!applyOperator(value, rule.json)) { + return { + failure: mismatchFailure("output", "output", rule.json, value, "output json mismatch"), + matched: false, + }; + } + return { failure: null, matched: true }; + } + + return { failure: null, matched: true }; +} diff --git a/src/server/checker/runner/llm/provider.ts b/src/server/checker/runner/llm/provider.ts new file mode 100644 index 0000000..e17b7fb --- /dev/null +++ b/src/server/checker/runner/llm/provider.ts @@ -0,0 +1,62 @@ +import type { LanguageModel } from "ai"; + +import { createAnthropic } from "@ai-sdk/anthropic"; +import { createOpenAI } from "@ai-sdk/openai"; + +import type { LlmHttpMetadata, ResolvedLlmConfig } from "./types"; + +export interface ProviderResult { + http: LlmHttpMetadata | null; + model: LanguageModel; +} + +export function 
createProviderModel(config: ResolvedLlmConfig): ProviderResult { + let httpMeta: LlmHttpMetadata | null = null; + + const observingFetch = async (input: RequestInfo | URL, init?: RequestInit): Promise => { + const fetchInit: Record = { ...init }; + if (config.ignoreSSL) { + fetchInit["tls"] = { rejectUnauthorized: false }; + } + + const response = await fetch(input, fetchInit); + httpMeta = { + headers: Object.fromEntries(response.headers), + status: response.status, + statusText: response.statusText, + }; + return response; + }; + + const sharedOptions = { + apiKey: config.key, + baseURL: config.url, + fetch: observingFetch as typeof fetch, + headers: config.headers, + }; + + let model: LanguageModel; + + switch (config.provider) { + case "anthropic": { + const provider = createAnthropic({ + ...sharedOptions, + ...(config.authToken ? { headers: { ...config.headers, Authorization: `Bearer ${config.authToken}` } } : {}), + }); + model = provider.messages(config.model); + break; + } + case "openai": { + const provider = createOpenAI(sharedOptions); + model = provider.chat(config.model); + break; + } + case "openai-responses": { + const provider = createOpenAI(sharedOptions); + model = provider.responses(config.model); + break; + } + } + + return { http: httpMeta, model }; +} diff --git a/src/server/checker/runner/llm/schema.ts b/src/server/checker/runner/llm/schema.ts new file mode 100644 index 0000000..d25f3c0 --- /dev/null +++ b/src/server/checker/runner/llm/schema.ts @@ -0,0 +1,115 @@ +import { Type } from "@sinclair/typebox"; + +import type { CheckerSchemas } from "../types"; + +import { + createHeaderExpectSchema, + createPureOperatorSchema, + statusCodePatternSchema, + stringMapSchema, +} from "../../schema/fragments"; + +function createLlmOptionsSchema() { + return Type.Object( + { + frequencyPenalty: Type.Optional(Type.Number()), + maxOutputTokens: Type.Optional(Type.Integer({ minimum: 1 })), + presencePenalty: Type.Optional(Type.Number()), + seed: 
Type.Optional(Type.Number()), + stopSequences: Type.Optional(Type.Array(Type.String())), + temperature: Type.Optional(Type.Number()), + topK: Type.Optional(Type.Number()), + topP: Type.Optional(Type.Number()), + }, + { additionalProperties: false }, + ); +} + +function createLlmOutputRulesSchema() { + return Type.Array( + Type.Object( + { + contains: Type.Optional(Type.String()), + equals: Type.Optional(Type.String()), + json: Type.Optional( + Type.Object({ path: Type.String(), ...operatorProperties() }, { additionalProperties: false }), + ), + regex: Type.Optional(Type.String()), + }, + { additionalProperties: false }, + ), + ); +} + +function operatorProperties() { + return { + contains: Type.Optional(Type.String()), + empty: Type.Optional(Type.Boolean()), + equals: Type.Optional(Type.Number()), + exists: Type.Optional(Type.Boolean()), + gt: Type.Optional(Type.Number()), + gte: Type.Optional(Type.Number()), + lt: Type.Optional(Type.Number()), + lte: Type.Optional(Type.Number()), + match: Type.Optional(Type.String()), + }; +} + +export const llmCheckerSchemas: CheckerSchemas = { + config: Type.Object( + { + authToken: Type.Optional(Type.String()), + headers: Type.Optional(stringMapSchema), + ignoreSSL: Type.Optional(Type.Boolean()), + key: Type.Optional(Type.String()), + mode: Type.Optional(Type.Union([Type.Literal("http"), Type.Literal("stream")])), + model: Type.String({ minLength: 1 }), + options: Type.Optional(createLlmOptionsSchema()), + prompt: Type.String({ minLength: 1 }), + provider: Type.Union([Type.Literal("openai"), Type.Literal("openai-responses"), Type.Literal("anthropic")]), + providerOptions: Type.Optional(Type.Record(Type.String(), Type.Object({}, { additionalProperties: true }))), + url: Type.String({ minLength: 1 }), + }, + { additionalProperties: false }, + ), + defaults: Type.Object( + { + headers: Type.Optional(stringMapSchema), + ignoreSSL: Type.Optional(Type.Boolean()), + mode: Type.Optional(Type.Union([Type.Literal("http"), 
Type.Literal("stream")])), + options: Type.Optional(createLlmOptionsSchema()), + providerOptions: Type.Optional(Type.Record(Type.String(), Type.Object({}, { additionalProperties: true }))), + }, + { additionalProperties: false }, + ), + expect: Type.Object( + { + finishReason: Type.Optional(Type.String()), + headers: Type.Optional(createHeaderExpectSchema()), + maxDurationMs: Type.Optional(Type.Number({ minimum: 0 })), + output: Type.Optional(createLlmOutputRulesSchema()), + rawFinishReason: Type.Optional(Type.String()), + status: Type.Optional(Type.Array(statusCodePatternSchema)), + stream: Type.Optional( + Type.Object( + { + completed: Type.Optional(Type.Boolean()), + firstTokenMs: Type.Optional(createPureOperatorSchema()), + }, + { additionalProperties: false }, + ), + ), + usage: Type.Optional( + Type.Object( + { + inputTokens: Type.Optional(createPureOperatorSchema()), + outputTokens: Type.Optional(createPureOperatorSchema()), + totalTokens: Type.Optional(createPureOperatorSchema()), + }, + { additionalProperties: false }, + ), + ), + }, + { additionalProperties: false }, + ), +}; diff --git a/src/server/checker/runner/llm/types.ts b/src/server/checker/runner/llm/types.ts new file mode 100644 index 0000000..9783451 --- /dev/null +++ b/src/server/checker/runner/llm/types.ts @@ -0,0 +1,121 @@ +import type { JSONObject } from "@ai-sdk/provider"; + +import type { ExpectOperator, ResolvedTargetBase } from "../../types"; + +export interface LlmCheckObservation { + finishReason: null | string; + http: LlmHttpMetadata | null; + mode: LlmMode; + model: string; + outputText: null | string; + provider: LlmProvider; + rawFinishReason: null | string; + stream: LlmStreamObservation | null; + usage: LlmUsageObservation | null; + warnings: string[]; +} +export interface LlmDefaultsConfig { + headers?: Record; + ignoreSSL?: boolean; + mode?: LlmMode; + options?: LlmOptions; + providerOptions?: Record; +} + +export interface LlmExpectConfig { + finishReason?: string; + 
headers?: Record; + maxDurationMs?: number; + output?: OutputRule[]; + rawFinishReason?: string; + status?: Array; + stream?: LlmStreamExpect; + usage?: LlmUsageExpect; +} + +export interface LlmHttpMetadata { + headers: Record; + status: number; + statusText: string; +} + +export type LlmMode = "http" | "stream"; + +export interface LlmOptions { + frequencyPenalty?: number; + maxOutputTokens?: number; + presencePenalty?: number; + seed?: number; + stopSequences?: string[]; + temperature?: number; + topK?: number; + topP?: number; +} + +export type LlmProvider = "anthropic" | "openai" | "openai-responses"; + +export interface LlmStreamExpect { + completed?: boolean; + firstTokenMs?: ExpectOperator; +} + +export interface LlmStreamObservation { + completed: boolean; + firstTokenMs: null | number; +} + +export interface LlmTargetConfig { + authToken?: string; + headers?: Record; + ignoreSSL?: boolean; + key?: string; + mode?: LlmMode; + model: string; + options?: LlmOptions; + prompt: string; + provider: LlmProvider; + providerOptions?: Record; + url: string; +} + +export interface LlmUsageExpect { + inputTokens?: ExpectOperator; + outputTokens?: ExpectOperator; + totalTokens?: ExpectOperator; +} + +export interface LlmUsageObservation { + inputTokens: number; + outputTokens: number; + totalTokens: number; +} + +export interface OutputJsonRule extends ExpectOperator { + path: string; +} + +export type OutputRule = { contains: string } | { equals: string } | { json: OutputJsonRule } | { regex: string }; + +export interface ResolvedLlmConfig { + authToken?: string; + headers: Record; + ignoreSSL: boolean; + key: string; + mode: LlmMode; + model: string; + options: LlmOptions; + prompt: string; + provider: LlmProvider; + providerOptions: Record; + url: string; +} + +export interface ResolvedLlmTarget extends ResolvedTargetBase { + expect?: LlmExpectConfig; + group: string; + intervalMs: number; + llm: ResolvedLlmConfig; + name: null | string; + timeoutMs: number; + 
type: "llm"; +} diff --git a/src/server/checker/runner/llm/validate.ts b/src/server/checker/runner/llm/validate.ts new file mode 100644 index 0000000..c56923a --- /dev/null +++ b/src/server/checker/runner/llm/validate.ts @@ -0,0 +1,397 @@ +import { isBoolean, isNumber, isPlainObject, isString } from "es-toolkit"; +import { isArray } from "es-toolkit/compat"; + +import type { ConfigValidationIssue } from "../../schema/issues"; +import type { CheckerValidationInput } from "../types"; + +import { isUnsafeRegex } from "../../expect/redos"; +import { isPlainRecord, validateOperatorObject } from "../../expect/validate-operator"; +import { issue, joinPath } from "../../schema/issues"; + +const ALLOWED_PROVIDERS = new Set(["anthropic", "openai", "openai-responses"]); +const ALLOWED_PROTOCOLS = new Set(["http:", "https:"]); +const ALLOWED_MODES = new Set(["http", "stream"]); +const OUTPUT_RULE_KEYS = ["contains", "equals", "json", "regex"] as const; + +export function validateLlmConfig(input: CheckerValidationInput): ConfigValidationIssue[] { + const issues: ConfigValidationIssue[] = []; + const defaults = + isPlainRecord(input.defaults) && isPlainRecord(input.defaults["llm"]) ? input.defaults["llm"] : undefined; + + if (defaults) { + issues.push(...validateLlmDefaults(defaults, "defaults.llm")); + } + + for (let i = 0; i < input.targets.length; i++) { + const target = input.targets[i] as unknown; + if (!isPlainRecord(target)) continue; + if (target["type"] !== "llm") continue; + issues.push(...validateLlmTarget(target, `targets[${i}]`)); + } + + return issues; +} + +function getTargetName(target: Record): string | undefined { + if (isString(target["name"])) return target["name"]; + return isString(target["id"]) ? 
target["id"] : undefined; +} + +function isNonNegativeFiniteNumber(value: unknown): boolean { + return isNumber(value) && Number.isFinite(value) && value >= 0; +} + +function validateLlmDefaults(defaults: Record, path: string): ConfigValidationIssue[] { + const issues: ConfigValidationIssue[] = []; + + if (defaults["mode"] !== undefined && !ALLOWED_MODES.has(defaults["mode"] as string)) { + issues.push(issue("invalid-type", joinPath(path, "mode"), "必须为 http 或 stream")); + } + if (defaults["ignoreSSL"] !== undefined && !isBoolean(defaults["ignoreSSL"])) { + issues.push(issue("invalid-type", joinPath(path, "ignoreSSL"), "必须为布尔值")); + } + if (defaults["headers"] !== undefined) { + issues.push(...validateStringMap(defaults["headers"], joinPath(path, "headers"))); + } + if (defaults["options"] !== undefined) { + issues.push(...validateLlmOptions(defaults["options"], joinPath(path, "options"))); + } + if (defaults["providerOptions"] !== undefined) { + issues.push(...validateProviderOptions(defaults["providerOptions"], joinPath(path, "providerOptions"))); + } + + return issues; +} + +function validateLlmExpect( + target: Record, + path: string, + mode: string | undefined, + targetName?: string, +): ConfigValidationIssue[] { + const expect = target["expect"]; + if (expect === undefined || expect === null || !isPlainRecord(expect)) return []; + const issues: ConfigValidationIssue[] = []; + const expectPath = joinPath(path, "expect"); + + if (isArray(expect["status"])) { + issues.push(...validateStatusValues(expect["status"], joinPath(expectPath, "status"), targetName)); + } + + if (isPlainRecord(expect["headers"])) { + for (const [key, value] of Object.entries(expect["headers"])) { + if (isString(value)) continue; + issues.push(...validateOperatorObject(value, joinPath(joinPath(expectPath, "headers"), key), targetName)); + } + } + + if (expect["output"] !== undefined) { + issues.push(...validateOutputRules(expect["output"], joinPath(expectPath, "output"), targetName)); + } 
+ + if (expect["finishReason"] !== undefined && !isString(expect["finishReason"])) { + issues.push(issue("invalid-type", joinPath(expectPath, "finishReason"), "必须为字符串", targetName)); + } + + if (expect["rawFinishReason"] !== undefined && !isString(expect["rawFinishReason"])) { + issues.push(issue("invalid-type", joinPath(expectPath, "rawFinishReason"), "必须为字符串", targetName)); + } + + if (expect["usage"] !== undefined) { + issues.push(...validateUsageExpect(expect["usage"], joinPath(expectPath, "usage"), targetName)); + } + + if (expect["stream"] !== undefined) { + if (mode === "http") { + issues.push( + issue("invalid-type", joinPath(expectPath, "stream"), "expect.stream 仅支持 stream mode", targetName), + ); + } else { + issues.push(...validateStreamExpect(expect["stream"], joinPath(expectPath, "stream"), targetName)); + } + } + + if (expect["maxDurationMs"] !== undefined && !isNonNegativeFiniteNumber(expect["maxDurationMs"])) { + issues.push(issue("invalid-type", joinPath(expectPath, "maxDurationMs"), "必须为非负有限数字", targetName)); + } + + return issues; +} + +function validateLlmOptions(options: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainRecord(options)) return [issue("invalid-type", path, "必须为对象", targetName)]; + const issues: ConfigValidationIssue[] = []; + + if (options["maxOutputTokens"] !== undefined) { + if ( + !isNumber(options["maxOutputTokens"]) || + !Number.isInteger(options["maxOutputTokens"]) || + options["maxOutputTokens"] < 1 + ) { + issues.push(issue("invalid-type", joinPath(path, "maxOutputTokens"), "必须为正整数", targetName)); + } + } + + for (const key of ["temperature", "topP", "topK", "presencePenalty", "frequencyPenalty", "seed"]) { + if (options[key] !== undefined && (!isNumber(options[key]) || !Number.isFinite(options[key]))) { + issues.push(issue("invalid-type", joinPath(path, key), "必须为有限数字", targetName)); + } + } + + if (options["stopSequences"] !== undefined) { + if (!isArray(options["stopSequences"])) { + 
issues.push(issue("invalid-type", joinPath(path, "stopSequences"), "必须为字符串数组", targetName)); + } else { + for (let i = 0; i < options["stopSequences"].length; i++) { + if (!isString(options["stopSequences"][i])) { + issues.push(issue("invalid-type", `${joinPath(path, "stopSequences")}[${i}]`, "必须为字符串", targetName)); + } + } + } + } + + return issues; +} + +function validateLlmTarget(target: Record, path: string): ConfigValidationIssue[] { + const issues: ConfigValidationIssue[] = []; + const targetName = getTargetName(target); + const llm = target["llm"]; + if (!isPlainRecord(llm)) { + issues.push(issue("required", joinPath(path, "llm"), "缺少 llm 配置", targetName)); + issues.push(...validateLlmExpect(target, path, undefined, targetName)); + return issues; + } + + if (!isString(llm["provider"]) || !ALLOWED_PROVIDERS.has(llm["provider"])) { + issues.push( + issue( + "invalid-type", + joinPath(joinPath(path, "llm"), "provider"), + "必须为 openai、openai-responses 或 anthropic", + targetName, + ), + ); + } + + if (!isString(llm["url"]) || llm["url"].trim() === "") { + issues.push(issue("required", joinPath(joinPath(path, "llm"), "url"), "缺少 llm.url 字段", targetName)); + } else { + try { + const url = new URL(llm["url"]); + if (!ALLOWED_PROTOCOLS.has(url.protocol)) { + issues.push( + issue( + "invalid-url", + joinPath(joinPath(path, "llm"), "url"), + "格式不合法,必须以 http:// 或 https:// 开头", + targetName, + ), + ); + } + } catch { + issues.push(issue("invalid-url", joinPath(joinPath(path, "llm"), "url"), "格式不合法", targetName)); + } + } + + if (!isString(llm["model"]) || llm["model"].trim() === "") { + issues.push(issue("required", joinPath(joinPath(path, "llm"), "model"), "必须为非空字符串", targetName)); + } + + if (!isString(llm["prompt"]) || llm["prompt"].trim() === "") { + issues.push(issue("required", joinPath(joinPath(path, "llm"), "prompt"), "必须为非空字符串", targetName)); + } + + if (llm["mode"] !== undefined && !ALLOWED_MODES.has(llm["mode"] as string)) { + issues.push(issue("invalid-type", 
joinPath(joinPath(path, "llm"), "mode"), "必须为 http 或 stream", targetName)); + } + + if (llm["headers"] !== undefined) { + issues.push(...validateStringMap(llm["headers"], joinPath(joinPath(path, "llm"), "headers"), targetName)); + } + + if (llm["ignoreSSL"] !== undefined && !isBoolean(llm["ignoreSSL"])) { + issues.push(issue("invalid-type", joinPath(joinPath(path, "llm"), "ignoreSSL"), "必须为布尔值", targetName)); + } + + const provider = llm["provider"] as string | undefined; + + if (llm["authToken"] !== undefined) { + if (provider !== "anthropic") { + issues.push( + issue( + "invalid-auth", + joinPath(joinPath(path, "llm"), "authToken"), + "authToken 仅支持 anthropic provider", + targetName, + ), + ); + } + } + + if ( + provider === "anthropic" && + isString(llm["key"]) && + llm["key"].trim() !== "" && + isString(llm["authToken"]) && + llm["authToken"].trim() !== "" + ) { + issues.push( + issue("auth-conflict", joinPath(joinPath(path, "llm"), "key"), "key 与 authToken 不能同时配置", targetName), + ); + } + + if (llm["options"] !== undefined) { + issues.push(...validateLlmOptions(llm["options"], joinPath(joinPath(path, "llm"), "options"), targetName)); + } + + if (llm["providerOptions"] !== undefined) { + issues.push( + ...validateProviderOptions( + llm["providerOptions"], + joinPath(joinPath(path, "llm"), "providerOptions"), + targetName, + ), + ); + } + + const mode = (llm["mode"] as string | undefined) ?? "http"; + issues.push(...validateLlmExpect(target, path, mode, targetName)); + + return issues; +} + +function validateOutputJsonRule(value: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainRecord(value)) return [issue("invalid-type", path, "必须为对象", targetName)]; + const issues: ConfigValidationIssue[] = []; + + if (!isString(value["path"]) || !value["path"].startsWith("$.") || value["path"].length <= 2) { + issues.push(issue("invalid-jsonpath", joinPath(path, "path"), '必须为以 "$." 
开头的有效 JSONPath', targetName)); + } + + const operatorKeys = new Set(["path"]); + const operators: Record = {}; + for (const [key, val] of Object.entries(value)) { + if (operatorKeys.has(key)) continue; + operators[key] = val; + } + issues.push(...validateOperatorObject(operators, path, targetName, { requireAtLeastOne: false })); + + return issues; +} + +function validateOutputRegex(value: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isString(value)) return [issue("invalid-type", path, "必须为字符串", targetName)]; + try { + new RegExp(value); + } catch { + return [issue("invalid-regex", path, "正则不合法", targetName)]; + } + return isUnsafeRegex(value) ? [issue("unsafe-regex", path, "正则存在 ReDoS 风险", targetName)] : []; +} + +function validateOutputRules(rules: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isArray(rules)) return [issue("invalid-type", path, "必须为数组", targetName)]; + return rules.flatMap((rule, index) => validateSingleOutputRule(rule, `${path}[${index}]`, targetName)); +} + +function validateProviderOptions(value: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainObject(value)) return [issue("invalid-type", path, "必须为 JSON object", targetName)]; + return []; +} + +function validateSingleOutputRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainRecord(rule)) return [issue("invalid-type", path, "必须为对象", targetName)]; + + const found = OUTPUT_RULE_KEYS.filter((type) => type in rule); + if (found.length === 0) return [issue("missing-body-rule", path, "缺少支持的规则类型", targetName)]; + if (found.length > 1) return [issue("multiple-body-rules", path, "只能配置一种规则类型", targetName)]; + + const ruleType = found[0]!; + const issues: ConfigValidationIssue[] = []; + + for (const key of Object.keys(rule)) { + if (key !== ruleType) issues.push(issue("unknown-field", joinPath(path, key), "是未知字段", targetName)); + } + if (issues.length > 0) return 
issues; + + switch (ruleType) { + case "contains": + return isString(rule["contains"]) + ? [] + : [issue("invalid-type", joinPath(path, "contains"), "必须为字符串", targetName)]; + case "equals": + return isString(rule["equals"]) + ? [] + : [issue("invalid-type", joinPath(path, "equals"), "必须为字符串", targetName)]; + case "json": + return validateOutputJsonRule(rule["json"], joinPath(path, "json"), targetName); + case "regex": + return validateOutputRegex(rule["regex"], joinPath(path, "regex"), targetName); + } +} + +function validateStatusValues(values: unknown[], path: string, targetName?: string): ConfigValidationIssue[] { + const issues: ConfigValidationIssue[] = []; + for (let i = 0; i < values.length; i++) { + const value = values[i]; + const itemPath = `${path}[${i}]`; + if (isNumber(value)) { + if (!Number.isInteger(value) || value < 100 || value > 599) { + issues.push(issue("invalid-status", itemPath, "status 数字必须为 100-599 之间的整数", targetName)); + } + continue; + } + if (isString(value)) { + if (!/^[1-5]xx$/.test(value)) { + issues.push(issue("invalid-status", itemPath, "status 模式必须为 1xx 到 5xx", targetName)); + } + continue; + } + issues.push(issue("invalid-status", itemPath, "status 必须为整数或 1xx 到 5xx 模式", targetName)); + } + return issues; +} + +function validateStreamExpect(stream: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainRecord(stream)) return [issue("invalid-type", path, "必须为对象", targetName)]; + const issues: ConfigValidationIssue[] = []; + + if (stream["completed"] !== undefined && !isBoolean(stream["completed"])) { + issues.push(issue("invalid-type", joinPath(path, "completed"), "必须为布尔值", targetName)); + } + + if (stream["firstTokenMs"] !== undefined) { + issues.push(...validateOperatorObject(stream["firstTokenMs"], joinPath(path, "firstTokenMs"), targetName)); + } + + return issues; +} + +function validateStringMap(value: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if 
(!isPlainObject(value)) return [issue("invalid-type", path, "必须为对象", targetName)]; + const issues: ConfigValidationIssue[] = []; + for (const [key, val] of Object.entries(value as Record)) { + if (!isString(val)) { + issues.push(issue("invalid-type", joinPath(path, key), "必须为字符串", targetName)); + } + } + return issues; +} + +function validateUsageExpect(usage: unknown, path: string, targetName?: string): ConfigValidationIssue[] { + if (!isPlainRecord(usage)) return [issue("invalid-type", path, "必须为对象", targetName)]; + const issues: ConfigValidationIssue[] = []; + + if (usage["inputTokens"] !== undefined) { + issues.push(...validateOperatorObject(usage["inputTokens"], joinPath(path, "inputTokens"), targetName)); + } + if (usage["outputTokens"] !== undefined) { + issues.push(...validateOperatorObject(usage["outputTokens"], joinPath(path, "outputTokens"), targetName)); + } + if (usage["totalTokens"] !== undefined) { + issues.push(...validateOperatorObject(usage["totalTokens"], joinPath(path, "totalTokens"), targetName)); + } + + return issues; +} diff --git a/tests/server/checker/runner/llm/execute.test.ts b/tests/server/checker/runner/llm/execute.test.ts new file mode 100644 index 0000000..8d22f09 --- /dev/null +++ b/tests/server/checker/runner/llm/execute.test.ts @@ -0,0 +1,209 @@ +import { afterAll, beforeAll, describe, expect, test } from "bun:test"; + +import type { ResolvedLlmTarget } from "../../../../../src/server/checker/runner/llm/types"; +import type { CheckerContext } from "../../../../../src/server/checker/runner/types"; + +import { LlmChecker } from "../../../../../src/server/checker/runner/llm/execute"; + +const MOCK_PORT = 18456; +let server: ReturnType; + +function makeCtx(timeoutMs = 10000): CheckerContext { + const controller = new AbortController(); + setTimeout(() => controller.abort(), timeoutMs); + return { signal: controller.signal }; +} + +function makeTarget( + overrides?: Partial, + expectOverrides?: Partial, +): ResolvedLlmTarget { + return 
{ + description: null, + expect: expectOverrides, + group: "default", + id: "test-llm", + intervalMs: 30000, + llm: { + headers: {}, + ignoreSSL: false, + key: "test-key", + mode: "http", + model: "gpt-4o-mini", + options: { maxOutputTokens: 16, temperature: 0 }, + prompt: "Say OK", + provider: "openai", + providerOptions: {}, + url: `http://127.0.0.1:${MOCK_PORT}/v1`, + ...overrides, + }, + name: null, + timeoutMs: 10000, + type: "llm", + }; +} + +function openaiResponse( + content: string, + options?: { usage?: { completion_tokens: number; prompt_tokens: number; total_tokens: number } }, +) { + return JSON.stringify({ + choices: [{ finish_reason: "stop", index: 0, message: { content, role: "assistant" } }], + created: Date.now(), + id: "chatcmpl-test", + model: "gpt-4o-mini", + object: "chat.completion", + usage: options?.usage ?? { completion_tokens: 2, prompt_tokens: 12, total_tokens: 14 }, + }); +} + +beforeAll(() => { + server = Bun.serve({ + fetch(req) { + const url = new URL(req.url); + + const authHeader = req.headers.get("Authorization"); + + if (url.pathname === "/v1/rate_limit/chat/completions") { + return new Response(JSON.stringify({ error: { message: "Rate limit exceeded", type: "rate_limit_error" } }), { + headers: { "Content-Type": "application/json" }, + status: 429, + }); + } + + if (url.pathname === "/v1/server_error/chat/completions") { + return new Response(JSON.stringify({ error: { message: "Internal server error", type: "server_error" } }), { + headers: { "Content-Type": "application/json" }, + status: 500, + }); + } + + if (url.pathname === "/v1/no_content/chat/completions") { + return new Response( + openaiResponse("", { usage: { completion_tokens: 0, prompt_tokens: 5, total_tokens: 5 } }), + { headers: { "Content-Type": "application/json" }, status: 200 }, + ); + } + + if (authHeader === "Bearer bad-key") { + return new Response( + JSON.stringify({ + error: { message: "Invalid API key", param: null, type: "invalid_request_error" }, + }), 
+ { headers: { "Content-Type": "application/json" }, status: 401 }, + ); + } + + return new Response(openaiResponse("OK"), { headers: { "Content-Type": "application/json" }, status: 200 }); + }, + port: MOCK_PORT, + }); +}); + +afterAll(() => { + void server.stop(); +}); + +const checker = new LlmChecker(); + +describe("LlmChecker execute - 非流式", () => { + test("成功调用返回 matched=true", async () => { + const result = await checker.execute(makeTarget(), makeCtx()); + expect(result.matched).toBe(true); + expect(result.failure).toBeNull(); + expect(result.statusDetail).toContain("openai"); + expect(result.statusDetail).toContain("http"); + expect(result.statusDetail).toContain("200"); + expect(result.statusDetail).toContain("finish=stop"); + }); + + test("status expect 不匹配", async () => { + const result = await checker.execute(makeTarget(undefined, { status: [404] }), makeCtx()); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("status"); + }); + + test("output equals 不匹配", async () => { + const result = await checker.execute(makeTarget(undefined, { output: [{ equals: "WRONG" }] }), makeCtx()); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("output"); + }); + + test("output contains 通过", async () => { + const result = await checker.execute(makeTarget(undefined, { output: [{ contains: "O" }] }), makeCtx()); + expect(result.matched).toBe(true); + }); + + test("finishReason expect 不匹配", async () => { + const result = await checker.execute(makeTarget(undefined, { finishReason: "length" }), makeCtx()); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("finishReason"); + }); + + test("401 错误可通过 status expect 捕获", async () => { + const result = await checker.execute(makeTarget({ key: "bad-key" }, { status: [401] }), makeCtx()); + expect(result.matched).toBe(true); + }); + + test("429 错误可通过 status expect 捕获", async () => { + const result = await checker.execute( + makeTarget({ url: 
`http://127.0.0.1:${MOCK_PORT}/v1/rate_limit` }, { status: [429] }), + makeCtx(), + ); + expect(result.matched).toBe(true); + }); + + test("500 错误返回 status failure", async () => { + const result = await checker.execute( + makeTarget({ url: `http://127.0.0.1:${MOCK_PORT}/v1/server_error` }), + makeCtx(), + ); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("status"); + }); + + test("连接失败返回 request failure", async () => { + const result = await checker.execute(makeTarget({ url: "http://127.0.0.1:19999/v1" }), makeCtx(5000)); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("request"); + }); + + test("statusDetail 包含 output 长度和 usage", async () => { + const result = await checker.execute(makeTarget(), makeCtx()); + expect(result.statusDetail).toContain("output="); + expect(result.statusDetail).toContain("chars"); + expect(result.statusDetail).toContain("usage="); + expect(result.statusDetail).toContain("tokens"); + }); + + test("无文本输出且配置 output expect 失败", async () => { + const result = await checker.execute( + makeTarget({ url: `http://127.0.0.1:${MOCK_PORT}/v1/no_content` }, { output: [{ equals: "OK" }] }), + makeCtx(), + ); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("output"); + }); + + test("无 expect 默认 status=200 通过", async () => { + const result = await checker.execute(makeTarget(), makeCtx()); + expect(result.matched).toBe(true); + }); + + test("headers 断言通过", async () => { + const result = await checker.execute( + makeTarget(undefined, { headers: { "content-type": "application/json" } }), + makeCtx(), + ); + expect(result.matched).toBe(true); + }); + + test("headers 断言失败", async () => { + const result = await checker.execute( + makeTarget(undefined, { headers: { "content-type": "text/plain" } }), + makeCtx(), + ); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("headers"); + }); +}); diff --git a/tests/server/checker/runner/llm/output-expect.test.ts 
b/tests/server/checker/runner/llm/output-expect.test.ts new file mode 100644 index 0000000..e6ad7a8 --- /dev/null +++ b/tests/server/checker/runner/llm/output-expect.test.ts @@ -0,0 +1,198 @@ +import { describe, expect, test } from "bun:test"; + +import type { LlmCheckObservation } from "../../../../../src/server/checker/runner/llm/types"; + +import { runExpects } from "../../../../../src/server/checker/runner/llm/expect"; +import { checkOutputRules } from "../../../../../src/server/checker/runner/llm/output"; + +function makeObservation(overrides?: Partial): LlmCheckObservation { + return { + finishReason: "stop", + http: { headers: {}, status: 200, statusText: "OK" }, + mode: "http", + model: "gpt-4o-mini", + outputText: "OK", + provider: "openai", + rawFinishReason: "stop", + stream: null, + usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 }, + warnings: [], + ...overrides, + }; +} + +describe("LLM output rules", () => { + test("equals 严格匹配", () => { + expect(checkOutputRules("OK", [{ equals: "OK" }]).matched).toBe(true); + expect(checkOutputRules("OK\n", [{ equals: "OK" }]).matched).toBe(false); + expect(checkOutputRules("OK ", [{ equals: "OK" }]).matched).toBe(false); + }); + + test("equals null 输出失败", () => { + expect(checkOutputRules(null, [{ equals: "OK" }]).matched).toBe(false); + }); + + test("contains 匹配", () => { + expect(checkOutputRules("Hello World", [{ contains: "World" }]).matched).toBe(true); + expect(checkOutputRules("Hello", [{ contains: "World" }]).matched).toBe(false); + expect(checkOutputRules(null, [{ contains: "World" }]).matched).toBe(false); + }); + + test("regex 匹配", () => { + expect(checkOutputRules("status: ok", [{ regex: "^status:" }]).matched).toBe(true); + expect(checkOutputRules("status: ok", [{ regex: "^error:" }]).matched).toBe(false); + expect(checkOutputRules(null, [{ regex: "^status:" }]).matched).toBe(false); + }); + + test("json 匹配", () => { + expect(checkOutputRules('{"status":"ok","code":200}', [{ json: { equals: 
"ok", path: "$.status" } }]).matched).toBe( + true, + ); + expect(checkOutputRules('{"status":"ok","code":200}', [{ json: { gte: 200, path: "$.code" } }]).matched).toBe(true); + expect(checkOutputRules('{"status":"ok"}', [{ json: { exists: true, path: "$.code" } }]).matched).toBe(false); + }); + + test("json 非法 JSON 失败", () => { + expect(checkOutputRules("not json", [{ json: { exists: true, path: "$.x" } }]).matched).toBe(false); + }); + + test("多规则按顺序快速失败", () => { + const result = checkOutputRules("Hello World", [{ equals: "wrong" }, { contains: "World" }]); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("output"); + }); + + test("undefined rules 返回通过", () => { + expect(checkOutputRules("anything", undefined).matched).toBe(true); + expect(checkOutputRules(null, undefined).matched).toBe(true); + }); +}); + +describe("LLM runExpects", () => { + test("全部 expect 通过", () => { + const observation = makeObservation(); + const result = runExpects(observation, { + finishReason: "stop", + output: [{ contains: "OK" }], + status: [200], + }); + expect(result.matched).toBe(true); + expect(result.failure).toBeNull(); + }); + + test("默认 status=200 通过", () => { + const observation = makeObservation(); + const result = runExpects(observation, undefined); + expect(result.matched).toBe(true); + }); + + test("status 不匹配失败", () => { + const observation = makeObservation(); + const result = runExpects(observation, { status: [404] }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("status"); + }); + + test("finishReason 不匹配失败", () => { + const observation = makeObservation(); + const result = runExpects(observation, { finishReason: "length" }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("finishReason"); + }); + + test("rawFinishReason 不匹配失败", () => { + const observation = makeObservation(); + const result = runExpects(observation, { rawFinishReason: "end_turn" }); + 
expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("rawFinishReason"); + }); + + test("usage 不匹配失败", () => { + const observation = makeObservation(); + const result = runExpects(observation, { usage: { totalTokens: { gte: 100 } } }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("usage"); + }); + + test("usage 匹配通过", () => { + const observation = makeObservation(); + const result = runExpects(observation, { usage: { totalTokens: { lte: 20 } } }); + expect(result.matched).toBe(true); + }); + + test("stream completed 匹配", () => { + const observation = makeObservation({ + mode: "stream", + stream: { completed: true, firstTokenMs: 500 }, + }); + const result = runExpects(observation, { + stream: { completed: true }, + }); + expect(result.matched).toBe(true); + }); + + test("stream firstTokenMs 匹配", () => { + const observation = makeObservation({ + mode: "stream", + stream: { completed: true, firstTokenMs: 500 }, + }); + const result = runExpects(observation, { + stream: { firstTokenMs: { lte: 1000 } }, + }); + expect(result.matched).toBe(true); + }); + + test("stream firstTokenMs 缺失失败", () => { + const observation = makeObservation({ + mode: "stream", + stream: { completed: true, firstTokenMs: null }, + }); + const result = runExpects(observation, { + stream: { firstTokenMs: { lte: 1000 } }, + }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("stream"); + }); + + test("headers 匹配通过", () => { + const observation = makeObservation({ + http: { headers: { "content-type": "application/json" }, status: 200, statusText: "OK" }, + }); + const result = runExpects(observation, { + headers: { "content-type": "application/json" }, + }); + expect(result.matched).toBe(true); + }); + + test("headers 不匹配失败", () => { + const observation = makeObservation({ + http: { headers: { "content-type": "text/plain" }, status: 200, statusText: "OK" }, + }); + const result = runExpects(observation, { + headers: { 
"content-type": "application/json" }, + }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("headers"); + }); + + test("首个 expect 失败立即返回", () => { + const observation = makeObservation(); + const result = runExpects(observation, { + output: [{ contains: "OK" }], + status: [404], + }); + expect(result.matched).toBe(false); + expect(result.failure?.phase).toBe("status"); + }); + + test("APICallError 状态码 expect 通过", () => { + const observation = makeObservation({ + finishReason: null, + http: { headers: {}, status: 401, statusText: "Unauthorized" }, + outputText: null, + usage: null, + }); + const result = runExpects(observation, { status: [401] }); + expect(result.matched).toBe(true); + }); +}); diff --git a/tests/server/checker/runner/llm/provider-observation.test.ts b/tests/server/checker/runner/llm/provider-observation.test.ts new file mode 100644 index 0000000..62c52ae --- /dev/null +++ b/tests/server/checker/runner/llm/provider-observation.test.ts @@ -0,0 +1,258 @@ +import { describe, expect, test } from "bun:test"; + +import { + buildObservationFromApiCallError, + buildObservationFromGenerateText, + buildObservationFromStreamText, +} from "../../../../../src/server/checker/runner/llm/observation"; +import { createProviderModel } from "../../../../../src/server/checker/runner/llm/provider"; + +describe("LLM provider factory", () => { + test("createProviderModel 返回 model 和 http 初始为 null", () => { + const { http, model } = createProviderModel({ + headers: {}, + ignoreSSL: false, + key: "test-key", + mode: "http", + model: "gpt-4o-mini", + options: {}, + prompt: "test", + provider: "openai", + providerOptions: {}, + url: "https://api.openai.com/v1", + }); + expect(http).toBeNull(); + expect(model).toBeDefined(); + }); + + test("openai provider 使用 chat 路径", () => { + const { model } = createProviderModel({ + headers: {}, + ignoreSSL: false, + key: "test-key", + mode: "http", + model: "gpt-4o-mini", + options: {}, + prompt: "test", + 
provider: "openai", + providerOptions: {}, + url: "https://api.openai.com/v1", + }); + expect(model).toBeDefined(); + }); + + test("openai-responses provider 使用 responses 路径", () => { + const { model } = createProviderModel({ + headers: {}, + ignoreSSL: false, + key: "test-key", + mode: "http", + model: "gpt-4o-mini", + options: {}, + prompt: "test", + provider: "openai-responses", + providerOptions: {}, + url: "https://api.openai.com/v1", + }); + expect(model).toBeDefined(); + }); + + test("anthropic provider 使用 messages 路径", () => { + const { model } = createProviderModel({ + headers: {}, + ignoreSSL: false, + key: "test-key", + mode: "http", + model: "claude-3-5-haiku-20241022", + options: {}, + prompt: "test", + provider: "anthropic", + providerOptions: {}, + url: "https://api.anthropic.com/v1", + }); + expect(model).toBeDefined(); + }); + + test("anthropic authToken 映射到 Authorization header", () => { + const { model } = createProviderModel({ + authToken: "my-bearer-token", + headers: {}, + ignoreSSL: false, + key: "", + mode: "http", + model: "claude-3-5-haiku-20241022", + options: {}, + prompt: "test", + provider: "anthropic", + providerOptions: {}, + url: "https://api.anthropic.com/v1", + }); + expect(model).toBeDefined(); + }); +}); + +describe("LLM observation - generateText", () => { + test("构建非流式 observation", () => { + const observation = buildObservationFromGenerateText( + "openai", + "gpt-4o-mini", + "http", + { + finishReason: "stop", + rawFinishReason: "stop", + text: "OK", + usage: { inputTokens: 12, outputTokens: 2, totalTokens: 14 }, + }, + { headers: { "content-type": "application/json" }, status: 200, statusText: "OK" }, + ); + + expect(observation.provider).toBe("openai"); + expect(observation.model).toBe("gpt-4o-mini"); + expect(observation.mode).toBe("http"); + expect(observation.outputText).toBe("OK"); + expect(observation.finishReason).toBe("stop"); + expect(observation.rawFinishReason).toBe("stop"); + expect(observation.usage).toEqual({ 
inputTokens: 12, outputTokens: 2, totalTokens: 14 }); + expect(observation.stream).toBeNull(); + expect(observation.http?.status).toBe(200); + }); + + test("rawFinishReason 为 undefined 时转为 null", () => { + const observation = buildObservationFromGenerateText( + "openai", + "gpt-4o-mini", + "http", + { + finishReason: "stop", + rawFinishReason: undefined, + text: "OK", + usage: { inputTokens: 5, outputTokens: 1 }, + }, + null, + ); + expect(observation.rawFinishReason).toBeNull(); + }); + + test("usage totalTokens 缺失时自动计算", () => { + const observation = buildObservationFromGenerateText( + "openai", + "gpt-4o-mini", + "http", + { + finishReason: "stop", + rawFinishReason: "stop", + text: "OK", + usage: { inputTokens: 10, outputTokens: 3 }, + }, + null, + ); + expect(observation.usage?.totalTokens).toBe(13); + }); +}); + +describe("LLM observation - APICallError", () => { + test("带 statusCode 的 APICallError 构建 http metadata", async () => { + const { APICallError } = await import("ai"); + const error = new APICallError({ + message: "Unauthorized", + requestBodyValues: {}, + responseBody: '{"error":{"message":"Invalid API key"}}', + responseHeaders: { "content-type": "application/json" }, + statusCode: 401, + url: "https://api.openai.com/v1/chat/completions", + }); + + const observation = buildObservationFromApiCallError(error, "openai", "gpt-4o-mini", "http"); + expect(observation.http?.status).toBe(401); + expect(observation.http?.headers).toEqual({ "content-type": "application/json" }); + expect(observation.outputText).toBeNull(); + expect(observation.finishReason).toBeNull(); + expect(observation.usage).toBeNull(); + }); +}); + +describe("LLM observation - streamText", () => { + test("消费 fullStream 构建流式 observation", async () => { + const parts = [ + { textDelta: "Hello", type: "text-delta" }, + { textDelta: " world", type: "text-delta" }, + { + finishReason: "stop", + rawFinishReason: "stop", + totalUsage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 }, + 
type: "finish", + usage: { inputTokens: 10, outputTokens: 5 }, + }, + ]; + + async function* fakeStream() { + for (const part of parts) { + yield await Promise.resolve(part); + } + } + + const observation = await buildObservationFromStreamText( + "openai", + "gpt-4o-mini", + "stream", + fakeStream(), + { headers: {}, status: 200, statusText: "OK" }, + performance.now() - 100, + ); + + expect(observation.outputText).toBe("Hello world"); + expect(observation.stream?.completed).toBe(true); + expect(observation.stream?.firstTokenMs).not.toBeNull(); + expect(observation.finishReason).toBe("stop"); + expect(observation.rawFinishReason).toBe("stop"); + expect(observation.usage?.totalTokens).toBe(15); + }); + + test("空 text-delta 不触发 firstTokenMs", async () => { + const parts = [ + { textDelta: "", type: "text-delta" }, + { textDelta: "OK", type: "text-delta" }, + { finishReason: "stop", type: "finish", usage: { inputTokens: 5, outputTokens: 1 } }, + ]; + + async function* fakeStream() { + for (const part of parts) { + yield await Promise.resolve(part); + } + } + + const observation = await buildObservationFromStreamText( + "openai", + "gpt-4o-mini", + "stream", + fakeStream(), + null, + performance.now(), + ); + expect(observation.stream?.firstTokenMs).not.toBeNull(); + expect(observation.outputText).toBe("OK"); + }); + + test("error part 添加到 warnings", async () => { + const parts = [ + { error: new Error("stream broken"), type: "error" }, + { finishReason: "error", type: "finish", usage: { inputTokens: 5, outputTokens: 0 } }, + ]; + + async function* fakeStream() { + for (const part of parts) { + yield await Promise.resolve(part); + } + } + + const observation = await buildObservationFromStreamText( + "openai", + "gpt-4o-mini", + "stream", + fakeStream(), + null, + performance.now(), + ); + expect(observation.warnings).toContain("stream broken"); + }); +}); diff --git a/tests/server/checker/runner/llm/registry.test.ts b/tests/server/checker/runner/llm/registry.test.ts 
new file mode 100644 index 0000000..f110722 --- /dev/null +++ b/tests/server/checker/runner/llm/registry.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, test } from "bun:test"; + +import { checkerRegistry } from "../../../../../src/server/checker/runner"; + +describe("LLM registry integration", () => { + test("registry 包含 llm 类型", () => { + expect(checkerRegistry.supportedTypes).toContain("llm"); + }); + + test("llm checker 可获取", () => { + const checker = checkerRegistry.tryGet("llm"); + expect(checker).toBeDefined(); + expect(checker!.type).toBe("llm"); + expect(checker!.configKey).toBe("llm"); + }); + + test("llm checker schemas 有效", () => { + const checker = checkerRegistry.get("llm"); + expect(checker.schemas.config).toBeDefined(); + expect(checker.schemas.defaults).toBeDefined(); + expect(checker.schemas.expect).toBeDefined(); + }); + + test("llm checker validate 方法可用", () => { + const checker = checkerRegistry.get("llm"); + const issues = checker.validate({ + defaults: {}, + targets: [], + }); + expect(issues).toHaveLength(0); + }); +}); diff --git a/tests/server/checker/runner/llm/schema-validate-resolve.test.ts b/tests/server/checker/runner/llm/schema-validate-resolve.test.ts new file mode 100644 index 0000000..ed1bfbc --- /dev/null +++ b/tests/server/checker/runner/llm/schema-validate-resolve.test.ts @@ -0,0 +1,384 @@ +import { describe, expect, test } from "bun:test"; + +import type { ResolvedLlmTarget } from "../../../../../src/server/checker/runner/llm/types"; +import type { ResolveContext } from "../../../../../src/server/checker/runner/types"; +import type { RawTargetConfig } from "../../../../../src/server/checker/types"; + +import { checkerRegistry } from "../../../../../src/server/checker/runner"; +import { validateLlmConfig } from "../../../../../src/server/checker/runner/llm/validate"; + +interface SerializedConfig { + headers: Record; + ignoreSSL: boolean; + key: string; + mode: string; + model: string; + options: Record; + prompt: string; 
+ provider: string; + providerOptions: Record; + url: string; +} + +function asLlm(resolved: ReturnType["resolve"]>): ResolvedLlmTarget { + return resolved as ResolvedLlmTarget; +} + +function makeRawTarget(overrides?: Partial): RawTargetConfig { + return { + id: "test-llm", + llm: { + model: "gpt-4o-mini", + prompt: "Say OK", + provider: "openai", + url: "https://api.openai.com/v1", + }, + type: "llm", + ...overrides, + }; +} + +function makeResolveContext(overrides?: Partial): ResolveContext { + return { + configDir: "/tmp", + defaultIntervalMs: 30000, + defaults: {}, + defaultTimeoutMs: 10000, + ...overrides, + }; +} + +function parseSerializedConfig(json: string): SerializedConfig { + return JSON.parse(json) as SerializedConfig; +} + +describe("LlmChecker schema", () => { + const checker = checkerRegistry.tryGet("llm"); + + test("llm checker 注册到 registry", () => { + expect(checker).toBeDefined(); + expect(checker?.type).toBe("llm"); + expect(checker?.configKey).toBe("llm"); + }); + + test("schemas 包含 config、defaults、expect", () => { + expect(checker).toBeDefined(); + expect(Object.keys(checker!.schemas).sort()).toEqual(["config", "defaults", "expect"].sort()); + }); +}); + +describe("LlmChecker validate", () => { + test("合法 LLM target 无校验问题", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [makeRawTarget()], + }); + expect(issues).toHaveLength(0); + }); + + test("provider 非法报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "m", prompt: "p", provider: "gemini", url: "https://x" }, + }), + ], + }); + expect(issues.length).toBeGreaterThan(0); + expect(issues.some((i) => i.code === "invalid-type" && i.path.includes("provider"))).toBe(true); + }); + + test("url 非法报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "m", prompt: "p", provider: "openai", url: "ftp://bad" }, + }), + ], + }); + 
expect(issues.length).toBeGreaterThan(0); + expect(issues.some((i) => i.code === "invalid-url")).toBe(true); + }); + + test("model 为空报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.length).toBeGreaterThan(0); + expect(issues.some((i) => i.path.includes("model"))).toBe(true); + }); + + test("prompt 为空报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "m", prompt: "", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.length).toBeGreaterThan(0); + expect(issues.some((i) => i.path.includes("prompt"))).toBe(true); + }); + + test("mode 非法报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { mode: "batch", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.length).toBeGreaterThan(0); + expect(issues.some((i) => i.path.includes("mode"))).toBe(true); + }); + + test("openai provider 不允许 authToken", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { authToken: "tok", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.some((i) => i.code === "invalid-auth")).toBe(true); + }); + + test("anthropic 同时配置 key 和 authToken 报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { authToken: "tok", key: "k", model: "m", prompt: "p", provider: "anthropic", url: "https://x" }, + }), + ], + }); + expect(issues.some((i) => i.code === "auth-conflict")).toBe(true); + }); + + test("ignoreSSL 非布尔值报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { ignoreSSL: "yes", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + 
expect(issues.some((i) => i.path.includes("ignoreSSL"))).toBe(true); + }); + + test("options.maxOutputTokens 非正整数报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "m", options: { maxOutputTokens: -1 }, prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.some((i) => i.path.includes("maxOutputTokens"))).toBe(true); + }); + + test("options.stopSequences 非字符串数组报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + llm: { model: "m", options: { stopSequences: [123] }, prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.some((i) => i.path.includes("stopSequences"))).toBe(true); + }); + + test("expect.output 缺少规则类型报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [makeRawTarget({ expect: { output: [{}] } })], + }); + expect(issues.some((i) => i.code === "missing-body-rule")).toBe(true); + }); + + test("expect.output 同时多种规则类型报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [makeRawTarget({ expect: { output: [{ contains: "y", equals: "x" }] } })], + }); + expect(issues.some((i) => i.code === "multiple-body-rules")).toBe(true); + }); + + test("expect.output regex ReDoS 报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [makeRawTarget({ expect: { output: [{ regex: "(a+)+" }] } })], + }); + expect(issues.some((i) => i.code === "unsafe-regex")).toBe(true); + }); + + test("expect.stream 在 mode:http 下报错", () => { + const issues = validateLlmConfig({ + defaults: {}, + targets: [ + makeRawTarget({ + expect: { stream: { completed: true } }, + llm: { mode: "http", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues.some((i) => i.message.includes("stream mode"))).toBe(true); + }); + + test("expect.stream 在 mode:stream 下合法", () => { + const issues = validateLlmConfig({ + defaults: {}, + 
targets: [ + makeRawTarget({ + expect: { stream: { completed: true } }, + llm: { mode: "stream", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }), + ], + }); + expect(issues).toHaveLength(0); + }); + + test("defaults.llm 合法配置", () => { + const issues = validateLlmConfig({ + defaults: { + llm: { headers: { "X-Custom": "val" }, ignoreSSL: false, mode: "http", options: { maxOutputTokens: 32 } }, + }, + targets: [makeRawTarget()], + }); + expect(issues).toHaveLength(0); + }); + + test("defaults.llm mode 非法报错", () => { + const issues = validateLlmConfig({ + defaults: { llm: { mode: "batch" } }, + targets: [makeRawTarget()], + }); + expect(issues.some((i) => i.path.includes("defaults.llm.mode"))).toBe(true); + }); +}); + +describe("LlmChecker resolve", () => { + const checker = checkerRegistry.tryGet("llm")!; + + test("最简 target 填充默认值", () => { + const resolved = asLlm(checker.resolve(makeRawTarget(), makeResolveContext())); + expect(resolved.type).toBe("llm"); + expect(resolved.llm.mode).toBe("http"); + expect(resolved.llm.key).toBe(""); + expect(resolved.llm.ignoreSSL).toBe(false); + expect(resolved.llm.options.maxOutputTokens).toBe(16); + expect(resolved.llm.options.temperature).toBe(0); + expect(resolved.group).toBe("default"); + expect(resolved.intervalMs).toBe(30000); + expect(resolved.timeoutMs).toBe(10000); + }); + + test("defaults.llm 与 target.llm 浅合并", () => { + const raw = makeRawTarget({ + llm: { + headers: { Authorization: "Bearer test" }, + model: "gpt-4o-mini", + prompt: "Say OK", + provider: "openai", + url: "https://api.openai.com/v1", + }, + }); + const ctx = makeResolveContext({ + defaults: { + llm: { + headers: { "X-Custom": "default" }, + ignoreSSL: true, + mode: "stream", + options: { maxOutputTokens: 64, temperature: 0.5 }, + }, + }, + }); + const resolved = asLlm(checker.resolve(raw, ctx)); + expect(resolved.llm.mode).toBe("stream"); + expect(resolved.llm.ignoreSSL).toBe(true); + expect(resolved.llm.headers).toEqual({ 
Authorization: "Bearer test", "X-Custom": "default" }); + expect(resolved.llm.options.maxOutputTokens).toBe(64); + expect(resolved.llm.options.temperature).toBe(0.5); + }); + + test("target 字段覆盖 defaults", () => { + const raw = makeRawTarget({ + llm: { + ignoreSSL: false, + mode: "http", + model: "gpt-4o-mini", + options: { maxOutputTokens: 8 }, + prompt: "Say OK", + provider: "openai", + url: "https://api.openai.com/v1", + }, + }); + const ctx = makeResolveContext({ + defaults: { + llm: { + ignoreSSL: true, + mode: "stream", + options: { maxOutputTokens: 64 }, + }, + }, + }); + const resolved = asLlm(checker.resolve(raw, ctx)); + expect(resolved.llm.mode).toBe("http"); + expect(resolved.llm.ignoreSSL).toBe(false); + expect(resolved.llm.options.maxOutputTokens).toBe(8); + }); + + test("serialize 返回正确格式", () => { + const resolved = asLlm(checker.resolve(makeRawTarget(), makeResolveContext())); + const serialized = checker.serialize(resolved); + expect(serialized.target).toBe("openai:gpt-4o-mini @ https://api.openai.com/v1"); + const config = parseSerializedConfig(serialized.config); + expect(config.provider).toBe("openai"); + expect(config.key).toBe(""); + expect(config.model).toBe("gpt-4o-mini"); + }); + + test("serialize 隐藏 key", () => { + const raw = makeRawTarget({ + llm: { key: "sk-secret-key", model: "m", prompt: "p", provider: "openai", url: "https://x" }, + }); + const resolved = asLlm(checker.resolve(raw, makeResolveContext())); + const serialized = checker.serialize(resolved); + const config = parseSerializedConfig(serialized.config); + expect(config.key).toBe("***"); + }); + + test("providerOptions 浅合并", () => { + const raw = makeRawTarget({ + llm: { + model: "m", + prompt: "p", + provider: "openai", + providerOptions: { openai: { store: true } }, + url: "https://x", + }, + }); + const ctx = makeResolveContext({ + defaults: { + llm: { + providerOptions: { openai: { user: "default-user" } }, + }, + }, + }); + const resolved = asLlm(checker.resolve(raw, 
ctx)); + expect(resolved.llm.providerOptions).toEqual({ openai: { store: true } }); + }); +}); diff --git a/tests/server/checker/runner/registry.test.ts b/tests/server/checker/runner/registry.test.ts index 6919242..9571252 100644 --- a/tests/server/checker/runner/registry.test.ts +++ b/tests/server/checker/runner/registry.test.ts @@ -66,8 +66,8 @@ describe("CheckerRegistry", () => { const second = createDefaultCheckerRegistry(); first.register(createChecker("custom")); - expect(first.supportedTypes).toEqual(["http", "cmd", "db", "tcp", "ping", "udp", "custom"]); - expect(second.supportedTypes).toEqual(["http", "cmd", "db", "tcp", "ping", "udp"]); + expect(first.supportedTypes).toEqual(["http", "cmd", "db", "tcp", "ping", "udp", "llm", "custom"]); + expect(second.supportedTypes).toEqual(["http", "cmd", "db", "tcp", "ping", "udp", "llm"]); expect( first.definitions.every( (checker) => checker.schemas.config && checker.schemas.defaults && checker.schemas.expect,