diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..143fa50 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,350 @@ +# 兼容性检测脚本 + +## 概述 + +本目录包含一组用于检测 LLM API 网关对 **OpenAI** 和 **Anthropic** 协议兼容性的测试脚本。通过向目标服务发送一系列结构化请求,验证响应格式、字段类型、错误处理等是否符合协议规范。 + +## 脚本结构 + +``` +scripts/ +├── core.py # 公共基础设施 +├── detect_openai.py # OpenAI 兼容协议测试 +└── detect_anthropic.py # Anthropic 兼容协议测试 +``` + +### core.py — 公共模块 + +提供所有检测脚本共享的基础功能: + +| 函数/类 | 说明 | +|---------|------| +| `TestCase` | 测试用例数据类(URL、方法、请求头、请求体、验证器) | +| `TestResult` | 测试结果数据类(状态码、耗时、错误类型、响应内容) | +| `http_request()` | 普通 HTTP 请求(支持重试、自动 JSON 序列化) | +| `http_stream_request()` | 流式 HTTP 请求(SSE,支持重试) | +| `parse_sse_events()` | 从 SSE 响应文本中提取 `data:` 事件列表 | +| `create_ssl_context()` | 创建不验证证书的 SSL 上下文(测试环境用) | +| `run_test()` | 执行单个用例并打印结构化输出 | +| `run_test_suite()` | 执行完整测试套件并打印统计摘要 | +| `check_required_fields()` | 检查必需字段(通用验证辅助) | +| `check_field_type()` | 检查字段类型(通用验证辅助) | +| `check_enum_value()` | 检查枚举值(通用验证辅助) | +| `check_array_items_type()` | 检查数组元素类型(通用验证辅助) | +| `validate_response_structure()` | 组合上述函数的通用验证器 | + +**注意**:`core.py` 只包含协议无关的通用功能。每个协议独有的响应验证函数应定义在各自的检测脚本中(如 `validate_openai_chat_completion_response` 在 `detect_openai.py` 中)。 + +### detect_openai.py — OpenAI 兼容测试 + +检测目标服务对 OpenAI Chat Completions API 的兼容程度。 + +**覆盖的 API 端点:** +- `GET /models` — 模型列表 +- `GET /models/{model}` — 模型详情 +- `POST /chat/completions` — 对话补全 + +**测试类别:** +- **正面用例**:基本对话、system/developer 角色、多轮对话、参数组合(temperature、top_p、seed、penalty、stop、n、max_tokens、max_completion_tokens、logit_bias、reasoning_effort、service_tier、verbosity、response_format) +- **扩展功能**:`--vision`(图片输入)、`--stream`(流式响应)、`--tools`(工具调用)、`--logprobs`(对数概率)、`json_schema`(结构化输出) +- **负面用例**:缺参数、空消息、无效认证、不存在的模型、畸形 JSON、max_tokens 负数/0、temperature 越界 + +**响应验证:** +- Models List:检查 `object: "list"`、`data` 数组中每个模型的 `id`、`object`、`created`、`owned_by` +- Model Retrieve:检查 `id`、`object: "model"`、`created`、`owned_by` +- Chat Completion:检查 
`id`、`object: "chat.completion"`、`created`、`model`、`choices` 数组结构、`usage` 对象 + +### detect_anthropic.py — Anthropic 兼容测试 + +检测目标服务对 Anthropic Messages API 的兼容程度。 + +**覆盖的 API 端点:** +- `GET /v1/models` — 模型列表 +- `GET /v1/models/{model}` — 模型详情 +- `POST /v1/messages` — 消息对话 +- `POST /v1/messages/count_tokens` — Token 计数 + +**测试类别:** +- **正面用例**:基本对话、system prompt(字符串/数组格式)、多轮对话、assistant prefill、content 数组格式、参数组合(temperature、top_p、top_k、max_tokens、stop_sequences、metadata) +- **扩展功能**:`--vision`(图片输入)、`--stream`(流式响应)、`--tools`(工具调用)、`--thinking`(扩展思维) +- **负面用例**:缺 header、无效认证、缺参数、空消息、畸形 JSON、非法 role、max_tokens 负数/0、temperature 越界 + +**响应验证:** +- Models List:检查 `data`、`has_more`、每个模型的 `id`、`type: "model"`、`display_name`、`created_at` +- Model Retrieve:检查 `id`、`type: "model"`、`display_name`、`created_at` +- Messages:检查 `id`、`type: "message"`、`role: "assistant"`、`content` 数组、`model`、`usage` +- Count Tokens:检查 `input_tokens` 为数字 + +## 使用方式 + +### 基本用法 + +```bash +# OpenAI 兼容测试 +python3 scripts/detect_openai.py --base_url http://localhost:9826/v1 + +# Anthropic 兼容测试 +python3 scripts/detect_anthropic.py --base_url http://localhost:9826 +``` + +### 带认证 + +```bash +python3 scripts/detect_openai.py --base_url http://localhost:9826/v1 --api_key sk-xxx --model gpt-4o + +python3 scripts/detect_anthropic.py --base_url http://localhost:9826 --api_key sk-xxx --model claude-sonnet-4-5 +``` + +### 扩展测试 + +```bash +# 开启所有扩展测试 +python3 scripts/detect_openai.py --base_url http://localhost:9826/v1 --all + +python3 scripts/detect_anthropic.py --base_url http://localhost:9826 --all + +# 单独开启某项 +python3 scripts/detect_openai.py --base_url http://localhost:9826/v1 --stream --tools + +python3 scripts/detect_anthropic.py --base_url http://localhost:9826 --stream --tools --thinking +``` + +### 命令行参数 + +| 参数 | 说明 | 默认值 | +|------|------|--------| +| `--base_url` | API 基础地址(必填) | — | +| `--api_key` | API 密钥 | 空 | +| `--model` | 测试使用的模型名称 | `gpt-4o` / `claude-sonnet-4-5` | +| `--vision` | 执行视觉相关测试 
| 关闭 | +| `--stream` | 执行流式响应测试 | 关闭 | +| `--tools` | 执行工具调用测试 | 关闭 | +| `--logprobs` | 执行 logprobs 测试(仅 OpenAI) | 关闭 | +| `--json_schema` | 执行 Structured Output 测试(仅 OpenAI) | 关闭 | +| `--thinking` | 执行扩展思维测试(仅 Anthropic) | 关闭 | +| `--all` | 开启所有扩展测试 | 关闭 | + +## 输出示例 + +``` +Anthropic 兼容性测试 +目标: http://localhost:9826 +模型: claude-sonnet-4-5 +时间: 2026-04-21 10:30:00 +用例: 35 个 | 扩展: stream, tools + +[1/35] 获取模型列表 (GET /v1/models) + +URL: GET http://localhost:9826/v1/models + +Headers: + x-api-key: sk-xxx + anthropic-version: 2023-06-01 + +响应 (200, 0.12s): +{ + "data": [...], + "has_more": false +} +✓ 响应验证通过 + +[5/35] 基本对话(仅 user) + +URL: POST http://localhost:9826/v1/messages + +Headers: + x-api-key: sk-xxx + Content-Type: application/json + +入参: +{ + "model": "claude-sonnet-4-5", + "max_tokens": 5, + "messages": [{"role": "user", "content": "Hi"}] +} + +响应 (200, 0.23s): +{ + "id": "msg_xxx", + "type": "message", + "role": "assistant", + "content": [...], + "model": "claude-sonnet-4-5", + "usage": {"input_tokens": 10, "output_tokens": 5} +} +✓ 响应验证通过 + +测试完成 | 总计: 35 | 成功: 33 | 客户端错误: 2 | 服务端错误: 0 | 网络错误: 0 +``` + +## 测试设计原则 + +1. **所有正面用例都启用响应验证器** — 任何响应结构偏差都会立即暴露,避免掩盖错误 +2. **负面用例覆盖常见错误场景** — 缺参数、类型错误、范围越界、认证失败 +3. **扩展功能通过 flag 按需开启** — 避免在基础测试中引入不必要的依赖 +4. **验证器基于协议规范编写** — 严格检查必需字段、类型、枚举值 +5. **流式与非流式覆盖一致** — 流式只是传输方式不同,功能覆盖范围应完全对应(见下文) + +## 新增检测脚本开发流程 + +如需为新的协议(如 Google Gemini、Cohere 等)开发检测脚本,遵循以下流程: + +### 1. 在新脚本中定义协议专用的验证函数 + +每个协议的响应结构是独特的,验证函数应定义在各自的脚本中,不要放入 `core.py`。例如: + +```python +# 在 detect_gemini.py 中 +def validate_gemini_generate_content_response(response_text: str) -> Tuple[bool, List[str]]: + """验证 Gemini GenerateContent 响应""" + errors = [] + try: + data = json.loads(response_text) + except json.JSONDecodeError as e: + return False, [f"响应不是有效的JSON: {e}"] + + # 检查 Gemini 特有的字段 + required_fields = ["candidates", "usageMetadata"] + for field in required_fields: + if field not in data: + errors.append(f"缺少必需字段: {field}") + ... 
+ return len(errors) == 0, errors +``` + +### 2. 在 `core.py` 中只添加通用验证辅助 + +只有当多个协议都需要相同的验证逻辑时,才将函数提取到 `core.py`。目前已有的通用函数: + +| 函数 | 说明 | +|------|------| +| `check_required_fields()` | 检查必需字段是否存在 | +| `check_field_type()` | 检查字段类型 | +| `check_enum_value()` | 检查枚举值 | +| `check_array_items_type()` | 检查数组元素类型 | +| `validate_response_structure()` | 组合上述函数的通用验证器 | +| `parse_sse_events()` | 从 SSE 响应文本中提取 `data:` 事件 | + +### 3. 创建检测脚本模板 + +```python +#!/usr/bin/env python3 +"""新协议兼容性接口测试脚本""" + +import json +import argparse +from typing import Dict, List, Tuple, Any +from core import ( + create_ssl_context, + TestCase, + run_test_suite, + validate_response_structure, +) + +def build_headers(api_key: str) -> Dict[str, str]: + """构建请求头""" + ... + +def validate_xxx_response(response_text: str) -> Tuple[bool, List[str]]: + """验证响应结构(协议专用)""" + ... + +def validate_xxx_streaming_response(response_text: str) -> Tuple[bool, List[str]]: + """验证流式响应结构(协议专用)""" + from core import parse_sse_events + ... + +def main(): + parser = argparse.ArgumentParser(...) + parser.add_argument("--base_url", required=True, ...) + parser.add_argument("--api_key", default="", ...) + parser.add_argument("--model", default="...", ...) + parser.add_argument("--stream", action="store_true", ...) + parser.add_argument("--all", action="store_true", ...) + args = parser.parse_args() + + cases: List[TestCase] = [] + + # ---- 共享定义(供流式和非流式用例共同使用)---- + # 将 tool、image_url 等定义放在所有功能块之前, + # 避免流式和非流式块中重复定义 + tool_xxx = { ... } + image_url = "..." 
+ + # ==== 非流式正面用例(都添加 validator)==== + cases.append(TestCase( + desc="...", method="...", url=..., headers=..., body=..., + validator=validate_xxx_response + )) + + # ==== 非流式负面用例(不添加 validator)==== + cases.append(TestCase(desc="...", method="...", url=..., headers=..., body=...)) + + # ==== --stream ==== + if args.stream: + # 核心对话流式用例:每个非流式正面用例都应有对应的流式版本 + # 仅传输方式不同(请求体中 "stream": True,且 TestCase 的 stream=True), + # 功能覆盖(参数、角色、多轮等)必须与非流式一致 + cases.append(TestCase( + desc="流式...", method="POST", url=..., headers=headers, + body={ ..., "stream": True }, + stream=True, + validator=validate_xxx_streaming_response + )) + + # 流式 + 其他 flag 组合(放在 --stream 块内部) + if args.vision: + cases.append(TestCase( + desc="流式图片输入 (--stream + --vision)", + ..., + stream=True, + validator=validate_xxx_streaming_response + )) + if args.tools: + cases.append(TestCase( + desc="流式工具调用 (--stream + --tools)", + ..., + stream=True, + validator=validate_xxx_streaming_response + )) + + run_test_suite(cases=cases, ssl_ctx=ssl_ctx, title="...", base_url=..., model=..., flags=...) + +if __name__ == "__main__": + main() +``` + +### 关键要点 + +- **协议专用验证函数放在各自的脚本中** — 不要污染 `core.py` +- **只有多协议通用的验证逻辑才提取到 `core.py`** — 遵循 DRY 原则但不过度抽象 +- **所有正面用例必须添加 validator** — 确保响应结构正确 +- **负面用例不添加 validator** — 预期返回错误响应 +- **扩展功能用 flag 控制** — 保持基础测试轻量 +- **遵循现有命名和代码风格** — 中文注释、类型注解、dataclass 使用 + +### 流式测试覆盖原则 + +流式(SSE)与非流式只是数据传输方式不同,服务端对请求参数的处理逻辑应完全一致。因此: + +1. **每个非流式正面用例都应有对应的流式版本** — 包括不同的消息角色组合、参数组合、工具调用等 +2. **共享定义提前声明** — `tool`、`image_url`、`json_schema` 等定义放在所有功能块之前,流式和非流式共用同一实例,避免重复定义 +3. **flag 组合放在 `--stream` 块内部** — 流式+工具、流式+视觉等组合用例放在 `if args.stream:` 内部的 `if args.tools:` / `if args.vision:` 子块中,不需要单独的组合 flag +4. **负面用例不需要流式版本** — 参数校验发生在请求处理之前,与传输方式无关 +5. 
def parse_sse_events(response_text: str) -> List[str]:
    """Extract the payload of every ``data:`` line from raw SSE text.

    Each ``data:`` line is treated as one event payload; payloads that are
    empty or equal to the ``[DONE]`` sentinel are dropped. Lines carrying
    other SSE fields (``event:``, ``id:``, comments) are ignored.

    NOTE: multi-line events (several consecutive ``data:`` lines that the SSE
    spec would join with a newline) are intentionally NOT merged here; every
    ``data:`` line yields its own entry.

    Args:
        response_text: Raw text of the SSE response.

    Returns:
        The list of ``data`` payload strings, in stream order.
    """
    # SSE allows CRLF, LF or a bare CR as the line terminator. Normalise all
    # three to LF so a CR-only stream is not parsed as one giant line.
    normalized = response_text.replace("\r\n", "\n").replace("\r", "\n")

    payloads = []
    for raw_line in normalized.split("\n"):
        line = raw_line.strip()
        if not line.startswith("data:"):
            continue
        payload = line[len("data:"):].strip()
        if payload and payload != "[DONE]":
            payloads.append(payload)
    return payloads
print("\nHeaders:") + for k, v in test_case.headers.items(): + print(f" {k}: {v}") + if test_case.body is not None: + print("\n入参:") if isinstance(test_case.body, str): print(test_case.body) else: @@ -261,18 +289,16 @@ def run_test( ) if result.status is not None: - print(f"状态码: {result.status} | 耗时: {result.elapsed:.2f}s") + print(f"\n响应 ({result.status}, {result.elapsed:.2f}s):") else: - print(f"请求失败 | 耗时: {result.elapsed:.2f}s") + print(f"\n请求失败 ({result.elapsed:.2f}s):") if test_case.stream and result.status and result.status < 300: - # 流式响应按 SSE 行逐行输出 for line in result.response.split("\n"): print(line) else: print(format_json(result.response)) - # 执行响应验证 if test_case.validator and result.status and 200 <= result.status < 300: is_valid, errors = test_case.validator(result.response) if is_valid: @@ -312,8 +338,7 @@ def run_test_suite( count_server_error = 0 count_network_error = 0 - print("=" * 60) - print(title) + print(f"\n{title}") print(f"目标: {base_url}") print(f"模型: {model}") print(f"时间: {time.strftime('%Y-%m-%d %H:%M:%S')}") @@ -321,7 +346,7 @@ def run_test_suite( print(f"用例: {total} 个 | 扩展: {', '.join(flags)}") else: print(f"用例: {total} 个") - print("=" * 60) + print() for i, test_case in enumerate(cases, 1): result = run_test(i, total, test_case, ssl_ctx) @@ -336,11 +361,9 @@ def run_test_suite( count_network_error += 1 print() - print("=" * 60) print(f"测试完成 | 总计: {total} | 成功: {count_success} | " f"客户端错误: {count_client_error} | 服务端错误: {count_server_error} | " f"网络错误: {count_network_error}") - print("=" * 60) return total, count_success, count_client_error, count_server_error diff --git a/scripts/anthropic_detect.py b/scripts/detect_anthropic.py similarity index 69% rename from scripts/anthropic_detect.py rename to scripts/detect_anthropic.py index 7ab1236..e314ca7 100644 --- a/scripts/anthropic_detect.py +++ b/scripts/detect_anthropic.py @@ -18,7 +18,6 @@ from core import ( TestCase, run_test_suite, validate_response_structure, - 
def validate_anthropic_streaming_response(response_text: str) -> Tuple[bool, List[str]]:
    """Validate an Anthropic Messages streaming (SSE) response.

    The payload of every ``data:`` line is parsed as JSON and checked
    according to its ``type``:

    - ``message_start``: must carry a ``message`` object containing ``id``,
      ``type == "message"``, ``role == "assistant"`` and a list ``content``.
    - ``content_block_start``: must carry ``index`` and a ``content_block``
      object that has a ``type``.
    - ``content_block_delta``: must carry ``delta``.
    - ``message_stop``: only its presence is recorded.

    Any other event type is accepted without further checks. The stream must
    contain at least one ``message_start`` and one ``message_stop`` event.

    Args:
        response_text: Raw SSE response text.

    Returns:
        ``(is_valid, errors)``; ``errors`` is empty when validation passes.
    """
    from core import parse_sse_events

    errors: List[str] = []
    events = parse_sse_events(response_text)

    if not events:
        errors.append("未收到任何 SSE 事件")
        return False, errors

    has_message_start = False
    has_message_stop = False

    for i, event_data in enumerate(events):
        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # Valid JSON is not necessarily an object (e.g. `data: 123`); report
        # it instead of crashing on the membership tests below.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "type" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: type")
            continue

        event_type = event["type"]

        if event_type == "message_start":
            has_message_start = True
            if "message" not in event:
                errors.append("message_start 事件缺少 message 字段")
            elif not isinstance(event["message"], dict):
                errors.append("message_start 事件的 message 不是对象")
            else:
                msg = event["message"]
                if "id" not in msg:
                    errors.append("message_start.message 缺少 id 字段")
                if "type" not in msg:
                    errors.append("message_start.message 缺少 type 字段")
                elif msg["type"] != "message":
                    errors.append(f"message_start.message.type 值错误: 期望 'message', 实际 '{msg['type']}'")
                if "role" not in msg:
                    errors.append("message_start.message 缺少 role 字段")
                elif msg["role"] != "assistant":
                    errors.append(f"message_start.message.role 值错误: 期望 'assistant', 实际 '{msg['role']}'")
                if "content" not in msg:
                    errors.append("message_start.message 缺少 content 字段")
                elif not isinstance(msg["content"], list):
                    errors.append("message_start.message.content 类型错误: 期望 list")

        elif event_type == "message_stop":
            has_message_stop = True

        elif event_type == "content_block_start":
            if "index" not in event:
                errors.append("content_block_start 事件缺少 index 字段")
            if "content_block" not in event:
                errors.append("content_block_start 事件缺少 content_block 字段")
            elif not isinstance(event["content_block"], dict):
                errors.append("content_block_start 事件的 content_block 不是对象")
            elif "type" not in event["content_block"]:
                errors.append("content_block_start.content_block 缺少 type 字段")

        elif event_type == "content_block_delta":
            if "delta" not in event:
                errors.append("content_block_delta 事件缺少 delta 字段")

    # A well-formed stream is framed by message_start ... message_stop.
    if not has_message_start:
        errors.append("缺少 message_start 事件")
    if not has_message_stop:
        errors.append("缺少 message_stop 事件")

    return len(errors) == 0, errors
f"{base_url}/v1/messages/count_tokens" + # ---- 共享定义(供流式和非流式用例共同使用)---- + image_url = ( + "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/" + "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/" + "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + ) + tool_weather = { + "name": "get_weather", + "description": "获取指定城市的天气", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "城市名称"} + }, + "required": ["location"] + } + } + # --- 收集测试用例 --- cases: List[TestCase] = [] @@ -267,7 +391,8 @@ def main(): "max_tokens": 5, "system": "You are a helpful assistant.", "messages": [{"role": "user", "content": "1+1="}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="system prompt 数组格式(带缓存控制)", @@ -281,7 +406,8 @@ def main(): {"type": "text", "text": "You are a helpful assistant.", "cache_control": {"type": "ephemeral"}} ], "messages": [{"role": "user", "content": "Hi"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="多轮对话(含 assistant 历史)", @@ -296,7 +422,8 @@ def main(): {"role": "assistant", "content": "Hello!"}, {"role": "user", "content": "1+1="} ] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="assistant prefill(部分回复填充)", @@ -310,7 +437,8 @@ def main(): {"role": "user", "content": "What is latin for Ant? 
(A) Apoidea (B) Rhopalocera (C) Formicidae"}, {"role": "assistant", "content": "The answer is ("} ] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="content 数组格式(多个 text block)", @@ -324,7 +452,8 @@ def main(): {"type": "text", "text": "Hello"}, {"type": "text", "text": "1+1=?"} ]}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="temperature + top_p", @@ -337,7 +466,8 @@ def main(): "temperature": 0.5, "top_p": 0.9, "messages": [{"role": "user", "content": "Hi"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="temperature = 0(类确定性输出)", @@ -349,7 +479,8 @@ def main(): "max_tokens": 5, "temperature": 0, "messages": [{"role": "user", "content": "1+1="}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="top_k 参数", @@ -361,7 +492,8 @@ def main(): "max_tokens": 5, "top_k": 40, "messages": [{"role": "user", "content": "Hi"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="max_tokens 限制", @@ -372,7 +504,8 @@ def main(): "model": model, "max_tokens": 10, "messages": [{"role": "user", "content": "讲一个故事"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="stop_sequences", @@ -384,7 +517,8 @@ def main(): "max_tokens": 20, "stop_sequences": ["5"], "messages": [{"role": "user", "content": "数数: 1,2,3,"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="metadata 参数(user_id)", @@ -396,7 +530,8 @@ def main(): "max_tokens": 5, "metadata": {"user_id": "test-user-001"}, "messages": [{"role": "user", "content": "Hi"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="assistant content 数组格式(text + tool_use 块)", @@ -418,7 +553,8 @@ def main(): ]} ]} ] - } + }, + validator=validate_anthropic_messages_response )) # ==== Count Tokens API ==== @@ -626,71 +762,129 @@ def 
main(): {"type": "text", "text": "用一个词描述这张图"}, {"type": "image", "source": { "type": "url", - "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/" - "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/" - "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + "url": image_url }} ]}] - } + }, + validator=validate_anthropic_messages_response )) # ==== --stream ==== if args.stream: + # 核心用例 cases.append(TestCase( - desc="基本流式 (--stream)", + desc="流式基本对话", method="POST", url=messages_url, headers=headers, - body={ - "model": model, - "max_tokens": 5, - "stream": True, - "messages": [{"role": "user", "content": "Hi"}] - }, - stream=True + body={"model": model, "max_tokens": 5, "stream": True, "messages": [{"role": "user", "content": "Hi"}]}, + stream=True, + validator=validate_anthropic_streaming_response )) cases.append(TestCase( - desc="流式 + system prompt (--stream)", + desc="流式 + system prompt", method="POST", url=messages_url, headers=headers, - body={ - "model": model, - "max_tokens": 5, - "stream": True, - "system": "Reply in one word.", - "messages": [{"role": "user", "content": "1+1="}] - }, - stream=True + body={"model": model, "max_tokens": 5, "stream": True, "system": "有帮助的助手", "messages": [{"role": "user", "content": "Hi"}]}, + stream=True, + validator=validate_anthropic_streaming_response )) cases.append(TestCase( - desc="流式 + stop_sequences (--stream)", + desc="流式多轮对话", method="POST", url=messages_url, headers=headers, - body={ - "model": model, - "max_tokens": 20, - "stream": True, - "stop_sequences": ["5"], - "messages": [{"role": "user", "content": "数数: 1,2,3,"}] - }, - stream=True + body={"model": model, "max_tokens": 5, "stream": True, "messages": [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello"}, {"role": "user", "content": "1+1"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + cases.append(TestCase( + desc="流式 temperature + top_p", + method="POST", + url=messages_url, + 
headers=headers, + body={"model": model, "max_tokens": 5, "stream": True, "temperature": 0.5, "top_p": 0.9, "messages": [{"role": "user", "content": "Hi"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + cases.append(TestCase( + desc="流式 max_tokens", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 3, "stream": True, "messages": [{"role": "user", "content": "Hi"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + cases.append(TestCase( + desc="流式 stop_sequences", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 10, "stream": True, "stop_sequences": ["5"], "messages": [{"role": "user", "content": "数数: 1,2,3,"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + + # 流式 + vision + if args.vision: + cases.append(TestCase( + desc="流式图片输入", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 10, "stream": True, "messages": [{"role": "user", "content": [{"type": "text", "text": "描述图"}, {"type": "image", "source": {"type": "url", "url": image_url}}]}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + + # 流式 + tools + if args.tools: + cases.append(TestCase( + desc="流式工具调用 auto", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 50, "stream": True, "tools": [tool_weather], "tool_choice": {"type": "auto"}, "messages": [{"role": "user", "content": "北京天气?"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + cases.append(TestCase( + desc="流式多轮工具调用", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 20, "stream": True, "tools": [tool_weather], "messages": [{"role": "user", "content": "北京天气?"}, {"role": "assistant", "content": [{"type": "tool_use", "id": "toolu_001", "name": "get_weather", "input": {"location": "Beijing"}}]}, {"role": "user", 
"content": [{"type": "tool_result", "tool_use_id": "toolu_001", "content": '{"temp": 22}'}]}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + + # 流式 + thinking + if args.thinking: + cases.append(TestCase( + desc="流式扩展思维", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 100, "stream": True, "thinking": {"type": "enabled", "budget_tokens": 50}, "messages": [{"role": "user", "content": "1+1=?"}]}, + stream=True, + validator=validate_anthropic_streaming_response + )) + + # 流式高级参数 + cases.append(TestCase( + desc="流式 service_tier: auto", + method="POST", + url=messages_url, + headers=headers, + body={"model": model, "max_tokens": 5, "stream": True, "service_tier": "auto", "messages": [{"role": "user", "content": "Hi"}]}, + stream=True, + validator=validate_anthropic_streaming_response )) # ==== --tools ==== if args.tools: - tool_weather = { - "name": "get_weather", - "description": "获取指定城市的天气", - "input_schema": { - "type": "object", - "properties": { - "location": {"type": "string", "description": "城市名称"} - }, - "required": ["location"] - } - } cases.append(TestCase( desc="工具调用 tool_choice: auto (--tools)", method="POST", @@ -702,7 +896,8 @@ def main(): "tools": [tool_weather], "tool_choice": {"type": "auto"}, "messages": [{"role": "user", "content": "北京天气怎么样?"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="工具调用 tool_choice: any (--tools)", @@ -715,7 +910,8 @@ def main(): "tools": [tool_weather], "tool_choice": {"type": "any"}, "messages": [{"role": "user", "content": "北京天气怎么样?"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="指定工具调用 tool_choice: {name} (--tools)", @@ -728,7 +924,8 @@ def main(): "tools": [tool_weather], "tool_choice": {"type": "tool", "name": "get_weather"}, "messages": [{"role": "user", "content": "北京天气怎么样?"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( 
desc="tool_choice: none (--tools)", @@ -741,7 +938,8 @@ def main(): "tools": [tool_weather], "tool_choice": {"type": "none"}, "messages": [{"role": "user", "content": "北京天气怎么样?"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="多轮工具调用(tool_result 返回)(--tools)", @@ -762,7 +960,8 @@ def main(): {"type": "tool_result", "tool_use_id": "toolu_001", "content": "{\"temperature\": 22, \"condition\": \"晴\"}"} ]} ] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="多轮工具调用(tool_result 带 is_error)(--tools)", @@ -782,7 +981,8 @@ def main(): {"type": "tool_result", "tool_use_id": "toolu_002", "is_error": True, "content": "天气服务不可用"} ]} ] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="tool_choice 指向不存在的工具(负面)(--tools)", @@ -821,7 +1021,8 @@ def main(): ], "tool_choice": {"type": "auto"}, "messages": [{"role": "user", "content": "北京现在几点了?天气怎么样?"}] - } + }, + validator=validate_anthropic_messages_response )) # ==== --thinking ==== @@ -836,7 +1037,8 @@ def main(): "max_tokens": 200, "thinking": {"type": "enabled", "budget_tokens": 100}, "messages": [{"role": "user", "content": "1+1=?"}] - } + }, + validator=validate_anthropic_messages_response )) cases.append(TestCase( desc="扩展思维 adaptive (--thinking)", @@ -848,36 +1050,8 @@ def main(): "max_tokens": 200, "thinking": {"type": "adaptive", "budget_tokens": 100}, "messages": [{"role": "user", "content": "1+1=?"}] - } - )) - - # ==== --stream + --tools 组合 ==== - if args.stream and args.tools: - tool_weather_stream = { - "name": "get_weather", - "description": "获取指定城市的天气", - "input_schema": { - "type": "object", - "properties": { - "location": {"type": "string", "description": "城市名称"} - }, - "required": ["location"] - } - } - cases.append(TestCase( - desc="流式工具调用 (--stream --tools)", - method="POST", - url=messages_url, - headers=headers, - body={ - "model": model, - "max_tokens": 50, - "stream": True, - "tools": 
[tool_weather_stream], - "tool_choice": {"type": "auto"}, - "messages": [{"role": "user", "content": "北京天气怎么样?"}] }, - stream=True + validator=validate_anthropic_messages_response )) # ==== 高级参数测试 ==== @@ -892,7 +1066,8 @@ def main(): "max_tokens": 10, "cache_control": {"type": "ephemeral"}, "messages": [{"role": "user", "content": "Hello"}] - } + }, + validator=validate_anthropic_messages_response )) # output_config: 输出配置 @@ -906,7 +1081,21 @@ def main(): "max_tokens": 10, "output_config": {"format": "text"}, "messages": [{"role": "user", "content": "Hi"}] - } + }, + validator=validate_anthropic_messages_response + )) + cases.append(TestCase( + desc="output_config 带 effort", + method="POST", + url=messages_url, + headers=headers, + body={ + "model": model, + "max_tokens": 10, + "output_config": {"format": "text", "effort": "low"}, + "messages": [{"role": "user", "content": "Hi"}] + }, + validator=validate_anthropic_messages_response )) # service_tier: 服务层级 @@ -920,7 +1109,21 @@ def main(): "max_tokens": 5, "service_tier": "auto", "messages": [{"role": "user", "content": "Hello"}] - } + }, + validator=validate_anthropic_messages_response + )) + cases.append(TestCase( + desc="service_tier: standard_only", + method="POST", + url=messages_url, + headers=headers, + body={ + "model": model, + "max_tokens": 5, + "service_tier": "standard_only", + "messages": [{"role": "user", "content": "Hello"}] + }, + validator=validate_anthropic_messages_response )) # ==== Models API 分页测试 ==== diff --git a/scripts/openai_detect.py b/scripts/detect_openai.py similarity index 51% rename from scripts/openai_detect.py rename to scripts/detect_openai.py index abf1292..4e3ce70 100755 --- a/scripts/openai_detect.py +++ b/scripts/detect_openai.py @@ -12,13 +12,12 @@ import json import argparse -from typing import Dict, List, Tuple, Any +from typing import Dict, List, Tuple, Any, Optional from core import ( create_ssl_context, TestCase, run_test_suite, validate_response_structure, - 
def validate_openai_streaming_response(response_text: str, expected_n: Optional[int] = None) -> Tuple[bool, List[str]]:
    """Validate an OpenAI Chat Completions streaming (SSE) response.

    Every ``data:`` payload (the ``[DONE]`` sentinel is already removed by
    ``parse_sse_events``) must be a JSON object with
    ``object == "chat.completion.chunk"`` and a list ``choices`` whose items
    are objects carrying ``index``.

    Stream-level checks:

    - When ``expected_n`` is given, the number of DISTINCT ``choices[].index``
      values seen across the whole stream must equal ``expected_n``. A single
      chunk may carry fewer choices (typically one, or none for a trailing
      usage-only chunk) but never more than ``expected_n``.
    - Every choice index that appeared must receive a non-null
      ``finish_reason`` in some chunk.

    Args:
        response_text: Raw SSE response text.
        expected_n: Expected number of choices (the request's ``n``), or
            ``None`` to skip the count checks.

    Returns:
        ``(is_valid, errors)``; ``errors`` is empty when validation passes.
    """
    from core import parse_sse_events

    errors: List[str] = []
    events = parse_sse_events(response_text)

    if not events:
        errors.append("未收到任何 SSE 事件")
        return False, errors

    seen_indexes = set()      # choice indexes observed anywhere in the stream
    finished_indexes = set()  # choice indexes that received a finish_reason

    for i, event_data in enumerate(events):
        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # Valid JSON is not necessarily an object; report it instead of
        # crashing on the membership tests below.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "object" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: object")
        elif event["object"] != "chat.completion.chunk":
            errors.append(f"事件[{i}].object 值错误: 期望 'chat.completion.chunk', 实际 '{event['object']}'")

        if "choices" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: choices")
            continue
        choices = event["choices"]
        if not isinstance(choices, list):
            errors.append(f"事件[{i}].choices 类型错误: 期望 list")
            continue

        # A chunk usually holds a single choice (or none, for the usage
        # chunk), so only "more than n" is a per-chunk violation.
        if expected_n is not None and len(choices) > expected_n:
            errors.append(f"事件[{i}].choices 数量不匹配: 期望至多 {expected_n}, 实际 {len(choices)}")

        for j, choice in enumerate(choices):
            if not isinstance(choice, dict):
                errors.append(f"事件[{i}].choices[{j}] 不是对象")
                continue

            if "index" not in choice:
                errors.append(f"事件[{i}].choices[{j}] 缺少必需字段: index")
                continue

            index = choice["index"]
            if isinstance(index, int):
                seen_indexes.add(index)
                if choice.get("finish_reason") is not None:
                    finished_indexes.add(index)

    if expected_n is not None and len(seen_indexes) != expected_n:
        errors.append(
            f"流式响应中 choices 数量不一致: 期望 {expected_n}, "
            f"实际出现 index {sorted(seen_indexes)}"
        )

    unfinished = sorted(seen_indexes - finished_indexes)
    if unfinished:
        errors.append(f"流式响应缺少 finish_reason: choices index {unfinished}")

    return len(errors) == 0, errors
"temperature": 0.5, "top_p": 0.9 - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="max_tokens 限制", @@ -300,7 +383,8 @@ def main(): "model": model, "messages": [{"role": "user", "content": "讲一个故事"}], "max_tokens": 10 - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="stop sequences", @@ -312,7 +396,8 @@ def main(): "messages": [{"role": "user", "content": "数数: 1,2,3,"}], "max_tokens": 20, "stop": ["5"] - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="n=2 多候选", @@ -324,7 +409,8 @@ def main(): "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "n": 2 - } + }, + validator=lambda r: validate_openai_chat_completion_response(r, expected_n=2) )) cases.append(TestCase( desc="seed 参数", @@ -336,7 +422,8 @@ def main(): "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "seed": 42 - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="frequency_penalty + presence_penalty", @@ -349,7 +436,8 @@ def main(): "max_tokens": 5, "frequency_penalty": 0.5, "presence_penalty": 0.5 - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="max_completion_tokens 参数", @@ -360,7 +448,8 @@ def main(): "model": model, "messages": [{"role": "user", "content": "讲一个故事"}], "max_completion_tokens": 10 - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="JSON mode (response_format: json_object)", @@ -375,7 +464,8 @@ def main(): ], "max_tokens": 10, "response_format": {"type": "json_object"} - } + }, + validator=validate_openai_chat_completion_response )) # ---- Chat Completions: 负面用例 ---- @@ -440,13 +530,173 @@ def main(): body="invalid json{" )) + cases.append(TestCase( + desc="max_tokens 为负数", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + 
"max_tokens": -1 + } + )) + cases.append(TestCase( + desc="max_tokens = 0", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 0 + } + )) + cases.append(TestCase( + desc="temperature 超出范围 (2.5)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "temperature": 2.5 + } + )) + cases.append(TestCase( + desc="frequency_penalty 超出范围 (3.0)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "frequency_penalty": 3.0 + } + )) + cases.append(TestCase( + desc="frequency_penalty 超出范围 (-3.0)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "frequency_penalty": -3.0 + } + )) + cases.append(TestCase( + desc="presence_penalty 超出范围 (3.0)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "presence_penalty": 3.0 + } + )) + cases.append(TestCase( + desc="presence_penalty 超出范围 (-3.0)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "presence_penalty": -3.0 + } + )) + cases.append(TestCase( + desc="top_p 超出范围 (1.5)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "top_p": 1.5 + } + )) + cases.append(TestCase( + desc="top_p 超出范围 (-0.1)", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "top_p": -0.1 + } + )) + cases.append(TestCase( + desc="n 为负数", + method="POST", + url=chat_url, + 
headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "n": -1 + } + )) + cases.append(TestCase( + desc="n 为 0", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "n": 0 + } + )) + + # ---- 共享定义(供流式和非流式用例共同使用)---- + image_url = ( + "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/" + "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/" + "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + ) + tool_weather = { + "type": "function", + "function": { + "name": "get_weather", + "description": "获取指定城市的天气", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "城市名称"} + }, + "required": ["location"] + } + } + } + json_schema_math = { + "name": "math_answer", + "strict": True, + "schema": { + "type": "object", + "properties": { + "answer": {"type": "number"}, + "explanation": {"type": "string"} + }, + "required": ["answer", "explanation"], + "additionalProperties": False + } + } + # ---- --vision ---- if args.vision: - image_url = ( - "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/" - "Gfp-wisconsin-madison-the-nature-boardwalk.jpg/" - "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" - ) cases.append(TestCase( desc="图片 URL 输入 + detail 参数 (--vision)", method="POST", @@ -464,69 +714,165 @@ def main(): ]} ], "max_tokens": 10 - } + }, + validator=validate_openai_chat_completion_response )) # ---- --stream ---- if args.stream: + # 核心用例 cases.append(TestCase( - desc="基本流式 (--stream)", + desc="流式基本对话", method="POST", url=chat_url, headers=headers, - body={ - "model": model, - "messages": [{"role": "user", "content": "Hi"}], - "max_tokens": 5, - "stream": True - }, - stream=True + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "stream": True}, + stream=True, + 
validator=validate_openai_streaming_response )) cases.append(TestCase( - desc="流式 + include_usage (--stream)", + desc="流式 + include_usage", method="POST", url=chat_url, headers=headers, - body={ - "model": model, - "messages": [{"role": "user", "content": "Hi"}], - "max_tokens": 5, - "stream": True, - "stream_options": {"include_usage": True} - }, - stream=True + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "stream": True, "stream_options": {"include_usage": True}}, + stream=True, + validator=validate_openai_streaming_response )) cases.append(TestCase( - desc="流式 + stop sequences (--stream)", + desc="流式 + system prompt", method="POST", url=chat_url, headers=headers, - body={ - "model": model, - "messages": [{"role": "user", "content": "数数: 1,2,3,"}], - "max_tokens": 20, - "stream": True, - "stop": ["5"] - }, - stream=True + body={"model": model, "messages": [{"role": "system", "content": "有帮助的助手"}, {"role": "user", "content": "Hi"}], "max_tokens": 5, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式多轮对话", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "1+1?"}, {"role": "assistant", "content": "2"}, {"role": "user", "content": "2+2?"}], "max_tokens": 5, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式 temperature + top_p", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "temperature": 0.5, "top_p": 0.9, "max_tokens": 5, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式 max_tokens", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 3, "stream": True}, + stream=True, + 
validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式 stop_sequences", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "数数: 1,2,3,"}], "max_tokens": 10, "stop": ["5"], "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式 JSON mode", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "system", "content": "以JSON回复"}, {"role": "user", "content": "颜色"}], "max_tokens": 20, "response_format": {"type": "json_object"}, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + + # 流式 + vision + if args.vision: + cases.append(TestCase( + desc="流式图片输入", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": [{"type": "text", "text": "描述图"}, {"type": "image_url", "image_url": {"url": image_url}}]}], "max_tokens": 10, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + + # 流式 + tools + if args.tools: + cases.append(TestCase( + desc="流式工具调用 auto", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "北京天气?"}], "max_tokens": 50, "stream": True, "tools": [tool_weather], "tool_choice": "auto"}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式多轮工具调用", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "北京天气?"}, {"role": "assistant", "content": None, "tool_calls": [{"id": "call_001", "type": "function", "function": {"name": "get_weather", "arguments": '{"location": "Beijing"}'}}]}, {"role": "tool", "tool_call_id": "call_001", "content": '{"temp": 22}'}], "max_tokens": 10, "stream": True, "tools": [tool_weather]}, + stream=True, + 
validator=validate_openai_streaming_response + )) + + # 流式 + logprobs + if args.logprobs: + cases.append(TestCase( + desc="流式 logprobs", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "logprobs": True, "top_logprobs": 2, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + + # 流式 + json_schema + if args.json_schema: + cases.append(TestCase( + desc="流式 json_schema", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "1+1=?"}], "max_tokens": 20, "response_format": {"type": "json_schema", "json_schema": json_schema_math}, "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + + # 流式高级参数(选代表) + cases.append(TestCase( + desc="流式 reasoning_effort: medium", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "reasoning_effort": "medium", "stream": True}, + stream=True, + validator=validate_openai_streaming_response + )) + cases.append(TestCase( + desc="流式 service_tier: auto", + method="POST", + url=chat_url, + headers=headers, + body={"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "service_tier": "auto", "stream": True}, + stream=True, + validator=validate_openai_streaming_response )) # ---- --tools ---- if args.tools: - tool_weather = { - "type": "function", - "function": { - "name": "get_weather", - "description": "获取指定城市的天气", - "parameters": { - "type": "object", - "properties": { - "location": {"type": "string", "description": "城市名称"} - }, - "required": ["location"] - } - } - } cases.append(TestCase( desc="工具调用 tool_choice: auto (--tools)", method="POST", @@ -538,7 +884,8 @@ def main(): "max_tokens": 50, "tools": [tool_weather], "tool_choice": "auto" - } + }, + validator=validate_openai_chat_completion_response )) 
cases.append(TestCase( desc="工具调用 tool_choice: required (--tools)", @@ -551,7 +898,8 @@ def main(): "max_tokens": 50, "tools": [tool_weather], "tool_choice": "required" - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="指定函数调用 tool_choice: {name} (--tools)", @@ -567,7 +915,8 @@ def main(): "type": "function", "function": {"name": "get_weather"} } - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="多轮工具调用(构造 tool 结果)(--tools)", @@ -590,7 +939,8 @@ def main(): ], "max_tokens": 20, "tools": [tool_weather] - } + }, + validator=validate_openai_chat_completion_response )) cases.append(TestCase( desc="parallel_tool_calls: false (--tools)", @@ -604,7 +954,8 @@ def main(): "tools": [tool_weather], "tool_choice": "auto", "parallel_tool_calls": False - } + }, + validator=validate_openai_chat_completion_response )) # ---- --logprobs ---- @@ -620,7 +971,8 @@ def main(): "max_tokens": 5, "logprobs": True, "top_logprobs": 2 - } + }, + validator=validate_openai_chat_completion_response )) # ---- --json-schema ---- @@ -636,21 +988,10 @@ def main(): "max_tokens": 20, "response_format": { "type": "json_schema", - "json_schema": { - "name": "math_answer", - "strict": True, - "schema": { - "type": "object", - "properties": { - "answer": {"type": "number"}, - "explanation": {"type": "string"} - }, - "required": ["answer", "explanation"], - "additionalProperties": False - } - } + "json_schema": json_schema_math } - } + }, + validator=validate_openai_chat_completion_response )) # ---- 高级参数测试 ---- @@ -664,11 +1005,51 @@ def main(): "model": model, "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 5, - "logit_bias": {"1234": -100, "5678": 50} # token_id: bias - } + "logit_bias": {"1234": -100, "5678": 50} + }, + validator=validate_openai_chat_completion_response )) # reasoning_effort: 推理努力级别(需要模型支持) + cases.append(TestCase( + desc="reasoning_effort: none", + method="POST", + url=chat_url, + 
headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "1+1=?"}], + "max_tokens": 10, + "reasoning_effort": "none" + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="reasoning_effort: minimal", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "1+1=?"}], + "max_tokens": 10, + "reasoning_effort": "minimal" + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="reasoning_effort: low", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "1+1=?"}], + "max_tokens": 10, + "reasoning_effort": "low" + }, + validator=validate_openai_chat_completion_response + )) cases.append(TestCase( desc="reasoning_effort: medium", method="POST", @@ -679,7 +1060,21 @@ def main(): "messages": [{"role": "user", "content": "1+1=?"}], "max_tokens": 10, "reasoning_effort": "medium" - } + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="reasoning_effort: high", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "1+1=?"}], + "max_tokens": 10, + "reasoning_effort": "high" + }, + validator=validate_openai_chat_completion_response )) # service_tier: 服务层级 @@ -693,7 +1088,47 @@ def main(): "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "service_tier": "auto" - } + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="service_tier: default", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "service_tier": "default" + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="service_tier: flex", + method="POST", + url=chat_url, + 
headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "service_tier": "flex" + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="service_tier: priority", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "Hi"}], + "max_tokens": 5, + "service_tier": "priority" + }, + validator=validate_openai_chat_completion_response )) # verbosity: 冗长程度 @@ -707,7 +1142,34 @@ def main(): "messages": [{"role": "user", "content": "介绍一下Python"}], "max_tokens": 50, "verbosity": "low" - } + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="verbosity: medium", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "介绍一下Python"}], + "max_tokens": 50, + "verbosity": "medium" + }, + validator=validate_openai_chat_completion_response + )) + cases.append(TestCase( + desc="verbosity: high", + method="POST", + url=chat_url, + headers=headers, + body={ + "model": model, + "messages": [{"role": "user", "content": "介绍一下Python"}], + "max_tokens": 50, + "verbosity": "high" + }, + validator=validate_openai_chat_completion_response )) # ---- 执行测试 ----