From e1af978c562e68f4e306a65adf643d69f670ae2c Mon Sep 17 00:00:00 2001 From: lanyuanxiaoyao Date: Tue, 21 Apr 2026 14:00:39 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=AE=8C=E5=96=84=20API=20=E5=85=BC?= =?UTF-8?q?=E5=AE=B9=E6=80=A7=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修复 Anthropic Count Tokens 响应验证器,检查嵌套结构 - 补充 OpenAI service_tier: default 测试 - 补充 Anthropic output_config 带 effort 字段测试 - 补充 OpenAI reasoning_effort: low/high 测试 - 补充 Anthropic service_tier: standard_only 测试 - 修复流式响应 choices 数量验证逻辑,跳过空数组 --- scripts/detect_anthropic.py | 141 +++++++++++++++++++++++++++++++++--- scripts/detect_openai.py | 132 +++++++++++++++++++++++++++++++-- 2 files changed, 257 insertions(+), 16 deletions(-) diff --git a/scripts/detect_anthropic.py b/scripts/detect_anthropic.py index 0fd8690..0797218 100644 --- a/scripts/detect_anthropic.py +++ b/scripts/detect_anthropic.py @@ -169,14 +169,109 @@ def validate_anthropic_count_tokens_response(response_text: str) -> Tuple[bool, """验证 Anthropic Count Tokens 响应 根据API文档,响应应包含: - - input_tokens: number + - message_tokens_count: object { input_tokens } """ - required_fields = ["input_tokens"] - field_types = { - "input_tokens": (int, float) - } + errors = [] - return validate_response_structure(response_text, required_fields, field_types) + try: + data = json.loads(response_text) + except json.JSONDecodeError as e: + return False, [f"响应不是有效的JSON: {e}"] + + # 检查嵌套结构 + if "message_tokens_count" not in data: + errors.append("缺少必需字段: message_tokens_count") + else: + mtc = data["message_tokens_count"] + if not isinstance(mtc, dict): + errors.append(f"字段 'message_tokens_count' 类型错误: 期望 object, 实际 {type(mtc).__name__}") + else: + if "input_tokens" not in mtc: + errors.append("message_tokens_count 缺少必需字段: input_tokens") + elif not isinstance(mtc["input_tokens"], (int, float)): + errors.append(f"message_tokens_count.input_tokens 类型错误: 期望 number, 实际 
def validate_anthropic_streaming_response(response_text: str) -> Tuple[bool, List[str]]:
    """Validate an Anthropic Messages streaming (SSE) response.

    The response text is split into events by ``core.parse_sse_events``; each
    returned item is decoded with ``json.loads`` and inspected.

    Checks performed:
    - every event decodes to a JSON object that carries a ``type`` field;
    - a ``message_start`` event is present and its ``message`` object has
      ``id``, ``type == "message"``, ``role == "assistant"`` and a list-valued
      ``content``;
    - every ``content_block_delta`` event carries a ``delta`` field;
    - a ``message_stop`` event is present.

    Events of any other type are accepted without further inspection.

    Args:
        response_text: Raw SSE response body.

    Returns:
        ``(ok, errors)`` where ``ok`` is True when ``errors`` is empty.
    """
    # Imported lazily, exactly as the surrounding scripts do.
    from core import parse_sse_events

    events = parse_sse_events(response_text)
    if not events:
        return False, ["未收到任何 SSE 事件"]

    errors = []
    has_message_start = False
    has_message_stop = False

    for i, event_data in enumerate(events):
        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # json.loads may legitimately return None, a number, a string or a
        # list; membership tests / subscripting on those would raise
        # TypeError and abort the whole run instead of reporting a failure.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "type" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: type")
            continue

        event_type = event["type"]

        if event_type == "message_start":
            has_message_start = True
            if "message" not in event:
                errors.append("message_start 事件缺少 message 字段")
            elif not isinstance(event["message"], dict):
                errors.append("message_start 事件的 message 不是对象")
            else:
                msg = event["message"]
                if "id" not in msg:
                    errors.append("message_start.message 缺少 id 字段")
                # 'type' and 'role' must be present and carry fixed values.
                for field, expected in (("type", "message"), ("role", "assistant")):
                    if field not in msg:
                        errors.append(f"message_start.message 缺少 {field} 字段")
                    elif msg[field] != expected:
                        errors.append(
                            f"message_start.message.{field} 值错误: "
                            f"期望 '{expected}', 实际 '{msg[field]}'"
                        )
                if "content" not in msg:
                    errors.append("message_start.message 缺少 content 字段")
                elif not isinstance(msg["content"], list):
                    errors.append("message_start.message.content 类型错误: 期望 list")

        elif event_type == "message_stop":
            has_message_stop = True

        elif event_type == "content_block_delta":
            if "delta" not in event:
                errors.append("content_block_delta 事件缺少 delta 字段")

    if not has_message_start:
        errors.append("缺少 message_start 事件")
    if not has_message_stop:
        errors.append("缺少 message_stop 事件")

    return not errors, errors
def validate_openai_streaming_response(response_text: str, expected_n: Optional[int] = None) -> Tuple[bool, List[str]]:
    """Validate an OpenAI Chat Completions streaming (SSE) response.

    The response text is split into events by ``core.parse_sse_events``; each
    returned item is decoded with ``json.loads`` and inspected as a
    ``chat.completion.chunk`` object.

    Checks performed:
    - every event (other than the ``[DONE]`` sentinel) decodes to a JSON
      object;
    - ``object`` equals ``"chat.completion.chunk"``;
    - ``choices`` exists and is a list whose items are objects carrying an
      integer ``index``;
    - when ``expected_n`` is given, no single chunk carries more than
      ``expected_n`` choices and the number of distinct ``index`` values seen
      across the whole stream equals ``expected_n``.

    Chunks with an empty ``choices`` list (for example the trailing usage
    chunk sent with ``stream_options.include_usage``) are valid and do not
    take part in the count. ``finish_reason`` is not validated here.

    Args:
        response_text: Raw SSE response body.
        expected_n: Expected number of choices (the request's ``n``), or
            None to skip the count checks.

    Returns:
        ``(ok, errors)`` where ``ok`` is True when ``errors`` is empty.
    """
    # Imported lazily, exactly as the surrounding scripts do.
    from core import parse_sse_events

    events = parse_sse_events(response_text)
    if not events:
        return False, ["未收到任何 SSE 事件"]

    errors = []
    seen_indices = set()
    saw_chunk = False

    for i, event_data in enumerate(events):
        # The stream terminator is not JSON. NOTE(review): parse_sse_events
        # may already drop it - skipping it here is harmless either way.
        if isinstance(event_data, str) and event_data.strip() == "[DONE]":
            continue
        saw_chunk = True

        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # Guard against valid-but-non-object JSON (null, numbers, ...), which
        # would otherwise raise TypeError on the membership tests below.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "object" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: object")
        elif event["object"] != "chat.completion.chunk":
            errors.append(f"事件[{i}].object 值错误: 期望 'chat.completion.chunk', 实际 '{event['object']}'")

        if "choices" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: choices")
            continue
        choices = event["choices"]
        if not isinstance(choices, list):
            errors.append(f"事件[{i}].choices 类型错误: 期望 list")
            continue

        # A chunk may carry anywhere from 0 to n choices, so only an excess
        # is an error at chunk level; the exact count is checked per stream.
        if expected_n is not None and len(choices) > expected_n:
            errors.append(f"事件[{i}].choices 数量不匹配: 期望不超过 {expected_n}, 实际 {len(choices)}")

        for j, choice in enumerate(choices):
            if not isinstance(choice, dict):
                errors.append(f"事件[{i}].choices[{j}] 不是对象")
                continue

            if "index" not in choice:
                errors.append(f"事件[{i}].choices[{j}] 缺少必需字段: index")
            elif isinstance(choice["index"], int) and not isinstance(choice["index"], bool):
                seen_indices.add(choice["index"])
            else:
                errors.append(f"事件[{i}].choices[{j}].index 类型错误: 期望 int")

    if not saw_chunk:
        # Only the [DONE] sentinel arrived: there is nothing to validate.
        errors.append("未收到任何 SSE 事件")

    if expected_n is not None and len(seen_indices) != expected_n:
        errors.append(f"流式响应中 choices 数量不一致: 期望 {expected_n}, 实际出现 {sorted(seen_indices)}")

    return not errors, errors
"Hi"}], + "max_tokens": 5, + "service_tier": "default" + }, + validator=validate_openai_chat_completion_response + )) # verbosity: 冗长程度 cases.append(TestCase(