feat: 完善 API 兼容性测试用例
- 修复 Anthropic Count Tokens 响应验证器,检查嵌套结构
- 补充 OpenAI service_tier: default 测试
- 补充 Anthropic output_config 带 effort 字段测试
- 补充 OpenAI reasoning_effort: low/high 测试
- 补充 Anthropic service_tier: standard_only 测试
- 修复流式响应 choices 数量验证逻辑,跳过空数组
This commit is contained in:
@@ -169,14 +169,109 @@ def validate_anthropic_count_tokens_response(response_text: str) -> Tuple[bool,
|
|||||||
"""验证 Anthropic Count Tokens 响应
|
"""验证 Anthropic Count Tokens 响应
|
||||||
|
|
||||||
根据API文档,响应应包含:
|
根据API文档,响应应包含:
|
||||||
- input_tokens: number
|
- message_tokens_count: object { input_tokens }
|
||||||
"""
|
"""
|
||||||
required_fields = ["input_tokens"]
|
errors = []
|
||||||
field_types = {
|
|
||||||
"input_tokens": (int, float)
|
|
||||||
}
|
|
||||||
|
|
||||||
return validate_response_structure(response_text, required_fields, field_types)
|
try:
|
||||||
|
data = json.loads(response_text)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
return False, [f"响应不是有效的JSON: {e}"]
|
||||||
|
|
||||||
|
# 检查嵌套结构
|
||||||
|
if "message_tokens_count" not in data:
|
||||||
|
errors.append("缺少必需字段: message_tokens_count")
|
||||||
|
else:
|
||||||
|
mtc = data["message_tokens_count"]
|
||||||
|
if not isinstance(mtc, dict):
|
||||||
|
errors.append(f"字段 'message_tokens_count' 类型错误: 期望 object, 实际 {type(mtc).__name__}")
|
||||||
|
else:
|
||||||
|
if "input_tokens" not in mtc:
|
||||||
|
errors.append("message_tokens_count 缺少必需字段: input_tokens")
|
||||||
|
elif not isinstance(mtc["input_tokens"], (int, float)):
|
||||||
|
errors.append(f"message_tokens_count.input_tokens 类型错误: 期望 number, 实际 {type(mtc['input_tokens']).__name__}")
|
||||||
|
|
||||||
|
return len(errors) == 0, errors
|
||||||
|
|
||||||
|
|
||||||
|
def _check_anthropic_message_start(event: dict) -> List[str]:
    """Return the problems found in one ``message_start`` event (empty if none)."""
    if "message" not in event:
        return ["message_start 事件缺少 message 字段"]

    msg = event["message"]
    if not isinstance(msg, dict):
        return ["message_start 事件的 message 不是对象"]

    problems = []
    if "id" not in msg:
        problems.append("message_start.message 缺少 id 字段")

    # Fields that must be present AND carry one fixed value.
    for field, expected in (("type", "message"), ("role", "assistant")):
        if field not in msg:
            problems.append(f"message_start.message 缺少 {field} 字段")
        elif msg[field] != expected:
            problems.append(
                f"message_start.message.{field} 值错误: 期望 '{expected}', 实际 '{msg[field]}'"
            )

    if "content" not in msg:
        problems.append("message_start.message 缺少 content 字段")
    elif not isinstance(msg["content"], list):
        problems.append("message_start.message.content 类型错误: 期望 list")

    return problems


def validate_anthropic_streaming_response(response_text: str) -> Tuple[bool, List[str]]:
    """Validate an Anthropic streaming (SSE) Messages response.

    Checks performed:
    - every event payload is valid JSON and decodes to an object with a
      ``type`` field;
    - the stream contains at least one ``message_start`` and one
      ``message_stop`` event;
    - each ``message_start`` event carries a ``message`` object with ``id``,
      ``type == "message"``, ``role == "assistant"`` and a list ``content``;
    - each ``content_block_delta`` event carries a ``delta`` field.

    Events of any other type are accepted without further inspection.

    Args:
        response_text: Raw SSE response text. It is split into event payloads
            by ``core.parse_sse_events`` (assumed to return one JSON string
            per event -- confirm against ``core``).

    Returns:
        ``(ok, errors)``: ``ok`` is True only when ``errors`` is empty.
    """
    from core import parse_sse_events

    events = parse_sse_events(response_text)
    if not events:
        return False, ["未收到任何 SSE 事件"]

    errors = []
    has_message_start = False
    has_message_stop = False

    for i, event_data in enumerate(events):
        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # Valid JSON is not necessarily an object: a bare number, string,
        # null or array would make the key lookups below raise TypeError or
        # silently do a substring test, so report it as a validation error.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "type" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: type")
            continue

        event_type = event["type"]
        if event_type == "message_start":
            has_message_start = True
            errors.extend(_check_anthropic_message_start(event))
        elif event_type == "message_stop":
            has_message_stop = True
        elif event_type == "content_block_delta" and "delta" not in event:
            errors.append("content_block_delta 事件缺少 delta 字段")

    if not has_message_start:
        errors.append("缺少 message_start 事件")
    if not has_message_stop:
        errors.append("缺少 message_stop 事件")

    return not errors, errors
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -660,7 +755,7 @@ def main():
|
|||||||
"messages": [{"role": "user", "content": "Hi"}]
|
"messages": [{"role": "user", "content": "Hi"}]
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_streaming_response
|
||||||
))
|
))
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
desc="流式 + system prompt (--stream)",
|
desc="流式 + system prompt (--stream)",
|
||||||
@@ -675,7 +770,7 @@ def main():
|
|||||||
"messages": [{"role": "user", "content": "1+1="}]
|
"messages": [{"role": "user", "content": "1+1="}]
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_streaming_response
|
||||||
))
|
))
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
desc="流式 + stop_sequences (--stream)",
|
desc="流式 + stop_sequences (--stream)",
|
||||||
@@ -690,7 +785,7 @@ def main():
|
|||||||
"messages": [{"role": "user", "content": "数数: 1,2,3,"}]
|
"messages": [{"role": "user", "content": "数数: 1,2,3,"}]
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_streaming_response
|
||||||
))
|
))
|
||||||
|
|
||||||
# ==== --tools ====
|
# ==== --tools ====
|
||||||
@@ -902,7 +997,7 @@ def main():
|
|||||||
"messages": [{"role": "user", "content": "北京天气怎么样?"}]
|
"messages": [{"role": "user", "content": "北京天气怎么样?"}]
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_streaming_response
|
||||||
))
|
))
|
||||||
|
|
||||||
# ==== 高级参数测试 ====
|
# ==== 高级参数测试 ====
|
||||||
@@ -935,6 +1030,19 @@ def main():
|
|||||||
},
|
},
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_messages_response
|
||||||
))
|
))
|
||||||
|
cases.append(TestCase(
|
||||||
|
desc="output_config 带 effort",
|
||||||
|
method="POST",
|
||||||
|
url=messages_url,
|
||||||
|
headers=headers,
|
||||||
|
body={
|
||||||
|
"model": model,
|
||||||
|
"max_tokens": 10,
|
||||||
|
"output_config": {"format": "text", "effort": "low"},
|
||||||
|
"messages": [{"role": "user", "content": "Hi"}]
|
||||||
|
},
|
||||||
|
validator=validate_anthropic_messages_response
|
||||||
|
))
|
||||||
|
|
||||||
# service_tier: 服务层级
|
# service_tier: 服务层级
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
@@ -950,6 +1058,19 @@ def main():
|
|||||||
},
|
},
|
||||||
validator=validate_anthropic_messages_response
|
validator=validate_anthropic_messages_response
|
||||||
))
|
))
|
||||||
|
cases.append(TestCase(
|
||||||
|
desc="service_tier: standard_only",
|
||||||
|
method="POST",
|
||||||
|
url=messages_url,
|
||||||
|
headers=headers,
|
||||||
|
body={
|
||||||
|
"model": model,
|
||||||
|
"max_tokens": 5,
|
||||||
|
"service_tier": "standard_only",
|
||||||
|
"messages": [{"role": "user", "content": "Hello"}]
|
||||||
|
},
|
||||||
|
validator=validate_anthropic_messages_response
|
||||||
|
))
|
||||||
|
|
||||||
# ==== Models API 分页测试 ====
|
# ==== Models API 分页测试 ====
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import argparse
|
import argparse
|
||||||
from typing import Dict, List, Tuple, Any
|
from typing import Dict, List, Tuple, Any, Optional
|
||||||
from core import (
|
from core import (
|
||||||
create_ssl_context,
|
create_ssl_context,
|
||||||
TestCase,
|
TestCase,
|
||||||
@@ -98,7 +98,7 @@ def validate_openai_model_retrieve_response(response_text: str) -> Tuple[bool, L
|
|||||||
return validate_response_structure(response_text, required_fields, field_types, enum_values)
|
return validate_response_structure(response_text, required_fields, field_types, enum_values)
|
||||||
|
|
||||||
|
|
||||||
def validate_openai_chat_completion_response(response_text: str) -> Tuple[bool, List[str]]:
|
def validate_openai_chat_completion_response(response_text: str, expected_n: Optional[int] = None) -> Tuple[bool, List[str]]:
|
||||||
"""验证 OpenAI Chat Completion 响应
|
"""验证 OpenAI Chat Completion 响应
|
||||||
|
|
||||||
根据API文档,响应应包含:
|
根据API文档,响应应包含:
|
||||||
@@ -108,6 +108,10 @@ def validate_openai_chat_completion_response(response_text: str) -> Tuple[bool,
|
|||||||
- model: string
|
- model: string
|
||||||
- choices: array
|
- choices: array
|
||||||
- usage: object (可选)
|
- usage: object (可选)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
response_text: 响应文本
|
||||||
|
expected_n: 期望的 choices 数量(对应请求中的 n 参数)
|
||||||
"""
|
"""
|
||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
@@ -131,6 +135,10 @@ def validate_openai_chat_completion_response(response_text: str) -> Tuple[bool,
|
|||||||
if not isinstance(data["choices"], list):
|
if not isinstance(data["choices"], list):
|
||||||
errors.append(f"字段 'choices' 类型错误: 期望 list, 实际 {type(data['choices']).__name__}")
|
errors.append(f"字段 'choices' 类型错误: 期望 list, 实际 {type(data['choices']).__name__}")
|
||||||
else:
|
else:
|
||||||
|
# 校验 choices 数量与 n 参数匹配
|
||||||
|
if expected_n is not None and len(data["choices"]) != expected_n:
|
||||||
|
errors.append(f"choices 数量不匹配: 期望 {expected_n}, 实际 {len(data['choices'])}")
|
||||||
|
|
||||||
for i, choice in enumerate(data["choices"]):
|
for i, choice in enumerate(data["choices"]):
|
||||||
if not isinstance(choice, dict):
|
if not isinstance(choice, dict):
|
||||||
errors.append(f"choices[{i}] 不是对象")
|
errors.append(f"choices[{i}] 不是对象")
|
||||||
@@ -163,6 +171,79 @@ def validate_openai_chat_completion_response(response_text: str) -> Tuple[bool,
|
|||||||
return len(errors) == 0, errors
|
return len(errors) == 0, errors
|
||||||
|
|
||||||
|
|
||||||
|
def validate_openai_streaming_response(response_text: str, expected_n: Optional[int] = None) -> Tuple[bool, List[str]]:
    """Validate an OpenAI streaming (SSE) chat completion response.

    Checks performed:
    - every event payload is valid JSON and decodes to an object;
    - ``object`` equals ``"chat.completion.chunk"``;
    - ``choices`` exists and is a list whose items are objects with an
      ``index`` field;
    - when ``expected_n`` is given, the number of distinct choice indices
      seen across the whole stream equals ``expected_n``.

    The choice count is validated per stream, not per chunk: a chunk may
    carry fewer than ``n`` choices, and the usage chunk sent with
    ``stream_options.include_usage`` has an empty ``choices`` array, so
    comparing each chunk's length with ``expected_n`` rejects valid streams.

    ``finish_reason`` is not inspected by this validator.

    Args:
        response_text: Raw SSE response text. It is split into event payloads
            by ``core.parse_sse_events`` (assumed to return one JSON string
            per event and to drop the terminating ``[DONE]`` marker --
            confirm against ``core``).
        expected_n: Expected number of choices (the request's ``n``), or
            None to skip the count check.

    Returns:
        ``(ok, errors)``: ``ok`` is True only when ``errors`` is empty.
    """
    from core import parse_sse_events

    events = parse_sse_events(response_text)
    if not events:
        return False, ["未收到任何 SSE 事件"]

    errors = []
    seen_indices = set()

    for i, event_data in enumerate(events):
        try:
            event = json.loads(event_data)
        except json.JSONDecodeError as e:
            errors.append(f"事件[{i}] 不是有效的JSON: {e}")
            continue

        # Valid JSON is not necessarily an object; guard the key lookups
        # below against numbers, strings, null and arrays.
        if not isinstance(event, dict):
            errors.append(f"事件[{i}] 不是对象")
            continue

        if "object" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: object")
        elif event["object"] != "chat.completion.chunk":
            errors.append(
                f"事件[{i}].object 值错误: 期望 'chat.completion.chunk', 实际 '{event['object']}'"
            )

        if "choices" not in event:
            errors.append(f"事件[{i}] 缺少必需字段: choices")
            continue
        choices = event["choices"]
        if not isinstance(choices, list):
            errors.append(f"事件[{i}].choices 类型错误: 期望 list")
            continue

        # An empty list (e.g. the trailing usage chunk) simply contributes
        # no indices and is never an error by itself.
        for j, choice in enumerate(choices):
            if not isinstance(choice, dict):
                errors.append(f"事件[{i}].choices[{j}] 不是对象")
                continue
            if "index" not in choice:
                errors.append(f"事件[{i}].choices[{j}] 缺少必需字段: index")
                continue
            index = choice["index"]
            # bool is a subclass of int; exclude it so True/False are not
            # counted as indices 1/0.
            if isinstance(index, int) and not isinstance(index, bool):
                seen_indices.add(index)

    if expected_n is not None and len(seen_indices) != expected_n:
        errors.append(
            f"流式响应中 choices 数量不一致: 期望 {expected_n}, "
            f"实际出现 {len(seen_indices)} 个不同的 index: {sorted(seen_indices)}"
        )

    return not errors, errors
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="OpenAI 兼容性接口测试",
|
description="OpenAI 兼容性接口测试",
|
||||||
@@ -329,7 +410,7 @@ def main():
|
|||||||
"max_tokens": 5,
|
"max_tokens": 5,
|
||||||
"n": 2
|
"n": 2
|
||||||
},
|
},
|
||||||
validator=validate_openai_chat_completion_response
|
validator=lambda r: validate_openai_chat_completion_response(r, expected_n=2)
|
||||||
))
|
))
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
desc="seed 参数",
|
desc="seed 参数",
|
||||||
@@ -526,7 +607,7 @@ def main():
|
|||||||
"stream": True
|
"stream": True
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_openai_chat_completion_response
|
validator=validate_openai_streaming_response
|
||||||
))
|
))
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
desc="流式 + include_usage (--stream)",
|
desc="流式 + include_usage (--stream)",
|
||||||
@@ -541,7 +622,7 @@ def main():
|
|||||||
"stream_options": {"include_usage": True}
|
"stream_options": {"include_usage": True}
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_openai_chat_completion_response
|
validator=validate_openai_streaming_response
|
||||||
))
|
))
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
desc="流式 + stop sequences (--stream)",
|
desc="流式 + stop sequences (--stream)",
|
||||||
@@ -556,7 +637,7 @@ def main():
|
|||||||
"stop": ["5"]
|
"stop": ["5"]
|
||||||
},
|
},
|
||||||
stream=True,
|
stream=True,
|
||||||
validator=validate_openai_chat_completion_response
|
validator=validate_openai_streaming_response
|
||||||
))
|
))
|
||||||
|
|
||||||
# ---- --tools ----
|
# ---- --tools ----
|
||||||
@@ -738,6 +819,32 @@ def main():
|
|||||||
},
|
},
|
||||||
validator=validate_openai_chat_completion_response
|
validator=validate_openai_chat_completion_response
|
||||||
))
|
))
|
||||||
|
cases.append(TestCase(
|
||||||
|
desc="reasoning_effort: low",
|
||||||
|
method="POST",
|
||||||
|
url=chat_url,
|
||||||
|
headers=headers,
|
||||||
|
body={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role": "user", "content": "1+1=?"}],
|
||||||
|
"max_tokens": 10,
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
},
|
||||||
|
validator=validate_openai_chat_completion_response
|
||||||
|
))
|
||||||
|
cases.append(TestCase(
|
||||||
|
desc="reasoning_effort: high",
|
||||||
|
method="POST",
|
||||||
|
url=chat_url,
|
||||||
|
headers=headers,
|
||||||
|
body={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role": "user", "content": "1+1=?"}],
|
||||||
|
"max_tokens": 10,
|
||||||
|
"reasoning_effort": "high"
|
||||||
|
},
|
||||||
|
validator=validate_openai_chat_completion_response
|
||||||
|
))
|
||||||
|
|
||||||
# service_tier: 服务层级
|
# service_tier: 服务层级
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
@@ -753,6 +860,19 @@ def main():
|
|||||||
},
|
},
|
||||||
validator=validate_openai_chat_completion_response
|
validator=validate_openai_chat_completion_response
|
||||||
))
|
))
|
||||||
|
cases.append(TestCase(
|
||||||
|
desc="service_tier: default",
|
||||||
|
method="POST",
|
||||||
|
url=chat_url,
|
||||||
|
headers=headers,
|
||||||
|
body={
|
||||||
|
"model": model,
|
||||||
|
"messages": [{"role": "user", "content": "Hi"}],
|
||||||
|
"max_tokens": 5,
|
||||||
|
"service_tier": "default"
|
||||||
|
},
|
||||||
|
validator=validate_openai_chat_completion_response
|
||||||
|
))
|
||||||
|
|
||||||
# verbosity: 冗长程度
|
# verbosity: 冗长程度
|
||||||
cases.append(TestCase(
|
cases.append(TestCase(
|
||||||
|
|||||||
Reference in New Issue
Block a user