- 测试数量从 83 个增加到 193 个 (+132%) - 代码覆盖率从 48% 提升到 69% (+44%) - 为每种文档格式的所有 Reader 实现创建独立测试 - 添加跨 Reader 的一致性验证测试 - 新增 4 个测试规范 (cli-testing, exception-testing, reader-testing, test-fixtures) - 更新 README 测试统计信息 测试覆盖: - DOCX: python-docx, markitdown, docling, native-xml, pypandoc, unstructured - PDF: pypdf, markitdown, docling, docling-ocr, unstructured, unstructured-ocr - HTML: html2text, markitdown, trafilatura, domscribe - PPTX: python-pptx, markitdown, docling, native-xml, unstructured - XLSX: pandas, markitdown, docling, native-xml, unstructured - CLI: 所有命令行选项和错误处理 所有 193 个测试通过。
88 lines
2.4 KiB
Python
88 lines
2.4 KiB
Python
"""CLI 测试专用 fixtures。"""
|
||
|
||
import pytest
|
||
import sys
|
||
from io import StringIO
|
||
from contextlib import redirect_stdout, redirect_stderr
|
||
|
||
|
||
@pytest.fixture
def cli_runner():
    """Provide a helper that runs the CLI ``main()`` in-process and captures its output.

    Returns:
        function: Callable that takes a list of CLI arguments and returns a
            ``(stdout, stderr, exit_code)`` tuple.
    """
    def _run_cli(args):
        """Run the CLI with ``args`` and capture what it writes.

        Args:
            args: Command-line arguments, excluding the program name.

        Returns:
            tuple: ``(stdout, stderr, exit_code)``. ``exit_code`` is 0 when
                ``main()`` returns normally or exits without a code;
                otherwise it is the value carried by the ``SystemExit``.
        """
        # Deferred import: resolved each time the helper is invoked.
        from scripts.lyxy_document_reader import main

        stdout_capture = StringIO()
        stderr_capture = StringIO()
        exit_code = 0

        # Remember the real argv so it can be restored even if main() fails.
        original_argv = sys.argv

        try:
            # list(args) lets callers pass any sequence, not only a list.
            sys.argv = ['lyxy_document_reader'] + list(args)

            with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
                try:
                    main()
                except SystemExit as exc:
                    # Take the code from the exception itself rather than from
                    # a patched sys.exit: this also covers a direct
                    # ``raise SystemExit(n)`` and avoids replacing a
                    # process-global function. A missing code means success.
                    exit_code = 0 if exc.code is None else exc.code
        finally:
            # Restore the original command line for subsequent tests.
            sys.argv = original_argv

        return stdout_capture.getvalue(), stderr_capture.getvalue(), exit_code

    return _run_cli
|
||
|
||
|
||
@pytest.fixture
def temp_test_file(tmp_path, temp_docx, temp_pdf, temp_html, temp_pptx, temp_xlsx):
    """Fixture factory that creates a temporary test file for a given format.

    The returned callable accepts:

    Args:
        format_type: File format ('docx', 'pdf', 'html', 'pptx', 'xlsx').
        **kwargs: Forwarded unchanged to the fixture for that format.

    Returns:
        str: Path of the temporary file, as returned by the per-format fixture.

    Raises:
        ValueError: If ``format_type`` is not one of the supported formats.
    """
    # Map each supported format name to the fixture factory that builds it.
    builders = {
        'docx': temp_docx,
        'pdf': temp_pdf,
        'html': temp_html,
        'pptx': temp_pptx,
        'xlsx': temp_xlsx,
    }

    def _create_file(format_type, **kwargs):
        # Equality-based scan mirrors a chain of ``==`` comparisons, so an
        # unhashable format_type still ends in ValueError below.
        for name, builder in builders.items():
            if format_type == name:
                return builder(**kwargs)
        raise ValueError(f"不支持的格式类型: {format_type}")

    return _create_file
|