Files
lyxy-document/tests/test_cli/conftest.py
lanyuanxiaoyao 7eab1dcef1 test: 添加全面的测试套件,覆盖所有 Reader 实现
- 测试数量从 83 个增加到 193 个 (+132%)
- 代码覆盖率从 48% 提升到 69% (+44%)
- 为每种文档格式的所有 Reader 实现创建独立测试
- 添加跨 Reader 的一致性验证测试
- 新增 4 个测试规范 (cli-testing, exception-testing, reader-testing, test-fixtures)
- 更新 README 测试统计信息

测试覆盖:
- DOCX: python-docx, markitdown, docling, native-xml, pypandoc, unstructured
- PDF: pypdf, markitdown, docling, docling-ocr, unstructured, unstructured-ocr
- HTML: html2text, markitdown, trafilatura, domscribe
- PPTX: python-pptx, markitdown, docling, native-xml, unstructured
- XLSX: pandas, markitdown, docling, native-xml, unstructured
- CLI: 所有命令行选项和错误处理

所有 193 个测试通过。
2026-03-08 22:20:21 +08:00

88 lines
2.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""CLI 测试专用 fixtures。"""
import pytest
import sys
from io import StringIO
from contextlib import redirect_stdout, redirect_stderr
@pytest.fixture
def cli_runner():
    """Provide a helper that runs the CLI ``main()`` in-process and captures its output.

    Returns:
        function: Accepts a sequence of command-line arguments and returns a
            ``(stdout, stderr, exit_code)`` tuple.
    """

    def _run_cli(args):
        """Run the CLI with ``args`` and capture everything it prints.

        Args:
            args: Command-line arguments, excluding the program name.

        Returns:
            tuple: ``(stdout, stderr, exit_code)``. ``exit_code`` is 0 when
                ``main()`` returns normally or exits with ``None``;
                otherwise it is the value carried by the ``SystemExit``.
        """
        # Imported at call time rather than when this conftest module is loaded.
        from scripts.lyxy_document_reader import main

        original_argv = sys.argv
        stdout_capture = StringIO()
        stderr_capture = StringIO()
        exit_code = 0

        try:
            # argv[0] is the program name; the caller supplies the rest.
            sys.argv = ['lyxy_document_reader', *args]

            with redirect_stdout(stdout_capture), redirect_stderr(stderr_capture):
                try:
                    main()
                except SystemExit as exc:
                    # Take the code from the exception itself so that every
                    # exit path is observed: sys.exit(), ``raise SystemExit``
                    # and ``from sys import exit`` all end up here.
                    # A code of None means a successful exit.
                    exit_code = 0 if exc.code is None else exc.code
        finally:
            # Always restore the real argv, even if main() raised.
            sys.argv = original_argv

        return stdout_capture.getvalue(), stderr_capture.getvalue(), exit_code

    return _run_cli
@pytest.fixture
def temp_test_file(tmp_path, temp_docx, temp_pdf, temp_html, temp_pptx, temp_xlsx):
    """Fixture factory that creates a temporary test file for a requested format.

    The returned callable takes:
        format_type: One of 'docx', 'pdf', 'html', 'pptx', 'xlsx'.
        **kwargs: Forwarded unchanged to the matching per-format fixture.

    Returns:
        function: A callable returning whatever the selected per-format
            fixture returns (presumably the temporary file path; confirm
            against those fixtures).

    Raises:
        ValueError: From the returned callable, when ``format_type`` is not
            one of the supported formats.
    """
    # Ordered (format name, factory) pairs; checked with == in this order.
    builders = (
        ('docx', temp_docx),
        ('pdf', temp_pdf),
        ('html', temp_html),
        ('pptx', temp_pptx),
        ('xlsx', temp_xlsx),
    )

    def _create_file(format_type, **kwargs):
        for known_format, build in builders:
            if format_type == known_format:
                return build(**kwargs)
        raise ValueError(f"不支持的格式类型: {format_type}")

    return _create_file