Files
lyxy-document/tests/test_core/test_parser.py
lanyuanxiaoyao c90e1c98be test: 补充缺失的核心模块测试,统一CLI测试行为
新增测试文件:
- tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result
- tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown
- tests/test_utils/test_encoding_detection.py - 测试编码检测模块
- tests/test_readers/test_html_downloader.py - 测试HTML下载器

修改:
- tests/conftest.py - 移除pytest.skip(),所有CLI测试在缺少依赖时直接失败(与HTML测试行为一致)
2026-03-12 01:18:13 +08:00

257 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""测试 parser 模块的解析调度功能。"""
import pytest
from unittest.mock import patch, MagicMock
import argparse
import sys
from core.parser import parse_input, process_content, output_result
from core.exceptions import FileDetectionError, ReaderNotFoundError
class MockReader:
    """Minimal stand-in for a Reader that returns canned answers in tests.

    Args:
        supports: Value reported by ``supports()`` for every path.
        content: Content handed back by ``parse()``.
        failures: Failure messages handed back by ``parse()``; any falsy
            value (including ``None``) is replaced by a fresh empty list.
    """

    def __init__(self, supports=True, content=None, failures=None):
        self._content = content
        self._supports = supports
        # A new list per instance avoids sharing one default between readers.
        self._failures = failures if failures else []

    def supports(self, file_path):
        """Return the preconfigured flag; ``file_path`` is ignored."""
        return self._supports

    def parse(self, file_path):
        """Return the canned ``(content, failures)`` pair; ``file_path`` is ignored."""
        return (self._content, self._failures)
class TestParseInput:
    """Tests for the parse_input function."""

    def test_parse_input_success(self):
        """A supporting reader's content comes back with no failures."""
        stub = MockReader(supports=True, content="测试内容", failures=[])
        content, failures = parse_input("test.docx", [stub])
        assert content == "测试内容"
        assert failures == []

    def test_parse_input_reader_not_found(self):
        """ReaderNotFoundError is expected when no reader supports the input."""
        unsupporting = [MockReader(supports=False)]
        with pytest.raises(ReaderNotFoundError):
            parse_input("test.docx", unsupporting)

    def test_parse_input_empty_path(self):
        """FileDetectionError is expected for an empty input path."""
        candidates = [MockReader()]
        with pytest.raises(FileDetectionError):
            parse_input("", candidates)

    def test_parse_input_multiple_readers_first_succeeds(self):
        """With several supporting readers, the first one's content is used."""
        first, second = (
            MockReader(supports=True, content=text, failures=[])
            for text in ("第一个结果", "第二个结果")
        )
        # Only the content is checked here; the failure list is not asserted.
        content, _unused_failures = parse_input("test.docx", [first, second])
        assert content == "第一个结果"

    def test_parse_input_with_failures(self):
        """Failure messages reported by the reader are passed through."""
        failing = MockReader(
            supports=True,
            content=None,
            failures=["解析器1失败", "解析器2失败"],
        )
        content, failures = parse_input("test.docx", [failing])
        assert content is None
        assert failures == ["解析器1失败", "解析器2失败"]
class TestProcessContent:
    """Tests for the process_content function."""

    def test_process_content_removes_images(self):
        """Image markup should disappear while the surrounding text stays."""
        cleaned = process_content("测试内容 ![alt](image.png) 更多内容")
        assert "![alt](image.png)" not in cleaned
        for kept_text in ("测试内容", "更多内容"):
            assert kept_text in cleaned

    def test_process_content_normalizes_whitespace(self):
        """Runs of blank lines should collapse to a single blank line."""
        raw = "line1\n\n\n\nline2\n\n\nline3"
        normalized = process_content(raw)
        assert "line1\n\nline2\n\nline3" in normalized

    def test_process_content_both_operations(self):
        """Image removal and blank-line collapsing should apply together."""
        cleaned = process_content("![img](pic.png)\n\n\n\n正文")
        assert "![img](pic.png)" not in cleaned
        assert "\n\n\n\n" not in cleaned
class TestOutputResult:
    """Tests for the output_result function."""

    @staticmethod
    def _make_args(**overrides):
        """Build the namespace passed to output_result, with every mode off.

        Keyword arguments replace the matching default attribute.
        """
        options = {
            "count": False,
            "lines": False,
            "titles": False,
            "title_content": None,
            "search": None,
            "context": 2,
        }
        options.update(overrides)
        return argparse.Namespace(**options)

    def test_output_default(self, capsys):
        """With no mode selected, the content itself should be printed."""
        output_result("测试内容", self._make_args())
        printed = capsys.readouterr().out
        assert "测试内容" in printed

    def test_output_count(self, capsys):
        """Count mode should print "4" for the four-character sample."""
        output_result("测试内容", self._make_args(count=True))
        printed = capsys.readouterr().out
        assert printed.strip() == "4"

    def test_output_lines(self, capsys):
        """Lines mode should print "3" for the three-line sample."""
        output_result("line1\nline2\nline3", self._make_args(lines=True))
        printed = capsys.readouterr().out
        assert printed.strip() == "3"

    def test_output_titles(self, capsys):
        """Titles mode should print each heading found in the content."""
        markdown = "# 标题1\n正文\n## 标题2\n正文"
        output_result(markdown, self._make_args(titles=True))
        printed = capsys.readouterr().out
        for heading in ("# 标题1", "## 标题2"):
            assert heading in printed

    def test_output_title_content_found(self, capsys):
        """An existing title should print its section without exiting."""
        options = self._make_args(title_content="目标标题")
        with patch("sys.exit") as mock_exit:
            output_result("# 目标标题\n标题下的内容", options)
        mock_exit.assert_not_called()
        printed = capsys.readouterr().out
        assert "目标标题" in printed
        assert "标题下的内容" in printed

    def test_output_title_content_not_found(self, capsys):
        """A missing title should exit with status 1 and print an error."""
        options = self._make_args(title_content="不存在的标题")
        # sys.exit is replaced by a mock, so the call is recorded and
        # execution continues instead of terminating the test.
        with patch("sys.exit") as mock_exit:
            output_result("# 标题1\n内容", options)
        mock_exit.assert_called_once_with(1)
        printed = capsys.readouterr().out
        assert any(marker in printed for marker in ("未找到", "错误"))

    def test_output_search_found(self, capsys):
        """A matching search term should be printed without exiting."""
        options = self._make_args(search="关键词")
        text = "行1\n行2\n包含关键词的行\n行4\n行5"
        with patch("sys.exit") as mock_exit:
            output_result(text, options)
        mock_exit.assert_not_called()
        printed = capsys.readouterr().out
        assert "关键词" in printed

    def test_output_search_not_found(self, capsys):
        """A search with no match should exit with status 1 and print an error."""
        options = self._make_args(search="不存在的内容")
        # sys.exit is replaced by a mock, so the call is recorded and
        # execution continues instead of terminating the test.
        with patch("sys.exit") as mock_exit:
            output_result("普通内容", options)
        mock_exit.assert_called_once_with(1)
        printed = capsys.readouterr().out
        assert any(marker in printed for marker in ("未找到", "错误"))