新增测试文件:
- tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result
- tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown
- tests/test_utils/test_encoding_detection.py - 测试编码检测模块
- tests/test_readers/test_html_downloader.py - 测试HTML下载器

修改:
- tests/conftest.py - 移除pytest.skip(),所有CLI测试在缺少依赖时直接失败(与HTML测试行为一致)
257 lines
7.4 KiB
Python
257 lines
7.4 KiB
Python
"""测试 parser 模块的解析调度功能。"""
|
||
|
||
import pytest
|
||
from unittest.mock import patch, MagicMock
|
||
import argparse
|
||
import sys
|
||
|
||
from core.parser import parse_input, process_content, output_result
|
||
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||
|
||
|
||
class MockReader:
    """Stand-in reader used by the tests in this module.

    It exposes the two methods the tests exercise, ``supports`` and
    ``parse``, and answers both with values fixed at construction time.
    The ``file_path`` argument of either method is ignored.
    """

    def __init__(self, supports=True, content=None, failures=None):
        """Store the canned answers.

        Args:
            supports: Value returned by ``supports``.
            content: First element of the tuple returned by ``parse``.
            failures: Second element of the tuple returned by ``parse``;
                a falsy value (including ``None``) is replaced by a new
                empty list.
        """
        self._supports = supports
        self._content = content
        self._failures = failures or []

    def supports(self, file_path):
        """Return the ``supports`` flag given to the constructor."""
        return self._supports

    def parse(self, file_path):
        """Return the canned ``(content, failures)`` pair."""
        return self._content, self._failures
|
||
|
||
|
||
class TestParseInput:
    """Tests for ``parse_input``."""

    def test_parse_input_success(self):
        """A single supporting reader's content and failures are returned."""
        reader = MockReader(supports=True, content="测试内容", failures=[])
        readers = [reader]

        content, failures = parse_input("test.docx", readers)

        assert content == "测试内容"
        assert failures == []

    def test_parse_input_reader_not_found(self):
        """ReaderNotFoundError is raised when no reader supports the input."""
        reader = MockReader(supports=False)
        readers = [reader]

        with pytest.raises(ReaderNotFoundError):
            parse_input("test.docx", readers)

    def test_parse_input_empty_path(self):
        """FileDetectionError is raised for an empty input path."""
        readers = [MockReader()]

        with pytest.raises(FileDetectionError):
            parse_input("", readers)

    def test_parse_input_multiple_readers_first_succeeds(self):
        """With two supporting readers, the first one's content is returned."""
        reader1 = MockReader(supports=True, content="第一个结果", failures=[])
        reader2 = MockReader(supports=True, content="第二个结果", failures=[])
        readers = [reader1, reader2]

        # Only the content is checked here; the failures list is not asserted.
        content, _ = parse_input("test.docx", readers)

        assert content == "第一个结果"

    def test_parse_input_with_failures(self):
        """Failure messages from the reader are passed through with no content."""
        reader = MockReader(
            supports=True,
            content=None,
            failures=["解析器1失败", "解析器2失败"],
        )
        readers = [reader]

        content, failures = parse_input("test.docx", readers)

        assert content is None
        assert failures == ["解析器1失败", "解析器2失败"]
|
||
|
||
|
||
class TestProcessContent:
    """Tests for ``process_content``."""

    # NOTE(review): in the source this class was recovered from, the image
    # marker literals were missing, leaving `assert "" not in result`, which
    # can never pass because the empty string is a substring of every string.
    # The marker is reconstructed below as Markdown image syntax; confirm it
    # matches what process_content actually strips.

    def test_process_content_removes_images(self):
        """Image markers are removed while the surrounding text is kept."""
        content = "测试内容 ![image](image.png) 更多内容"
        result = process_content(content)

        assert "![image](image.png)" not in result
        assert "测试内容" in result
        assert "更多内容" in result

    def test_process_content_normalizes_whitespace(self):
        """Runs of three or more newlines are collapsed to a single blank line."""
        content = "line1\n\n\n\nline2\n\n\nline3"
        result = process_content(content)

        assert "line1\n\nline2\n\nline3" in result

    def test_process_content_both_operations(self):
        """Image removal and newline collapsing are both applied to one input."""
        content = "![image](image.png)\n\n\n\n正文"
        result = process_content(content)

        assert "![image](image.png)" not in result
        assert "\n\n\n\n" not in result
|
||
|
||
|
||
class TestOutputResult:
    """Tests for ``output_result``.

    Each test builds an ``argparse.Namespace`` with one output switch set,
    calls ``output_result`` and inspects what was written to stdout through
    the ``capsys`` fixture.
    """

    @staticmethod
    def _make_args(**overrides):
        """Return a Namespace with every output switch off, then apply overrides.

        The defaults are the values each test previously spelled out in full:
        ``count=False, lines=False, titles=False, title_content=None,
        search=None, context=2``.
        """
        options = {
            "count": False,
            "lines": False,
            "titles": False,
            "title_content": None,
            "search": None,
            "context": 2,
        }
        options.update(overrides)
        return argparse.Namespace(**options)

    def test_output_default(self, capsys):
        """With no switch set, the content itself is printed."""
        args = self._make_args()

        output_result("测试内容", args)

        captured = capsys.readouterr()
        assert "测试内容" in captured.out

    def test_output_count(self, capsys):
        """``count=True`` prints the character count."""
        args = self._make_args(count=True)

        output_result("测试内容", args)

        captured = capsys.readouterr()
        assert captured.out.strip() == "4"

    def test_output_lines(self, capsys):
        """``lines=True`` prints the line count."""
        args = self._make_args(lines=True)

        output_result("line1\nline2\nline3", args)

        captured = capsys.readouterr()
        assert captured.out.strip() == "3"

    def test_output_titles(self, capsys):
        """``titles=True`` prints the heading lines."""
        args = self._make_args(titles=True)
        content = "# 标题1\n正文\n## 标题2\n正文"

        output_result(content, args)

        captured = capsys.readouterr()
        assert "# 标题1" in captured.out
        assert "## 标题2" in captured.out

    def test_output_title_content_found(self, capsys):
        """A matching ``title_content`` prints that section without exiting."""
        args = self._make_args(title_content="目标标题")
        content = "# 目标标题\n标题下的内容"

        # sys.exit is patched so an unexpected exit shows up as a failed
        # assertion instead of terminating the test run.
        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_not_called()

        captured = capsys.readouterr()
        assert "目标标题" in captured.out
        assert "标题下的内容" in captured.out

    def test_output_title_content_not_found(self, capsys):
        """A missing title reports an error and calls ``sys.exit(1)``."""
        args = self._make_args(title_content="不存在的标题")
        content = "# 标题1\n内容"

        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_called_once_with(1)

        captured = capsys.readouterr()
        assert "未找到" in captured.out or "错误" in captured.out

    def test_output_search_found(self, capsys):
        """A matching ``search`` term prints the hit without exiting."""
        args = self._make_args(search="关键词")
        content = "行1\n行2\n包含关键词的行\n行4\n行5"

        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_not_called()

        captured = capsys.readouterr()
        assert "关键词" in captured.out

    def test_output_search_not_found(self, capsys):
        """A ``search`` term with no match reports an error and calls ``sys.exit(1)``."""
        args = self._make_args(search="不存在的内容")
        content = "普通内容"

        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_called_once_with(1)

        captured = capsys.readouterr()
        assert "未找到" in captured.out or "错误" in captured.out
|