test: 补充缺失的核心模块测试,统一CLI测试行为
新增测试文件:
- tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result
- tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown
- tests/test_utils/test_encoding_detection.py - 测试编码检测模块
- tests/test_readers/test_html_downloader.py - 测试HTML下载器

修改:
- tests/conftest.py - 移除pytest.skip(),所有CLI测试在缺少依赖时直接失败(与HTML测试行为一致)
This commit is contained in:
256
tests/test_core/test_parser.py
Normal file
256
tests/test_core/test_parser.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""测试 parser 模块的解析调度功能。"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from core.parser import parse_input, process_content, output_result
|
||||
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
|
||||
|
||||
class MockReader:
    """Minimal stand-in for a Reader, used to drive the tests in this module.

    The instance simply replays the values it was constructed with:
    ``supports()`` returns the ``supports`` flag and ``parse()`` returns the
    ``(content, failures)`` pair. The ``file_path`` argument of both methods
    is accepted but never inspected.
    """

    def __init__(self, supports=True, content=None, failures=None):
        """Store the canned answers.

        Args:
            supports: Value returned by ``supports()``.
            content: First element of the tuple returned by ``parse()``.
            failures: Second element of the tuple returned by ``parse()``;
                a falsy value (``None`` or an empty list) is replaced by a
                new empty list.
        """
        self._supports = supports
        self._content = content
        # ``None`` is the default so that instances never share one list.
        self._failures = failures or []

    def supports(self, file_path):
        """Return the canned support flag, ignoring ``file_path``."""
        return self._supports

    def parse(self, file_path):
        """Return the canned ``(content, failures)`` tuple, ignoring ``file_path``."""
        return self._content, self._failures
|
||||
|
||||
|
||||
class TestParseInput:
    """Tests for ``parse_input``."""

    def test_parse_input_success(self):
        """A supporting reader's content and empty failure list are returned."""
        reader = MockReader(supports=True, content="测试内容", failures=[])
        readers = [reader]

        content, failures = parse_input("test.docx", readers)

        assert content == "测试内容"
        assert failures == []

    def test_parse_input_reader_not_found(self):
        """``ReaderNotFoundError`` is raised when no reader supports the input."""
        readers = [MockReader(supports=False)]

        with pytest.raises(ReaderNotFoundError):
            parse_input("test.docx", readers)

    def test_parse_input_empty_path(self):
        """An empty input path raises ``FileDetectionError``."""
        readers = [MockReader()]

        with pytest.raises(FileDetectionError):
            parse_input("", readers)

    def test_parse_input_multiple_readers_first_succeeds(self):
        """With two supporting readers, the first reader's content is returned."""
        first = MockReader(supports=True, content="第一个结果", failures=[])
        second = MockReader(supports=True, content="第二个结果", failures=[])

        # Only the content is checked here; the failure list is not relevant.
        content, _ = parse_input("test.docx", [first, second])

        assert content == "第一个结果"

    def test_parse_input_with_failures(self):
        """A reader's ``None`` content and failure messages are passed through."""
        reader = MockReader(
            supports=True,
            content=None,
            failures=["解析器1失败", "解析器2失败"],
        )

        content, failures = parse_input("test.docx", [reader])

        assert content is None
        assert failures == ["解析器1失败", "解析器2失败"]
|
||||
|
||||
|
||||
class TestProcessContent:
    """Tests for ``process_content``."""

    # NOTE(review): the image literal was missing from these tests as received,
    # leaving ``assert "" not in result`` -- which can never pass, because the
    # empty string is a substring of every string. The markdown image below is
    # a reconstruction; confirm it matches the markup process_content strips.
    IMAGE_MARKUP = "![图片](image.png)"

    def test_process_content_removes_images(self):
        """Image markup is removed while the surrounding text is kept."""
        content = f"测试内容 {self.IMAGE_MARKUP} 更多内容"

        result = process_content(content)

        assert self.IMAGE_MARKUP not in result
        assert "测试内容" in result
        assert "更多内容" in result

    def test_process_content_normalizes_whitespace(self):
        """Runs of blank lines are collapsed to a single blank line."""
        content = "line1\n\n\n\nline2\n\n\nline3"

        result = process_content(content)

        assert "line1\n\nline2\n\nline3" in result

    def test_process_content_both_operations(self):
        """Image removal and blank-line collapsing are applied together."""
        content = f"{self.IMAGE_MARKUP}\n\n\n\n正文"

        result = process_content(content)

        assert self.IMAGE_MARKUP not in result
        assert "\n\n\n\n" not in result
|
||||
|
||||
|
||||
class TestOutputResult:
    """Tests for ``output_result``."""

    @staticmethod
    def _make_args(**overrides):
        """Build the argument namespace passed to ``output_result``.

        Every output option starts switched off (``context`` is 2); keyword
        arguments replace individual fields.
        """
        options = {
            "count": False,
            "lines": False,
            "titles": False,
            "title_content": None,
            "search": None,
            "context": 2,
        }
        options.update(overrides)
        return argparse.Namespace(**options)

    def test_output_default(self, capsys):
        """With no option selected, the content itself is printed."""
        args = self._make_args()

        output_result("测试内容", args)

        captured = capsys.readouterr()
        assert "测试内容" in captured.out

    def test_output_count(self, capsys):
        """``count`` prints the number of characters."""
        args = self._make_args(count=True)

        output_result("测试内容", args)

        captured = capsys.readouterr()
        assert captured.out.strip() == "4"

    def test_output_lines(self, capsys):
        """``lines`` prints the number of lines."""
        args = self._make_args(lines=True)

        output_result("line1\nline2\nline3", args)

        captured = capsys.readouterr()
        assert captured.out.strip() == "3"

    def test_output_titles(self, capsys):
        """``titles`` prints the heading lines."""
        args = self._make_args(titles=True)
        content = "# 标题1\n正文\n## 标题2\n正文"

        output_result(content, args)

        captured = capsys.readouterr()
        assert "# 标题1" in captured.out
        assert "## 標题2".replace("標", "标") in captured.out

    def test_output_title_content_found(self, capsys):
        """``title_content`` prints the matching heading and its body."""
        args = self._make_args(title_content="目标标题")
        content = "# 目标标题\n标题下的内容"

        # sys.exit is mocked so the call can be asserted without ending the run.
        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_not_called()

        captured = capsys.readouterr()
        assert "目标标题" in captured.out
        assert "标题下的内容" in captured.out

    def test_output_title_content_not_found(self, capsys):
        """A missing heading exits with status 1 and prints an error message."""
        args = self._make_args(title_content="不存在的标题")
        content = "# 标题1\n内容"

        # sys.exit is mocked so the call can be asserted without ending the run.
        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_called_once_with(1)

        captured = capsys.readouterr()
        assert "未找到" in captured.out or "错误" in captured.out

    def test_output_search_found(self, capsys):
        """``search`` prints the matching text without exiting."""
        args = self._make_args(search="关键词")
        content = "行1\n行2\n包含关键词的行\n行4\n行5"

        # sys.exit is mocked so the call can be asserted without ending the run.
        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_not_called()

        captured = capsys.readouterr()
        assert "关键词" in captured.out

    def test_output_search_not_found(self, capsys):
        """A search with no match exits with status 1 and prints an error message."""
        args = self._make_args(search="不存在的内容")
        content = "普通内容"

        # sys.exit is mocked so the call can be asserted without ending the run.
        with patch("sys.exit") as mock_exit:
            output_result(content, args)
            mock_exit.assert_called_once_with(1)

        captured = capsys.readouterr()
        assert "未找到" in captured.out or "错误" in captured.out
|
||||
Reference in New Issue
Block a user