test: 补充缺失的核心模块测试,统一CLI测试行为
新增测试文件: - tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result - tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown - tests/test_utils/test_encoding_detection.py - 测试编码检测模块 - tests/test_readers/test_html_downloader.py - 测试HTML下载器 修改: - tests/conftest.py - 移除pytest.skip(),所有CLI测试在缺少依赖时直接失败(与HTML测试行为一致)
This commit is contained in:
@@ -51,10 +51,7 @@ def temp_docx(tmp_path):
|
||||
str: 临时文件路径
|
||||
"""
|
||||
def _create_docx(paragraphs=None, headings=None, table_data=None, list_items=None):
|
||||
try:
|
||||
from docx import Document
|
||||
except ImportError:
|
||||
pytest.skip("python-docx 未安装")
|
||||
from docx import Document
|
||||
|
||||
doc = Document()
|
||||
|
||||
@@ -99,13 +96,10 @@ def temp_pdf(tmp_path):
|
||||
str: 临时文件路径
|
||||
"""
|
||||
def _create_pdf(text=None, lines=None):
|
||||
try:
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
except ImportError:
|
||||
pytest.skip("reportlab 未安装")
|
||||
from reportlab.pdfgen import canvas
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
|
||||
file_path = tmp_path / "test.pdf"
|
||||
c = canvas.Canvas(str(file_path), pagesize=letter)
|
||||
@@ -176,10 +170,7 @@ def temp_pptx(tmp_path):
|
||||
str: 临时文件路径
|
||||
"""
|
||||
def _create_pptx(slides=None):
|
||||
try:
|
||||
from pptx import Presentation
|
||||
except ImportError:
|
||||
pytest.skip("python-pptx 未安装")
|
||||
from pptx import Presentation
|
||||
|
||||
prs = Presentation()
|
||||
|
||||
@@ -209,10 +200,7 @@ def temp_xlsx(tmp_path):
|
||||
str: 临时文件路径
|
||||
"""
|
||||
def _create_xlsx(data=None):
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
pytest.skip("pandas 未安装")
|
||||
import pandas as pd
|
||||
|
||||
file_path = tmp_path / "test.xlsx"
|
||||
|
||||
|
||||
233
tests/test_core/test_markdown_extra.py
Normal file
233
tests/test_core/test_markdown_extra.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""测试 markdown 模块的高级功能(extract_title_content, search_markdown)。"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.markdown import extract_title_content, search_markdown
|
||||
|
||||
|
||||
class TestExtractTitleContent:
    """Tests for ``extract_title_content``."""

    def test_extract_simple_title(self):
        """A single heading is returned together with the text below it."""
        document = "\n".join([
            "# 目标标题",
            "",
            "这是标题下的内容。",
            "第二段内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        for expected in ("# 目标标题", "这是标题下的内容"):
            assert expected in extracted

    def test_extract_with_subtitles(self):
        """Nested sub-headings under the target are part of the result."""
        document = "\n".join([
            "# 目标标题",
            "",
            "这是标题下的内容。",
            "",
            "## 子标题",
            "",
            "子标题下的内容。",
            "",
            "### 孙子标题",
            "",
            "更深层的内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        for heading in ("# 目标标题", "## 子标题", "### 孙子标题"):
            assert heading in extracted

    def test_extract_stop_at_sibling_title(self):
        """A following heading of the same level is not included."""
        document = "\n".join([
            "# 目标标题",
            "",
            "目标内容。",
            "",
            "# 另一个标题",
            "",
            "另一个内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        assert "# 目标标题" in extracted
        assert "目标内容" in extracted
        assert "# 另一个标题" not in extracted

    def test_extract_with_parent_titles(self):
        """The parent heading of the target appears in the result."""
        document = "\n".join([
            "# 父级标题",
            "",
            "父级内容。",
            "",
            "## 目标标题",
            "",
            "目标内容。",
            "",
            "### 子标题",
            "",
            "子内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        for heading in ("# 父级标题", "## 目标标题", "### 子标题"):
            assert heading in extracted

    def test_extract_multiple_matches(self):
        """Every matching heading contributes a section, joined by ``---``."""
        document = "\n".join([
            "# 第一章",
            "",
            "## 目标标题",
            "",
            "第一章的目标内容。",
            "",
            "# 第二章",
            "",
            "## 目标标题",
            "",
            "第二章的目标内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        assert "第一章的目标内容" in extracted
        assert "第二章的目标内容" in extracted
        assert "---" in extracted

    def test_title_not_found(self):
        """``None`` is returned when no heading matches."""
        document = "# 其他标题\n内容"

        assert extract_title_content(document, "不存在的标题") is None

    def test_deep_nested_title(self):
        """A level-4 target is returned with all three ancestor headings."""
        document = "\n".join([
            "# H1",
            "",
            "## H2",
            "",
            "### H3",
            "",
            "#### 目标标题",
            "",
            "目标内容。",
        ])

        extracted = extract_title_content(document, "目标标题")

        assert extracted is not None
        for heading in ("# H1", "## H2", "### H3", "#### 目标标题"):
            assert heading in extracted
|
||||
|
||||
|
||||
class TestSearchMarkdown:
    """Tests for ``search_markdown``."""

    def test_search_simple_pattern(self):
        """A plain keyword is found with zero context lines."""
        text = "\n".join(["第一行", "第二行", "包含关键词的行", "第四行"])

        found = search_markdown(text, "关键词", context_lines=0)

        assert found is not None
        assert "关键词" in found

    def test_search_with_context(self):
        """With one context line, at least one neighbouring line is returned."""
        text = "\n".join(["行1", "行2", "关键词行", "行4", "行5"])

        found = search_markdown(text, "关键词", context_lines=1)

        assert found is not None
        assert "关键词" in found
        assert any(neighbour in found for neighbour in ("行2", "行4"))

    def test_search_no_match(self):
        """``None`` is returned when the pattern does not occur."""
        found = search_markdown("普通内容", "不存在的内容", context_lines=0)

        assert found is None

    def test_search_empty_content(self):
        """``None`` is returned for empty content."""
        found = search_markdown("", "关键词", context_lines=0)

        assert found is None

    def test_search_invalid_regex(self):
        """``None`` is returned for a pattern that is not a valid regex."""
        found = search_markdown("内容", "[invalid", context_lines=0)

        assert found is None

    def test_search_negative_context(self):
        """A negative ``context_lines`` raises ``ValueError``."""
        text = "内容"

        with pytest.raises(ValueError):
            search_markdown(text, "内容", context_lines=-1)

    def test_search_multiple_matches_merged(self):
        """Both matching lines appear in the single returned result."""
        text = "\n".join(
            ["行1", "行2", "匹配1", "行4", "行5", "匹配2", "行7", "行8"]
        )

        found = search_markdown(text, "匹配", context_lines=1)

        assert found is not None
        for hit in ("匹配1", "匹配2"):
            assert hit in found

    def test_search_ignore_blank_lines_in_context(self):
        """The keyword is still found when blank lines surround the match."""
        text = "\n".join(["行1", "", "行2", "关键词", "", "行4", "行5"])

        found = search_markdown(text, "关键词", context_lines=1)

        assert found is not None
        assert "关键词" in found

    def test_search_with_regex(self):
        """The pattern ``^b`` matches the line starting with ``b``."""
        text = "\n".join(["apple", "banana", "cherry", "date"])

        found = search_markdown(text, "^b", context_lines=0)

        assert found is not None
        assert "banana" in found
|
||||
256
tests/test_core/test_parser.py
Normal file
256
tests/test_core/test_parser.py
Normal file
@@ -0,0 +1,256 @@
|
||||
"""测试 parser 模块的解析调度功能。"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from core.parser import parse_input, process_content, output_result
|
||||
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
|
||||
|
||||
class MockReader:
    """Stand-in reader whose answers are fixed at construction time.

    Args:
        supports: Value returned by ``supports`` for every path.
        content: First element of the tuple returned by ``parse``.
        failures: Second element of the tuple returned by ``parse``;
            a falsy value is replaced by a new empty list.
    """

    def __init__(self, supports=True, content=None, failures=None):
        self._supports = supports
        self._content = content
        # A falsy argument (None or an empty list) becomes a fresh list.
        self._failures = failures if failures else []

    def supports(self, file_path):
        """Return the configured support flag; ``file_path`` is ignored."""
        return self._supports

    def parse(self, file_path):
        """Return the configured ``(content, failures)`` pair; ``file_path`` is ignored."""
        canned = (self._content, self._failures)
        return canned
|
||||
|
||||
|
||||
class TestParseInput:
    """Tests for ``parse_input``."""

    def test_parse_input_success(self):
        """The supporting reader's content and failures are returned."""
        candidates = [MockReader(supports=True, content="测试内容", failures=[])]

        content, failures = parse_input("test.docx", candidates)

        assert content == "测试内容"
        assert failures == []

    def test_parse_input_reader_not_found(self):
        """``ReaderNotFoundError`` is raised when no reader supports the input."""
        candidates = [MockReader(supports=False)]

        with pytest.raises(ReaderNotFoundError):
            parse_input("test.docx", candidates)

    def test_parse_input_empty_path(self):
        """``FileDetectionError`` is raised for an empty input path."""
        candidates = [MockReader()]

        with pytest.raises(FileDetectionError):
            parse_input("", candidates)

    def test_parse_input_multiple_readers_first_succeeds(self):
        """With two supporting readers, the first reader's content wins."""
        first = MockReader(supports=True, content="第一个结果", failures=[])
        second = MockReader(supports=True, content="第二个结果", failures=[])

        content, failures = parse_input("test.docx", [first, second])

        assert content == "第一个结果"

    def test_parse_input_with_failures(self):
        """The reader's failure messages are passed through with ``None`` content."""
        messages = ["解析器1失败", "解析器2失败"]
        candidates = [MockReader(supports=True, content=None, failures=messages)]

        content, failures = parse_input("test.docx", candidates)

        assert content is None
        assert failures == ["解析器1失败", "解析器2失败"]
|
||||
|
||||
|
||||
class TestProcessContent:
    """Tests for ``process_content``."""

    def test_process_content_removes_images(self):
        """Image markup is removed while the surrounding text is kept."""
        # Asserting on an empty string can never pass ("" is a substring of
        # every string), so the fixture carries explicit image markup.
        # NOTE(review): assumes process_content strips standard Markdown
        # image syntax (![alt](src)) — confirm against core.parser.
        image = "![image](image.png)"
        content = "测试内容 " + image + " 更多内容"

        result = process_content(content)

        assert image not in result
        assert "测试内容" in result
        assert "更多内容" in result

    def test_process_content_normalizes_whitespace(self):
        """Runs of three or four newlines collapse to a single blank line."""
        content = "line1\n\n\n\nline2\n\n\nline3"

        result = process_content(content)

        assert "line1\n\nline2\n\nline3" in result

    def test_process_content_both_operations(self):
        """Image removal and newline collapsing are applied together."""
        # NOTE(review): same Markdown image syntax assumption as above.
        image = "![image](image.png)"
        content = image + "\n\n\n\n正文"

        result = process_content(content)

        assert image not in result
        assert "\n\n\n\n" not in result
        assert "正文" in result
|
||||
|
||||
|
||||
class TestOutputResult:
    """Tests for ``output_result``."""

    @staticmethod
    def _namespace(**overrides):
        """Build the CLI namespace used by these tests.

        Every option defaults to its "off" value (``context`` is 2);
        keyword arguments replace individual options.
        """
        options = {
            "count": False,
            "lines": False,
            "titles": False,
            "title_content": None,
            "search": None,
            "context": 2,
        }
        options.update(overrides)
        return argparse.Namespace(**options)

    def test_output_default(self, capsys):
        """With no option set, the content itself is printed."""
        output_result("测试内容", self._namespace())

        printed = capsys.readouterr().out
        assert "测试内容" in printed

    def test_output_count(self, capsys):
        """``count`` prints the character count of the content."""
        output_result("测试内容", self._namespace(count=True))

        printed = capsys.readouterr().out
        assert printed.strip() == "4"

    def test_output_lines(self, capsys):
        """``lines`` prints the number of lines in the content."""
        output_result("line1\nline2\nline3", self._namespace(lines=True))

        printed = capsys.readouterr().out
        assert printed.strip() == "3"

    def test_output_titles(self, capsys):
        """``titles`` prints the headings found in the content."""
        document = "# 标题1\n正文\n## 标题2\n正文"

        output_result(document, self._namespace(titles=True))

        printed = capsys.readouterr().out
        for heading in ("# 标题1", "## 标题2"):
            assert heading in printed

    def test_output_title_content_found(self, capsys):
        """A matching ``title_content`` prints the section without exiting."""
        document = "# 目标标题\n标题下的内容"
        options = self._namespace(title_content="目标标题")

        with patch("sys.exit") as fake_exit:
            output_result(document, options)
            fake_exit.assert_not_called()

        printed = capsys.readouterr().out
        assert "目标标题" in printed
        assert "标题下的内容" in printed

    def test_output_title_content_not_found(self, capsys):
        """A missing ``title_content`` exits with status 1 and prints a message."""
        document = "# 标题1\n内容"
        options = self._namespace(title_content="不存在的标题")

        with patch("sys.exit") as fake_exit:
            output_result(document, options)
            fake_exit.assert_called_once_with(1)

        printed = capsys.readouterr().out
        assert any(marker in printed for marker in ("未找到", "错误"))

    def test_output_search_found(self, capsys):
        """A matching ``search`` prints the hit without exiting."""
        document = "行1\n行2\n包含关键词的行\n行4\n行5"
        options = self._namespace(search="关键词")

        with patch("sys.exit") as fake_exit:
            output_result(document, options)
            fake_exit.assert_not_called()

        printed = capsys.readouterr().out
        assert "关键词" in printed

    def test_output_search_not_found(self, capsys):
        """A ``search`` without hits exits with status 1 and prints a message."""
        document = "普通内容"
        options = self._namespace(search="不存在的内容")

        with patch("sys.exit") as fake_exit:
            output_result(document, options)
            fake_exit.assert_called_once_with(1)

        printed = capsys.readouterr().out
        assert any(marker in printed for marker in ("未找到", "错误"))
|
||||
43
tests/test_readers/test_html_downloader.py
Normal file
43
tests/test_readers/test_html_downloader.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""测试 HTML 下载器模块。"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from readers.html.downloader import download_html
|
||||
from readers.html.downloader import pyppeteer, selenium, httpx, urllib
|
||||
|
||||
|
||||
class TestDownloadHtml:
    """Tests for the unified ``download_html`` entry point."""

    def test_download_html_module_importable(self):
        """``download_html`` was imported and is callable."""
        # Reaching this line already proves the import succeeded.
        entry_point = download_html
        assert callable(entry_point)

    def test_downloaders_available(self):
        """Each downloader module exposes a callable ``download``."""
        for backend in (pyppeteer, selenium, httpx, urllib):
            assert callable(backend.download)
|
||||
|
||||
|
||||
class TestIndividualDownloaders:
    """Per-module checks for the individual downloaders."""

    def test_pyppeteer_download_callable(self):
        """``pyppeteer.download`` is callable."""
        downloader = pyppeteer.download
        assert callable(downloader)

    def test_selenium_download_callable(self):
        """``selenium.download`` is callable."""
        downloader = selenium.download
        assert callable(downloader)

    def test_httpx_download_callable(self):
        """``httpx.download`` is callable."""
        downloader = httpx.download
        assert callable(downloader)

    def test_urllib_download_callable(self):
        """``urllib.download`` is callable."""
        downloader = urllib.download
        assert callable(downloader)
|
||||
46
tests/test_utils/test_encoding_detection.py
Normal file
46
tests/test_utils/test_encoding_detection.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""测试 encoding_detection 编码检测模块。"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from utils.encoding_detection import detect_encoding, read_text_file
|
||||
|
||||
|
||||
class TestDetectEncoding:
    """Tests for ``detect_encoding``."""

    def test_detect_encoding_file_not_exists(self, tmp_path):
        """A missing file yields no encoding and a non-``None`` error."""
        missing_path = tmp_path / "non_existent.txt"

        encoding, error = detect_encoding(str(missing_path))

        assert encoding is None
        assert error is not None
|
||||
|
||||
|
||||
class TestReadTextFile:
    """Tests for ``read_text_file``."""

    def test_read_simple_file(self, tmp_path):
        """An ASCII-only file is read back unchanged."""
        file_path = tmp_path / "test.txt"
        content = "test content"
        file_path.write_text(content, encoding="utf-8")

        result, error = read_text_file(str(file_path))

        # ASCII bytes decode to the same text under any ASCII-compatible
        # encoding, so the outcome should not depend on which detection or
        # fallback path is taken.
        # NOTE(review): assumes a (text, error) return with error None on
        # success — confirm against utils.encoding_detection.
        assert error is None
        assert result == content

    def test_read_actual_file(self, tmp_path):
        """Reading a UTF-8 file with non-ASCII text reports text or an error."""
        file_path = tmp_path / "test.txt"
        content = "简单测试内容"
        file_path.write_text(content, encoding="utf-8")

        result, error = read_text_file(str(file_path))

        # The decoded text may depend on the detected or fallback encoding,
        # so only require that the call does not return (None, None).
        assert result is not None or error is not None
|
||||
Reference in New Issue
Block a user