test: 补充缺失的核心模块测试,统一CLI测试行为
新增测试文件:
- tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result
- tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown
- tests/test_utils/test_encoding_detection.py - 测试编码检测模块
- tests/test_readers/test_html_downloader.py - 测试 HTML 下载器

修改:
- tests/conftest.py - 移除 pytest.skip(),所有 CLI 测试在缺少依赖时直接失败(与 HTML 测试行为一致)
This commit is contained in:
233
tests/test_core/test_markdown_extra.py
Normal file
233
tests/test_core/test_markdown_extra.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""测试 markdown 模块的高级功能(extract_title_content, search_markdown)。"""
|
||||
|
||||
import pytest
|
||||
|
||||
from core.markdown import extract_title_content, search_markdown
|
||||
|
||||
|
||||
class TestExtractTitleContent:
    """Tests for ``extract_title_content``."""

    def test_extract_simple_title(self):
        """The matched heading line and the text below it are both returned."""
        source = "\n".join(
            [
                "# 目标标题",
                "",
                "这是标题下的内容。",
                "第二段内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        for fragment in ("# 目标标题", "这是标题下的内容"):
            assert fragment in extracted

    def test_extract_with_subtitles(self):
        """Deeper-level headings nested under the match stay in the result."""
        source = "\n".join(
            [
                "# 目标标题",
                "",
                "这是标题下的内容。",
                "",
                "## 子标题",
                "",
                "子标题下的内容。",
                "",
                "### 孙子标题",
                "",
                "更深层的内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        for heading in ("# 目标标题", "## 子标题", "### 孙子标题"):
            assert heading in extracted

    def test_extract_stop_at_sibling_title(self):
        """A following heading of the same level is not part of the result."""
        source = "\n".join(
            [
                "# 目标标题",
                "",
                "目标内容。",
                "",
                "# 另一个标题",
                "",
                "另一个内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        assert "# 目标标题" in extracted
        assert "目标内容" in extracted
        # The sibling heading must have been cut off.
        assert "# 另一个标题" not in extracted

    def test_extract_with_parent_titles(self):
        """The parent heading of the match appears in the result as well."""
        source = "\n".join(
            [
                "# 父级标题",
                "",
                "父级内容。",
                "",
                "## 目标标题",
                "",
                "目标内容。",
                "",
                "### 子标题",
                "",
                "子内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        for heading in ("# 父级标题", "## 目标标题", "### 子标题"):
            assert heading in extracted

    def test_extract_multiple_matches(self):
        """Content of every matching heading is returned.

        The result is also expected to contain ``---`` (presumably the
        separator placed between the individual matches).
        """
        source = "\n".join(
            [
                "# 第一章",
                "",
                "## 目标标题",
                "",
                "第一章的目标内容。",
                "",
                "# 第二章",
                "",
                "## 目标标题",
                "",
                "第二章的目标内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        for fragment in ("第一章的目标内容", "第二章的目标内容", "---"):
            assert fragment in extracted

    def test_title_not_found(self):
        """``None`` is returned when no heading matches the requested title."""
        source = "# 其他标题\n内容"

        extracted = extract_title_content(source, "不存在的标题")

        assert extracted is None

    def test_deep_nested_title(self):
        """A level-4 match is returned along with all three ancestor headings."""
        source = "\n".join(
            [
                "# H1",
                "",
                "## H2",
                "",
                "### H3",
                "",
                "#### 目标标题",
                "",
                "目标内容。",
            ]
        )

        extracted = extract_title_content(source, "目标标题")

        assert extracted is not None
        for heading in ("# H1", "## H2", "### H3", "#### 目标标题"):
            assert heading in extracted
|
||||
|
||||
|
||||
class TestSearchMarkdown:
    """Tests for ``search_markdown``."""

    def test_search_simple_pattern(self):
        """A plain keyword is found when no context lines are requested."""
        text = "\n".join(["第一行", "第二行", "包含关键词的行", "第四行"])

        found = search_markdown(text, "关键词", context_lines=0)

        assert found is not None
        assert "关键词" in found

    def test_search_with_context(self):
        """With one context line, at least one neighbouring line is returned."""
        text = "\n".join(["行1", "行2", "关键词行", "行4", "行5"])

        found = search_markdown(text, "关键词", context_lines=1)

        assert found is not None
        assert "关键词" in found
        # Either the preceding or the following line is enough to pass.
        assert any(neighbour in found for neighbour in ("行2", "行4"))

    def test_search_no_match(self):
        """``None`` is returned when the pattern matches nothing."""
        text = "普通内容"

        found = search_markdown(text, "不存在的内容", context_lines=0)

        assert found is None

    def test_search_empty_content(self):
        """``None`` is returned for empty input text."""
        found = search_markdown("", "关键词", context_lines=0)

        assert found is None

    def test_search_invalid_regex(self):
        """``None`` is returned for a malformed regular expression."""
        text = "内容"

        found = search_markdown(text, "[invalid", context_lines=0)

        assert found is None

    def test_search_negative_context(self):
        """A negative ``context_lines`` value raises ``ValueError``."""
        text = "内容"

        with pytest.raises(ValueError):
            search_markdown(text, "内容", context_lines=-1)

    def test_search_multiple_matches_merged(self):
        """Both matching lines are present in the single returned result."""
        text = "\n".join(
            ["行1", "行2", "匹配1", "行4", "行5", "匹配2", "行7", "行8"]
        )

        found = search_markdown(text, "匹配", context_lines=1)

        assert found is not None
        for hit in ("匹配1", "匹配2"):
            assert hit in found

    def test_search_ignore_blank_lines_in_context(self):
        """The keyword is still found when blank lines surround the match.

        NOTE(review): only the presence of the keyword is asserted; how blank
        lines count towards the context window is not checked here.
        """
        text = "\n".join(["行1", "", "行2", "关键词", "", "行4", "行5"])

        found = search_markdown(text, "关键词", context_lines=1)

        assert found is not None
        assert "关键词" in found

    def test_search_with_regex(self):
        """The anchored pattern ``^b`` finds the line starting with ``b``."""
        text = "\n".join(["apple", "banana", "cherry", "date"])

        found = search_markdown(text, "^b", context_lines=0)

        assert found is not None
        assert "banana" in found
|
||||
Reference in New Issue
Block a user