Files
lyxy-document/tests/test_core/test_markdown_extra.py
lanyuanxiaoyao c90e1c98be test: 补充缺失的核心模块测试,统一CLI测试行为
新增测试文件:
- tests/test_core/test_parser.py - 测试 parse_input/process_content/output_result
- tests/test_core/test_markdown_extra.py - 测试 extract_title_content/search_markdown
- tests/test_utils/test_encoding_detection.py - 测试编码检测模块
- tests/test_readers/test_html_downloader.py - 测试HTML下载器

修改:
- tests/conftest.py - 移除pytest.skip(),所有CLI测试在缺少依赖时直接失败(与HTML测试行为一致)
2026-03-12 01:18:13 +08:00

234 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""测试 markdown 模块的高级功能extract_title_content, search_markdown"""
import pytest
from core.markdown import extract_title_content, search_markdown
class TestExtractTitleContent:
    """Tests for the extract_title_content function."""

    def test_extract_simple_title(self):
        """A single top-level heading is returned together with its body text."""
        source_text = """# 目标标题
这是标题下的内容。
第二段内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        for fragment in ("# 目标标题", "这是标题下的内容"):
            assert fragment in extracted

    def test_extract_with_subtitles(self):
        """Nested sub-headings below the target heading are part of the result."""
        source_text = """# 目标标题
这是标题下的内容。
## 子标题
子标题下的内容。
### 孙子标题
更深层的内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        for heading in ("# 目标标题", "## 子标题", "### 孙子标题"):
            assert heading in extracted

    def test_extract_stop_at_sibling_title(self):
        """Extraction ends when a heading of the same level is reached."""
        source_text = """# 目标标题
目标内容。
# 另一个标题
另一个内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        for fragment in ("# 目标标题", "目标内容"):
            assert fragment in extracted
        # The sibling heading must not leak into the extracted section.
        assert "# 另一个标题" not in extracted

    def test_extract_with_parent_titles(self):
        """The parent heading is included alongside the target and its children."""
        source_text = """# 父级标题
父级内容。
## 目标标题
目标内容。
### 子标题
子内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        for heading in ("# 父级标题", "## 目标标题", "### 子标题"):
            assert heading in extracted

    def test_extract_multiple_matches(self):
        """Every heading with the requested title contributes to the result."""
        source_text = """# 第一章
## 目标标题
第一章的目标内容。
# 第二章
## 目标标题
第二章的目标内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        # Both matched sections must be present, and the result must
        # contain a "---" marker.
        for fragment in ("第一章的目标内容", "第二章的目标内容", "---"):
            assert fragment in extracted

    def test_title_not_found(self):
        """None is returned when no heading carries the requested title."""
        source_text = "# 其他标题\n内容"
        extracted = extract_title_content(source_text, "不存在的标题")
        assert extracted is None

    def test_deep_nested_title(self):
        """A level-4 target heading is returned with all of its ancestor headings."""
        source_text = """# H1
## H2
### H3
#### 目标标题
目标内容。"""
        extracted = extract_title_content(source_text, "目标标题")
        assert extracted is not None
        for heading in ("# H1", "## H2", "### H3", "#### 目标标题"):
            assert heading in extracted
class TestSearchMarkdown:
    """Tests for the search_markdown function."""

    def test_search_simple_pattern(self):
        """A plain keyword is found when no context lines are requested."""
        text = """第一行
第二行
包含关键词的行
第四行"""
        found = search_markdown(text, "关键词", context_lines=0)
        assert found is not None
        assert "关键词" in found

    def test_search_with_context(self):
        """With one context line, at least one neighbouring line is returned."""
        text = """行1
行2
关键词行
行4
行5"""
        found = search_markdown(text, "关键词", context_lines=1)
        assert found is not None
        assert "关键词" in found
        # Only one of the two adjacent lines is required to be present.
        assert any(neighbour in found for neighbour in ("行2", "行4"))

    def test_search_no_match(self):
        """None is returned when the pattern does not occur in the content."""
        text = "普通内容"
        found = search_markdown(text, "不存在的内容", context_lines=0)
        assert found is None

    def test_search_empty_content(self):
        """Searching an empty string yields None."""
        found = search_markdown("", "关键词", context_lines=0)
        assert found is None

    def test_search_invalid_regex(self):
        """An unparsable pattern yields None rather than raising."""
        text = "内容"
        found = search_markdown(text, "[invalid", context_lines=0)
        assert found is None

    def test_search_negative_context(self):
        """A negative context_lines value raises ValueError."""
        text = "内容"
        with pytest.raises(ValueError):
            search_markdown(text, "内容", context_lines=-1)

    def test_search_multiple_matches_merged(self):
        """Two matches in the same content both appear in the result."""
        text = """行1
行2
匹配1
行4
行5
匹配2
行7
行8"""
        found = search_markdown(text, "匹配", context_lines=1)
        assert found is not None
        for hit in ("匹配1", "匹配2"):
            assert hit in found

    def test_search_ignore_blank_lines_in_context(self):
        """Search with one context line; intended to cover blank-line handling."""
        # NOTE(review): the fixture below contains no blank lines as written, so
        # it may not exercise the blank-line behaviour the test name refers to.
        # Confirm against the intended fixture.
        text = """行1
行2
关键词
行4
行5"""
        found = search_markdown(text, "关键词", context_lines=1)
        assert found is not None
        assert "关键词" in found

    def test_search_with_regex(self):
        """The pattern "^b" locates the line "banana"."""
        text = """apple
banana
cherry
date"""
        found = search_markdown(text, "^b", context_lines=0)
        assert found is not None
        assert "banana" in found