"""测试 CLI 主函数功能。""" import pytest import os class TestCLIDefaultOutput: """测试 CLI 默认输出功能。""" def test_default_output_docx(self, cli_runner, temp_docx): """测试默认输出 DOCX 文件的 Markdown 内容。""" file_path = temp_docx(paragraphs=["测试内容段落"]) stdout, stderr, exit_code = cli_runner([file_path]) assert exit_code == 0 assert "测试内容段落" in stdout assert len(stdout.strip()) > 0 def test_default_output_pdf(self, cli_runner, temp_pdf): """测试默认输出 PDF 文件的 Markdown 内容。""" file_path = temp_pdf(text="PDF测试内容") stdout, stderr, exit_code = cli_runner([file_path]) assert exit_code == 0 # PDF 解析可能有格式差异,只验证有输出 assert len(stdout.strip()) > 0 def test_default_output_html(self, cli_runner, temp_html): """测试默认输出 HTML 文件的 Markdown 内容。""" file_path = temp_html(content="

HTML标题

HTML内容

") stdout, stderr, exit_code = cli_runner([file_path]) assert exit_code == 0 assert "HTML标题" in stdout or "HTML内容" in stdout class TestCLICountOption: """测试 CLI 字数统计功能。""" def test_count_option(self, cli_runner, temp_docx): """测试 -c 选项统计字数。""" file_path = temp_docx(paragraphs=["测试内容"]) stdout, stderr, exit_code = cli_runner([file_path, "-c"]) assert exit_code == 0 # 输出应该是一个数字 assert stdout.strip().isdigit() count = int(stdout.strip()) assert count > 0 def test_count_option_long_form(self, cli_runner, temp_docx): """测试 --count 选项。""" file_path = temp_docx(paragraphs=["测试"]) stdout, stderr, exit_code = cli_runner([file_path, "--count"]) assert exit_code == 0 assert stdout.strip().isdigit() class TestCLILinesOption: """测试 CLI 行数统计功能。""" def test_lines_option(self, cli_runner, temp_docx): """测试 -l 选项统计行数。""" file_path = temp_docx(paragraphs=["第一行", "第二行", "第三行"]) stdout, stderr, exit_code = cli_runner([file_path, "-l"]) assert exit_code == 0 # 输出应该是一个数字 assert stdout.strip().isdigit() lines = int(stdout.strip()) assert lines > 0 class TestCLITitlesOption: """测试 CLI 标题提取功能。""" def test_titles_option(self, cli_runner, temp_docx): """测试 -t 选项提取标题。""" file_path = temp_docx( headings=[(1, "一级标题"), (2, "二级标题")], paragraphs=["普通段落"] ) stdout, stderr, exit_code = cli_runner([file_path, "-t"]) assert exit_code == 0 # 输出应该包含标题 assert "一级标题" in stdout assert "二级标题" in stdout # 不应该包含普通段落 assert "普通段落" not in stdout class TestCLITitleContentOption: """测试 CLI 标题内容提取功能。""" def test_title_content_option(self, cli_runner, temp_docx): """测试 -tc 选项提取标题内容。""" file_path = temp_docx( headings=[(1, "目标标题")], paragraphs=["标题下的内容"] ) stdout, stderr, exit_code = cli_runner([file_path, "-tc", "目标标题"]) assert exit_code == 0 assert "目标标题" in stdout assert "标题下的内容" in stdout def test_title_content_not_found(self, cli_runner, temp_docx): """测试标题不存在时的错误处理。""" file_path = temp_docx(paragraphs=["测试内容"]) stdout, stderr, exit_code = cli_runner([file_path, "-tc", "不存在的标题"]) assert exit_code != 0 # 应该输出错误信息 output = stdout + stderr assert "未找到" in output or "不存在" in output or "错误" in output class TestCLISearchOption: """测试 CLI 搜索功能。""" def test_search_option(self, cli_runner, temp_docx): """测试 -s 选项搜索内容。""" file_path = temp_docx(paragraphs=["包含关键词的段落", "其他内容"]) stdout, stderr, exit_code = cli_runner([file_path, "-s", "关键词"]) assert exit_code == 0 assert "关键词" in stdout def test_search_no_match(self, cli_runner, temp_docx): """测试搜索无匹配时的错误处理。""" file_path = temp_docx(paragraphs=["测试内容"]) stdout, stderr, exit_code = cli_runner([file_path, "-s", "不存在的内容"]) assert exit_code != 0 # 应该输出错误信息 output = stdout + stderr assert "未找到" in output or "无匹配" in output or "错误" in output def test_search_with_context(self, cli_runner, temp_docx): """测试 -n 选项设置上下文行数。""" file_path = temp_docx( paragraphs=["第一行", "第二行", "包含关键词的行", "第四行", "第五行"] ) stdout, stderr, exit_code = cli_runner([file_path, "-s", "关键词", "-n", "2"]) assert exit_code == 0 assert "关键词" in stdout # 应该包含上下文 assert "第二行" in stdout or "第四行" in stdout class TestCLIErrorHandling: """测试 CLI 错误处理。""" def test_file_not_exists(self, cli_runner, tmp_path): """测试文件不存在时的错误处理。""" non_existent = str(tmp_path / "non_existent.docx") stdout, stderr, exit_code = cli_runner([non_existent]) assert exit_code != 0 output = stdout + stderr assert "错误" in output or "不存在" in output def test_unsupported_format(self, cli_runner, tmp_path): """测试不支持的文件类型。""" unsupported_file = tmp_path / "test.xyz" unsupported_file.write_text("test content") stdout, stderr, exit_code = cli_runner([str(unsupported_file)]) assert exit_code != 0 output = stdout + stderr assert "reader" in output.lower() or "支持" in output def test_all_readers_failed(self, cli_runner, tmp_path): """测试所有 Reader 失败时的错误输出。""" # 创建一个看起来像 DOCX 但实际损坏的文件 fake_docx = tmp_path / "fake.docx" fake_docx.write_bytes(b"not a real docx file") stdout, stderr, exit_code = cli_runner([str(fake_docx)]) assert exit_code != 0 output = stdout + stderr # 应该列出失败原因 assert "失败" in output or "错误" in output