"""Tests for the parsing behaviour of the MarkItDown DOC reader."""

import os

import pytest

from readers.doc import markitdown


class TestMarkitdownDocReaderParse:
    """Exercise the ``parse`` function of the MarkItDown DOC reader."""

    def test_module_importable(self):
        """The reader module imports and exposes a callable ``parse``."""
        assert markitdown is not None
        assert hasattr(markitdown, 'parse')
        assert callable(markitdown.parse)

    def test_file_not_exists(self, tmp_path):
        """Parsing a path that does not exist reports an error."""
        missing_path = str(tmp_path / "non_existent.doc")

        content, error = markitdown.parse(missing_path)

        # Expect no content together with a populated error value.
        assert content is None
        assert error is not None

    def test_parse_simple_doc(self, simple_doc_path):
        """Parse the DOC file provided by the ``simple_doc_path`` fixture."""
        content, _error = markitdown.parse(simple_doc_path)

        # A successful parse is not required; the call only has to return
        # without raising.
        if content is None:
            return

        # When content is produced it must hold more than whitespace.
        stripped = content.strip()
        assert len(stripped) > 0

    def test_parse_with_headings_doc(self, with_headings_doc_path):
        """Parse the DOC file from the ``with_headings_doc_path`` fixture."""
        content, _error = markitdown.parse(with_headings_doc_path)

        # Not crashing is sufficient; a missing result is tolerated.
        if content is None:
            return

        # When content is produced it must hold more than whitespace.
        stripped = content.strip()
        assert len(stripped) > 0

    def test_parse_with_table_doc(self, with_table_doc_path):
        """Parse the DOC file from the ``with_table_doc_path`` fixture."""
        content, _error = markitdown.parse(with_table_doc_path)

        # Not crashing is sufficient; a missing result is tolerated.
        if content is None:
            return

        # When content is produced it must hold more than whitespace.
        stripped = content.strip()
        assert len(stripped) > 0