"""使用 pypandoc-binary 库解析 DOCX 文件""" from typing import Optional, Tuple def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 pypandoc-binary 库解析 DOCX 文件""" try: import pypandoc except ImportError: return None, "pypandoc-binary 库未安装" try: content = pypandoc.convert_file( source_file=file_path, to="md", format="docx", outputfile=None, extra_args=["--wrap=none"], ) except OSError as exc: return None, f"pypandoc-binary 缺少 Pandoc 可执行文件: {exc}" except RuntimeError as exc: return None, f"pypandoc-binary 解析失败: {exc}" content = content.strip() if not content: return None, "文档为空" return content, None