"""使用 pypdf 库解析 PDF 文件""" from typing import Optional, Tuple def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 pypdf 库解析 PDF 文件""" try: from pypdf import PdfReader except ImportError: return None, "pypdf 库未安装" try: reader = PdfReader(file_path) md_content = [] for page in reader.pages: text = page.extract_text(extraction_mode="plain") if text and text.strip(): md_content.append(text.strip()) md_content.append("") content = "\n".join(md_content).strip() if not content: return None, "文档为空" return content, None except Exception as e: return None, f"pypdf 解析失败: {str(e)}"