"""使用 unstructured 库解析 PDF 文件(fast 策略)""" from typing import Optional, Tuple from core import _unstructured_elements_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 unstructured 库解析 PDF 文件(fast 策略)""" try: from unstructured.partition.pdf import partition_pdf except ImportError: return None, "unstructured 库未安装" try: elements = partition_pdf( filename=file_path, infer_table_structure=True, strategy="fast", languages=["chi_sim"], ) # fast 策略不做版面分析,Title 类型标注不可靠 content = _unstructured_elements_to_markdown(elements, trust_titles=False) if not content.strip(): return None, "文档为空" return content, None except Exception as e: return None, f"unstructured 解析失败: {str(e)}"