整合代码
This commit is contained in:
@@ -3,21 +3,12 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from common import parse_with_markitdown
|
||||
|
||||
|
||||
def parse_pdf_with_markitdown(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""使用 MarkItDown 库解析 PDF 文件"""
|
||||
try:
|
||||
from markitdown import MarkItDown
|
||||
|
||||
md = MarkItDown()
|
||||
result = md.convert(file_path)
|
||||
if not result.text_content.strip():
|
||||
return None, "文档为空"
|
||||
return result.text_content, None
|
||||
except ImportError:
|
||||
return None, "MarkItDown 库未安装"
|
||||
except Exception as e:
|
||||
return None, f"MarkItDown 解析失败: {str(e)}"
|
||||
return parse_with_markitdown(file_path)
|
||||
|
||||
|
||||
def parse_pdf_with_unstructured(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
Reference in New Issue
Block a user