增加docling作为解析器
This commit is contained in:
@@ -27,6 +27,24 @@ def parse_with_markitdown(
|
||||
return None, f"MarkItDown 解析失败: {str(e)}"
|
||||
|
||||
|
||||
def parse_with_docling(file_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Parse a file with the docling library and export it as Markdown.

    Args:
        file_path: Path of the document handed to docling's converter.

    Returns:
        A ``(content, error)`` pair. On success ``content`` is the exported
        Markdown text and ``error`` is ``None``. On failure ``content`` is
        ``None`` and ``error`` is a message describing what went wrong:
        docling is not importable, the exported Markdown is blank, or the
        conversion raised an exception.
    """
    # Imported lazily so that a missing docling installation is reported
    # through the error slot of the return value instead of raising.
    try:
        from docling.document_converter import DocumentConverter
    except ImportError:
        return None, "docling 库未安装"

    try:
        exported = DocumentConverter().convert(file_path).document.export_to_markdown()
        # Whitespace-only output is treated the same as an empty document.
        if exported.strip():
            return exported, None
        return None, "文档为空"
    except Exception as exc:
        # Any conversion failure is folded into the error message rather
        # than propagated to the caller.
        failure_text = str(exc)
        return None, f"docling 解析失败: {failure_text}"
|
||||
|
||||
|
||||
def build_markdown_table(rows_data: List[List[str]]) -> str:
|
||||
"""将二维列表转换为 Markdown 表格格式"""
|
||||
if not rows_data or not rows_data[0]:
|
||||
|
||||
Reference in New Issue
Block a user