整合代码
This commit is contained in:
@@ -5,23 +5,12 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from common import build_markdown_table, safe_open_zip
|
||||
from common import build_markdown_table, parse_with_markitdown, safe_open_zip
|
||||
|
||||
|
||||
def parse_docx_with_markitdown(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
"""使用 MarkItDown 库解析 DOCX 文件"""
|
||||
try:
|
||||
from markitdown import MarkItDown
|
||||
|
||||
md = MarkItDown()
|
||||
result = md.convert(file_path)
|
||||
if not result.text_content.strip():
|
||||
return None, "文档为空"
|
||||
return result.text_content, None
|
||||
except ImportError:
|
||||
return None, "MarkItDown 库未安装"
|
||||
except Exception as e:
|
||||
return None, f"MarkItDown 解析失败: {str(e)}"
|
||||
return parse_with_markitdown(file_path)
|
||||
|
||||
|
||||
def parse_docx_with_python_docx(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
Reference in New Issue
Block a user