1
0

增加 docling 作为解析器

This commit is contained in:
2026-02-15 23:17:41 +08:00
parent 4324699a3d
commit 5b362686e2
7 changed files with 106 additions and 20 deletions

View File

@@ -5,7 +5,17 @@ import xml.etree.ElementTree as ET
import zipfile
from typing import Any, List, Optional, Tuple
from common import build_markdown_table, parse_with_markitdown, safe_open_zip
from common import (
build_markdown_table,
parse_with_docling,
parse_with_markitdown,
safe_open_zip,
)
def parse_docx_with_docling(file_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Parse a DOCX file using the docling library.

    Thin DOCX-specific entry point: the path is forwarded unchanged to the
    shared ``parse_with_docling`` helper and that helper's result is
    returned as-is.

    Args:
        file_path: Path of the DOCX file to parse.

    Returns:
        The two-element tuple produced by ``parse_with_docling``; either
        element may be ``None``.
        NOTE(review): presumably (content, error message) -- confirm
        against the helper in ``common``.
    """
    outcome = parse_with_docling(file_path)
    return outcome
def parse_docx_with_pypandoc(file_path: str) -> Tuple[Optional[str], Optional[str]]: