1
0

整合代码

This commit is contained in:
2026-02-15 20:25:28 +08:00
parent f30ea08805
commit f167aa2111
5 changed files with 28 additions and 96 deletions

View File

@@ -6,28 +6,12 @@ import xml.etree.ElementTree as ET
import zipfile
from typing import Any, List, Optional, Tuple
from common import build_markdown_table, filter_markdown_content, flush_list_stack
from common import build_markdown_table, flush_list_stack, parse_with_markitdown
def parse_pptx_with_markitdown(file_path: str) -> Tuple[Optional[str], Optional[str]]:
    """Parse a PPTX file with the MarkItDown library.

    Thin wrapper that delegates to the shared ``parse_with_markitdown``
    helper imported from ``common``, so the MarkItDown conversion logic
    lives in one place instead of being duplicated per file format.

    Args:
        file_path: Path of the PPTX file to convert.

    Returns:
        Whatever ``parse_with_markitdown`` returns for ``file_path``.
        NOTE(review): presumably ``(markdown_text, None)`` on success and
        ``(None, error_message)`` on failure -- confirm against the helper
        in ``common``.
    """
    return parse_with_markitdown(file_path)
def extract_formatted_text_pptx(runs: List[Any]) -> str: