"""使用 MarkItDown 解析 HTML""" from typing import Optional, Tuple def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 MarkItDown 解析 HTML 文件""" try: from markitdown import MarkItDown except ImportError: return None, "MarkItDown 库未安装" try: md = MarkItDown() result = md.convert( file_path, heading_style="ATX", strip=["img", "script", "style", "noscript"], ) markdown_content = result.text_content if not markdown_content.strip(): return None, "解析内容为空" return markdown_content, None except Exception as e: return None, f"MarkItDown 解析失败: {str(e)}"