"""使用 html2text 解析 HTML(兜底方案)""" from typing import Optional, Tuple def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 html2text 解析 HTML 文件(兜底方案)""" try: import html2text except ImportError: return None, "html2text 库未安装" try: with open(file_path, 'r', encoding='utf-8') as f: html_content = f.read() except FileNotFoundError: return None, f"文件不存在: {file_path}" except Exception as e: return None, f"读取文件失败: {str(e)}" try: converter = html2text.HTML2Text() converter.ignore_emphasis = False converter.ignore_links = False converter.ignore_images = True converter.body_width = 0 converter.skip_internal_links = True markdown_content = converter.handle(html_content) if not markdown_content.strip(): return None, "解析内容为空" return markdown_content, None except Exception as e: return None, f"html2text 解析失败: {str(e)}"