"""使用 domscribe 解析 HTML""" from typing import Optional, Tuple def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: """使用 domscribe 解析 HTML 文件""" try: from domscribe import html_to_markdown except ImportError: return None, "domscribe 库未安装" try: with open(file_path, 'r', encoding='utf-8') as f: html_content = f.read() except FileNotFoundError: return None, f"文件不存在: {file_path}" except Exception as e: return None, f"读取文件失败: {str(e)}" try: options = { 'extract_main_content': True, } markdown_content = html_to_markdown(html_content, options) if not markdown_content.strip(): return None, "解析内容为空" return markdown_content, None except Exception as e: return None, f"domscribe 解析失败: {str(e)}"