"""HTML 下载器子包,支持多种下载方式按优先级降级""" from typing import Optional, Tuple, List from . import pyppeteer from . import selenium from . import httpx from . import urllib DOWNLOADERS = [ ("pyppeteer", pyppeteer.download), ("selenium", selenium.download), ("httpx", httpx.download), ("urllib", urllib.download), ] def download_html(url: str) -> Tuple[Optional[str], List[str]]: """ 统一的 HTML 下载入口,按优先级尝试各下载器 Args: url: 目标 URL Returns: (content, failures): content 成功时为 HTML 内容,所有失败时为 None failures 各下载器的失败原因列表 """ failures: List[str] = [] for name, func in DOWNLOADERS: content, error = func(url) if content is not None: return content, failures else: failures.append(f"- {name}: {error}") return None, failures