"""HTML 下载器子包,支持多种下载方式按优先级降级"""
from typing import Optional, Tuple, List
from . import pyppeteer
from . import selenium
from . import httpx
from . import urllib
# Downloader registry as (label, callable) pairs. download_html walks this
# list front to back, so the position of an entry is its priority.
DOWNLOADERS = [
    (label, backend.download)
    for label, backend in (
        ("pyppeteer", pyppeteer),
        ("selenium", selenium),
        ("httpx", httpx),
        ("urllib", urllib),
    )
]
def download_html(url: str) -> Tuple[Optional[str], List[str]]:
    """Download the HTML for *url*, falling back through DOWNLOADERS in order.

    Each downloader is called as ``func(url)`` and is expected to return a
    ``(content, error)`` pair. The first downloader whose content is not
    ``None`` wins and no further downloaders are tried.

    Args:
        url: Target URL.

    Returns:
        A ``(content, failures)`` tuple. ``content`` is the value returned by
        the first successful downloader, or ``None`` when every downloader
        failed. ``failures`` holds one ``"- <name>: <reason>"`` entry for each
        downloader that was tried and failed before the result was produced.
    """
    failures: List[str] = []
    for name, func in DOWNLOADERS:
        try:
            content, error = func(url)
        except Exception as exc:
            # A downloader that raises (or returns something that is not a
            # 2-tuple) must not abort the fallback chain: record it as a
            # failure and move on to the next downloader.
            failures.append(f"- {name}: {type(exc).__name__}: {exc}")
            continue
        if content is not None:
            return content, failures
        failures.append(f"- {name}: {error}")
    return None, failures