# lyxy-document/scripts/readers/html/downloader/httpx.py

"""使用 httpx 下载 URL（轻量级 HTTP 客户端）"""

from typing import Optional, Tuple

from .common import USER_AGENT


def download(url: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Download the body of ``url`` with httpx, a lightweight HTTP client.

    httpx is imported lazily so that the module can be loaded even when the
    library is not installed; in that case an error message is returned.
    Redirects are followed, so the status that is checked is the one of the
    final response in the redirect chain.

    Args:
        url: Target URL.

    Returns:
        (content, error): on success ``content`` is the decoded response
        text and ``error`` is None; on failure ``content`` is None and
        ``error`` is a short message (missing library, non-200 status,
        empty body, or the text of the exception raised during the request).
    """
    try:
        import httpx
    except ImportError:
        return None, "httpx 库未安装"

    headers = {"User-Agent": USER_AGENT}

    try:
        # httpx does not follow redirects unless asked to; without this a
        # plain http -> https or canonical-host redirect would be reported
        # as "HTTP 301"/"HTTP 302" instead of yielding the page.
        # NOTE: the `follow_redirects` keyword requires httpx >= 0.20.
        with httpx.Client(timeout=30.0, follow_redirects=True) as client:
            response = client.get(url, headers=headers)

            if response.status_code != 200:
                return None, f"HTTP {response.status_code}"

            content = response.text
            # A 200 response with a blank/whitespace-only body is treated
            # as a failed download.
            if not content or not content.strip():
                return None, "下载内容为空"
            return content, None
    except Exception as e:
        # Deliberately broad: this function reports every failure through
        # the returned error string rather than by raising.
        return None, f"httpx 下载失败: {e}"