"""Download a URL with pyppeteer (supports JavaScript rendering)."""

import os
import asyncio
import tempfile
from typing import Optional, Tuple

from .common import (
    USER_AGENT,
    CHROME_ARGS,
    HIDE_AUTOMATION_SCRIPT_PUPPETEER
)


def _prepare_chromium_environment() -> Optional[str]:
    """Pick the Chromium binary and configure pyppeteer's home directory.

    Reads the ``LYXY_CHROMIUM_BINARY`` environment variable. When it is unset
    or empty, ``PYPPETEER_HOME`` is pointed at a ``pyppeteer_home`` directory
    under the system temp directory.

    Returns:
        The path from ``LYXY_CHROMIUM_BINARY`` if it is set and exists on
        disk, otherwise ``None`` (which lets pyppeteer pick its own binary).
    """
    chromium_path = os.environ.get("LYXY_CHROMIUM_BINARY")
    if not chromium_path:
        os.environ["PYPPETEER_HOME"] = os.path.join(
            tempfile.gettempdir(), "pyppeteer_home"
        )
        return None
    return chromium_path if os.path.exists(chromium_path) else None


def download(url: str) -> Tuple[Optional[str], Optional[str]]:
    """Download a URL with pyppeteer (supports JavaScript rendering).

    Args:
        url: Target URL.

    Returns:
        A ``(content, error)`` tuple. On success ``content`` is the page HTML
        and ``error`` is ``None``; on failure ``content`` is ``None`` and
        ``error`` is a message describing what went wrong.
    """
    # This must run BEFORE pyppeteer is imported: pyppeteer reads
    # PYPPETEER_HOME once, at package import time, so setting it afterwards
    # has no effect on where Chromium is downloaded/looked up.
    # NOTE(review): if another module already imported pyppeteer earlier in
    # this process, the override still cannot take effect.
    executable_path = _prepare_chromium_environment()

    try:
        from pyppeteer import launch
    except ImportError:
        return None, "pyppeteer 库未安装"

    async def _download() -> str:
        browser = None
        try:
            browser = await launch(
                headless=True,
                executablePath=executable_path,
                args=CHROME_ARGS
            )
            page = await browser.newPage()
            # Inject the automation-hiding script before any page script runs.
            await page.evaluateOnNewDocument(HIDE_AUTOMATION_SCRIPT_PUPPETEER)
            await page.setJavaScriptEnabled(True)
            await page.goto(url, {"waitUntil": "networkidle2", "timeout": 30000})
            return await page.content()
        finally:
            if browser is not None:
                try:
                    await browser.close()
                except Exception:
                    # Best-effort cleanup: a failure while closing must not
                    # mask the download result or the original error.
                    pass

    try:
        content = asyncio.run(_download())
        if not content or not content.strip():
            return None, "下载内容为空"
        return content, None
    except Exception as e:
        return None, f"pyppeteer 下载失败: {e}"