"""使用 selenium 下载 URL(支持 JS 渲染)""" import os from typing import Optional, Tuple from .common import ( USER_AGENT, CHROME_ARGS, HIDE_AUTOMATION_SCRIPT ) def download(url: str) -> Tuple[Optional[str], Optional[str]]: """ 使用 selenium 下载 URL(支持 JS 渲染) Args: url: 目标 URL Returns: (content, error): content 成功时为 HTML 内容,失败时为 None error 成功时为 None,失败时为错误信息 """ try: from selenium import webdriver from selenium.webdriver.chrome.service import Service from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import WebDriverWait except ImportError: return None, "selenium 库未安装" driver_path = os.environ.get("LYXY_CHROMIUM_DRIVER") binary_path = os.environ.get("LYXY_CHROMIUM_BINARY") if not driver_path or not os.path.exists(driver_path): return None, "LYXY_CHROMIUM_DRIVER 环境变量未设置或文件不存在" if not binary_path or not os.path.exists(binary_path): return None, "LYXY_CHROMIUM_BINARY 环境变量未设置或文件不存在" chrome_options = Options() chrome_options.binary_location = binary_path chrome_options.add_argument("--headless=new") for arg in CHROME_ARGS: chrome_options.add_argument(arg) # 隐藏自动化特征 chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"]) chrome_options.add_experimental_option("useAutomationExtension", False) driver = None try: import time service = Service(driver_path) driver = webdriver.Chrome(service=service, options=chrome_options) # 隐藏 webdriver 属性 driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { "source": HIDE_AUTOMATION_SCRIPT }) driver.get(url) # 等待页面内容稳定 WebDriverWait(driver, 30).until( lambda d: d.execute_script("return document.readyState") == "complete" ) last_len = 0 stable_count = 0 for _ in range(30): current_len = len(driver.page_source) if current_len == last_len: stable_count += 1 if stable_count >= 2: break else: stable_count = 0 last_len = current_len time.sleep(0.5) content = driver.page_source if not content or not content.strip(): return None, "下载内容为空" return content, None except Exception as e: return None, f"selenium 下载失败: {str(e)}" finally: if driver is not None: try: driver.quit() except Exception: pass