refactor: 简化代码,消除重复逻辑

- 删除 tests/test_readers/conftest.py 中重复的 temp_html fixture
- 为 generate_uv_command/generate_python_command 添加 include_pyarmor 参数
- 新增 generate_uv_args 函数用于生成 subprocess 可用的参数列表
- lyxy_document_reader.py 复用 generate_uv_args 函数
This commit is contained in:
2026-03-15 10:28:04 +08:00
parent 82b09614d3
commit a5c0b67360
3 changed files with 53 additions and 45 deletions

View File

@@ -92,7 +92,8 @@ def generate_uv_command(
dependencies: list,
input_path: str,
python_version: Optional[str] = None,
script_path: str = "scripts/lyxy_document_reader.py"
script_path: str = "scripts/lyxy_document_reader.py",
include_pyarmor: bool = True
) -> str:
"""
生成 uv run 命令。
@@ -102,6 +103,7 @@ def generate_uv_command(
input_path: 输入文件路径或 URL
python_version: 需要的 python 版本,None 表示不指定
script_path: 脚本路径
include_pyarmor: 是否包含 pyarmor 依赖
Returns:
uv run 命令字符串
@@ -111,8 +113,8 @@ def generate_uv_command(
if python_version:
parts.append(f"--python {python_version}")
# 始终添加 pyarmor 依赖(混淆后脚本需要)
parts.append("--with pyarmor")
if include_pyarmor:
parts.append("--with pyarmor")
for dep in dependencies:
# 处理包含空格的依赖(如 unstructured[pdf]),需要加引号
@@ -126,10 +128,45 @@ def generate_uv_command(
return " ".join(parts)
def generate_uv_args(
    dependencies: list,
    script_path: str,
    python_version: Optional[str] = None,
    include_pyarmor: bool = True
) -> list:
    """Build the ``uv run`` argument list (suitable for ``subprocess.run``).

    Unlike a shell command string, every flag and value is a separate list
    element, so dependency specifiers such as ``unstructured[pdf]`` need no
    quoting.

    Args:
        dependencies: Dependency specifiers, each emitted as ``--with <dep>``.
            ``None`` is treated as "no dependencies" and a bare string is
            treated as a single dependency.
        script_path: Path of the script to run; appended after all options.
        python_version: Required Python version; a falsy value (``None`` or
            ``""``) omits the ``--python`` option.
        include_pyarmor: Whether to add ``--with pyarmor`` ahead of the other
            dependencies.

    Returns:
        A new list: ``["uv", "run", <options...>, script_path]``.
    """
    # Normalise the two easy-to-make caller mistakes: None would raise a
    # TypeError in the loop below, and a bare string would be iterated
    # character by character, yielding one bogus ``--with`` per character.
    if dependencies is None:
        dependencies = []
    elif isinstance(dependencies, str):
        dependencies = [dependencies]

    args = ["uv", "run"]

    if python_version:
        args.extend(["--python", python_version])

    if include_pyarmor:
        args.extend(["--with", "pyarmor"])

    for dep in dependencies:
        args.extend(["--with", dep])

    # The script path goes last so the caller can append script arguments.
    args.append(script_path)
    return args
def generate_python_command(
dependencies: list,
input_path: str,
script_path: str = "scripts/lyxy_document_reader.py"
script_path: str = "scripts/lyxy_document_reader.py",
include_pyarmor: bool = True
) -> Tuple[str, str]:
"""
生成 python 命令和 pip 安装命令。
@@ -138,14 +175,17 @@ def generate_python_command(
dependencies: 依赖包列表
input_path: 输入文件路径或 URL
script_path: 脚本路径
include_pyarmor: 是否包含 pyarmor 依赖
Returns:
(python_command, pip_command) 元组
"""
python_cmd = f"python {script_path} {input_path}"
# 构建 pip install 命令,处理带引号的依赖,始终包含 pyarmor
pip_parts = ["pip install", "pyarmor"]
# 构建 pip install 命令,处理带引号的依赖
pip_parts = ["pip install"]
if include_pyarmor:
pip_parts.append("pyarmor")
for dep in dependencies:
pip_parts.append(dep)
pip_cmd = " ".join(pip_parts)

View File

@@ -75,6 +75,7 @@ def main():
detect_file_type_light,
get_platform,
get_dependencies,
generate_uv_args,
)
from readers import READERS
@@ -93,19 +94,12 @@ def main():
python_version, dependencies = get_dependencies(reader_cls, platform_id)
# 生成 uv 命令参数列表
uv_args = ["uv", "run"]
if python_version:
uv_args.extend(["--python", python_version])
# 始终添加 pyarmor 依赖(混淆后脚本需要)
uv_args.extend(["--with", "pyarmor"])
for dep in dependencies:
uv_args.extend(["--with", dep])
# 目标脚本是 bootstrap.py
uv_args.append("scripts/bootstrap.py")
uv_args = generate_uv_args(
dependencies=dependencies,
script_path="scripts/bootstrap.py",
python_version=python_version,
include_pyarmor=True
)
# 添加所有命令行参数
uv_args.extend(sys.argv[1:])

View File

@@ -4,32 +4,6 @@ import pytest
from pathlib import Path
@pytest.fixture
def temp_html(tmp_path):
    """Fixture factory that writes a temporary HTML file.

    The fixture value is a callable accepting:
        content: HTML content string.
        encoding: File encoding, 'utf-8' by default.

    The callable returns:
        str: Path of the temporary file that was written.
    """
    def _create_html(content="<html><body><p>Test</p></body></html>", encoding='utf-8'):
        # Wrap bare fragments in a minimal document that declares the charset.
        already_full_document = content.strip().startswith('<html')
        if not already_full_document:
            content = f"<html><head><meta charset='{encoding}'></head><body>{content}</body></html>"

        # Every call writes to the same file name inside this test's tmp_path.
        target = tmp_path / "test.html"
        target.write_text(content, encoding=encoding)
        return str(target)

    return _create_html
# 静态测试文件目录
FIXTURES_DIR = Path(__file__).parent / "fixtures"