refactor: 简化代码，消除重复逻辑

- 删除 tests/test_readers/conftest.py 中重复的 temp_html fixture
- 为 generate_uv_command/generate_python_command 添加 include_pyarmor 参数
- 新增 generate_uv_args 函数用于生成 subprocess 可用的参数列表
- lyxy_document_reader.py 复用 generate_uv_args 函数
This commit is contained in:
@@ -92,7 +92,8 @@ def generate_uv_command(
|
|||||||
dependencies: list,
|
dependencies: list,
|
||||||
input_path: str,
|
input_path: str,
|
||||||
python_version: Optional[str] = None,
|
python_version: Optional[str] = None,
|
||||||
script_path: str = "scripts/lyxy_document_reader.py"
|
script_path: str = "scripts/lyxy_document_reader.py",
|
||||||
|
include_pyarmor: bool = True
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
生成 uv run 命令。
|
生成 uv run 命令。
|
||||||
@@ -102,6 +103,7 @@ def generate_uv_command(
|
|||||||
input_path: 输入文件路径或 URL
|
input_path: 输入文件路径或 URL
|
||||||
python_version: 需要的 python 版本,None 表示不指定
|
python_version: 需要的 python 版本,None 表示不指定
|
||||||
script_path: 脚本路径
|
script_path: 脚本路径
|
||||||
|
include_pyarmor: 是否包含 pyarmor 依赖
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
uv run 命令字符串
|
uv run 命令字符串
|
||||||
@@ -111,8 +113,8 @@ def generate_uv_command(
|
|||||||
if python_version:
|
if python_version:
|
||||||
parts.append(f"--python {python_version}")
|
parts.append(f"--python {python_version}")
|
||||||
|
|
||||||
# 始终添加 pyarmor 依赖(混淆后脚本需要)
|
if include_pyarmor:
|
||||||
parts.append("--with pyarmor")
|
parts.append("--with pyarmor")
|
||||||
|
|
||||||
for dep in dependencies:
|
for dep in dependencies:
|
||||||
# 处理包含空格的依赖(如 unstructured[pdf]),需要加引号
|
# 处理包含空格的依赖(如 unstructured[pdf]),需要加引号
|
||||||
@@ -126,10 +128,45 @@ def generate_uv_command(
|
|||||||
return " ".join(parts)
|
return " ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_uv_args(
    dependencies: list,
    script_path: str,
    python_version: Optional[str] = None,
    include_pyarmor: bool = True
) -> list:
    """Build the ``uv run`` argument list for use with ``subprocess.run``.

    Unlike a shell command string, every option and value is returned as
    its own list element, so dependency specifiers are passed through
    verbatim without any quoting.

    Args:
        dependencies: Dependency specifiers; each one is emitted as a
            ``--with <dep>`` pair, in the given order. ``None`` is treated
            as "no dependencies".
        script_path: Path of the script to run; always the last element of
            the returned list.
        python_version: Python version passed via ``--python``. ``None``
            (or an empty string) omits the option.
        include_pyarmor: Whether to add ``--with pyarmor`` ahead of the
            other dependencies.

    Returns:
        A new list: ``["uv", "run", <options...>, script_path]``.
    """
    args = ["uv", "run"]

    if python_version:
        args.extend(["--python", python_version])

    if include_pyarmor:
        args.extend(["--with", "pyarmor"])

    # `or []` keeps a missing dependency list from raising TypeError.
    for dep in dependencies or []:
        args.extend(["--with", dep])

    # The script goes last so callers can append its own CLI arguments.
    args.append(script_path)

    return args
|
||||||
|
|
||||||
|
|
||||||
def generate_python_command(
|
def generate_python_command(
|
||||||
dependencies: list,
|
dependencies: list,
|
||||||
input_path: str,
|
input_path: str,
|
||||||
script_path: str = "scripts/lyxy_document_reader.py"
|
script_path: str = "scripts/lyxy_document_reader.py",
|
||||||
|
include_pyarmor: bool = True
|
||||||
) -> Tuple[str, str]:
|
) -> Tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
生成 python 命令和 pip 安装命令。
|
生成 python 命令和 pip 安装命令。
|
||||||
@@ -138,14 +175,17 @@ def generate_python_command(
|
|||||||
dependencies: 依赖包列表
|
dependencies: 依赖包列表
|
||||||
input_path: 输入文件路径或 URL
|
input_path: 输入文件路径或 URL
|
||||||
script_path: 脚本路径
|
script_path: 脚本路径
|
||||||
|
include_pyarmor: 是否包含 pyarmor 依赖
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
(python_command, pip_command) 元组
|
(python_command, pip_command) 元组
|
||||||
"""
|
"""
|
||||||
python_cmd = f"python {script_path} {input_path}"
|
python_cmd = f"python {script_path} {input_path}"
|
||||||
|
|
||||||
# 构建 pip install 命令,处理带引号的依赖,始终包含 pyarmor
|
# 构建 pip install 命令,处理带引号的依赖
|
||||||
pip_parts = ["pip install", "pyarmor"]
|
pip_parts = ["pip install"]
|
||||||
|
if include_pyarmor:
|
||||||
|
pip_parts.append("pyarmor")
|
||||||
for dep in dependencies:
|
for dep in dependencies:
|
||||||
pip_parts.append(dep)
|
pip_parts.append(dep)
|
||||||
pip_cmd = " ".join(pip_parts)
|
pip_cmd = " ".join(pip_parts)
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ def main():
|
|||||||
detect_file_type_light,
|
detect_file_type_light,
|
||||||
get_platform,
|
get_platform,
|
||||||
get_dependencies,
|
get_dependencies,
|
||||||
|
generate_uv_args,
|
||||||
)
|
)
|
||||||
from readers import READERS
|
from readers import READERS
|
||||||
|
|
||||||
@@ -93,19 +94,12 @@ def main():
|
|||||||
python_version, dependencies = get_dependencies(reader_cls, platform_id)
|
python_version, dependencies = get_dependencies(reader_cls, platform_id)
|
||||||
|
|
||||||
# 生成 uv 命令参数列表
|
# 生成 uv 命令参数列表
|
||||||
uv_args = ["uv", "run"]
|
uv_args = generate_uv_args(
|
||||||
|
dependencies=dependencies,
|
||||||
if python_version:
|
script_path="scripts/bootstrap.py",
|
||||||
uv_args.extend(["--python", python_version])
|
python_version=python_version,
|
||||||
|
include_pyarmor=True
|
||||||
# 始终添加 pyarmor 依赖(混淆后脚本需要)
|
)
|
||||||
uv_args.extend(["--with", "pyarmor"])
|
|
||||||
|
|
||||||
for dep in dependencies:
|
|
||||||
uv_args.extend(["--with", dep])
|
|
||||||
|
|
||||||
# 目标脚本是 bootstrap.py
|
|
||||||
uv_args.append("scripts/bootstrap.py")
|
|
||||||
|
|
||||||
# 添加所有命令行参数
|
# 添加所有命令行参数
|
||||||
uv_args.extend(sys.argv[1:])
|
uv_args.extend(sys.argv[1:])
|
||||||
|
|||||||
@@ -4,32 +4,6 @@ import pytest
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def temp_html(tmp_path):
|
|
||||||
"""创建临时 HTML 文件的 fixture 工厂。
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: HTML 内容字符串
|
|
||||||
encoding: 文件编码,默认 'utf-8'
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: 临时文件路径
|
|
||||||
"""
|
|
||||||
def _create_html(content="<html><body><p>Test</p></body></html>", encoding='utf-8'):
|
|
||||||
file_path = tmp_path / "test.html"
|
|
||||||
|
|
||||||
# 如果内容不包含完整的 HTML 结构,添加基本结构
|
|
||||||
if not content.strip().startswith('<html'):
|
|
||||||
content = f"<html><head><meta charset='{encoding}'></head><body>{content}</body></html>"
|
|
||||||
|
|
||||||
with open(file_path, 'w', encoding=encoding) as f:
|
|
||||||
f.write(content)
|
|
||||||
|
|
||||||
return str(file_path)
|
|
||||||
|
|
||||||
return _create_html
|
|
||||||
|
|
||||||
|
|
||||||
# 静态测试文件目录
|
# 静态测试文件目录
|
||||||
FIXTURES_DIR = Path(__file__).parent / "fixtures"
|
FIXTURES_DIR = Path(__file__).parent / "fixtures"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user