feat: 新增测试运行器脚本 run_tests.py
- 新增根目录 run_tests.py,自动根据测试类型加载依赖 - 支持所有测试类型:pdf/docx/xlsx/pptx/html/xls/doc/ppt/cli/core/utils/all - 支持透传 pytest 参数(-v/--cov 等) - 补全 advice_generator.py 中的 DocReader 和 PptReader 映射 - 更新 README.md,简化测试命令说明
This commit is contained in:
204
README.md
204
README.md
@@ -115,10 +115,8 @@ python scripts/lyxy_document_reader.py "https://example.com"
|
||||
### 运行基础测试
|
||||
|
||||
```bash
|
||||
# 运行 CLI 测试(验证项目基本功能)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_cli/ -v
|
||||
# 使用 run_tests.py 自动加载依赖并运行测试
|
||||
python run_tests.py cli -v
|
||||
```
|
||||
|
||||
## 开发指南
|
||||
@@ -138,7 +136,7 @@ uv run \
|
||||
|
||||
### 如何测试
|
||||
|
||||
项目包含完整的测试套件,覆盖 CLI、核心模块、工具函数和所有 Reader 实现。根据测试类型使用对应的 `uv run --with` 命令。
|
||||
项目包含完整的测试套件,覆盖 CLI、核心模块、工具函数和所有 Reader 实现。使用 `run_tests.py` 自动加载对应依赖并运行测试。
|
||||
|
||||
#### 测试目录结构
|
||||
- tests/test_cli/ - CLI 功能测试
|
||||
@@ -146,187 +144,37 @@ uv run \
|
||||
- tests/test_readers/ - 各格式 Reader 测试
|
||||
- tests/test_utils/ - 工具函数测试(file_detection, encoding_detection)
|
||||
|
||||
#### 运行所有测试
|
||||
#### run_tests.py 使用说明
|
||||
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with pytest-cov \
|
||||
--with docling \
|
||||
--with "unstructured[pdf]" \
|
||||
--with "unstructured[docx]" \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "unstructured[pptx]" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with "markitdown[docx]" \
|
||||
--with "markitdown[xlsx]" \
|
||||
--with "markitdown[pptx]" \
|
||||
--with "markitdown[xls]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
--with pypandoc-binary \
|
||||
--with python-docx \
|
||||
--with python-pptx \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
--with xlrd \
|
||||
--with olefile \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
--with pyppeteer \
|
||||
--with selenium \
|
||||
pytest
|
||||
```
|
||||
# 查看帮助
|
||||
python run_tests.py -h
|
||||
|
||||
注:由于依赖较多,也可以按测试类别分别运行(见下文)。
|
||||
# 运行所有测试
|
||||
python run_tests.py all
|
||||
|
||||
#### 测试 DOCX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[docx]" \
|
||||
--with "markitdown[docx]" \
|
||||
--with pypandoc-binary \
|
||||
--with python-docx \
|
||||
--with markdownify \
|
||||
pytest tests/test_readers/test_docx/
|
||||
```
|
||||
# 运行特定类型测试
|
||||
python run_tests.py pdf
|
||||
python run_tests.py docx
|
||||
python run_tests.py xlsx
|
||||
python run_tests.py pptx
|
||||
python run_tests.py html
|
||||
python run_tests.py xls
|
||||
python run_tests.py doc
|
||||
python run_tests.py ppt
|
||||
python run_tests.py cli
|
||||
python run_tests.py core
|
||||
python run_tests.py utils
|
||||
|
||||
#### 测试 XLSX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "markitdown[xlsx]" \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
pytest tests/test_readers/test_xlsx/
|
||||
```
|
||||
|
||||
#### 测试 PPTX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[pptx]" \
|
||||
--with "markitdown[pptx]" \
|
||||
--with python-pptx \
|
||||
--with markdownify \
|
||||
pytest tests/test_readers/test_pptx/
|
||||
```
|
||||
|
||||
#### 测试 PDF reader
|
||||
```bash
|
||||
# 默认命令(macOS ARM、Linux、Windows)
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[pdf]" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
pytest tests/test_readers/test_pdf/
|
||||
|
||||
# macOS x86_64 (Intel) 特殊命令
|
||||
uv run \
|
||||
--python 3.12 \
|
||||
--with pytest \
|
||||
--with "docling==2.40.0" \
|
||||
--with "docling-parse==4.0.0" \
|
||||
--with "numpy<2" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
pytest tests/test_readers/test_pdf/
|
||||
```
|
||||
|
||||
#### 测试 HTML reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with markitdown \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
pytest tests/test_readers/test_html/
|
||||
```
|
||||
|
||||
#### 测试 XLS reader(旧格式,使用静态文件)
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "markitdown[xls]" \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
--with xlrd \
|
||||
pytest tests/test_readers/test_xls/
|
||||
```
|
||||
|
||||
#### 测试 Core 模块
|
||||
```bash
|
||||
# 测试核心模块(无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_core/
|
||||
```
|
||||
|
||||
#### 测试 Utils 模块
|
||||
```bash
|
||||
# 测试工具函数(无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_utils/
|
||||
```
|
||||
|
||||
#### 测试 HTML 下载器
|
||||
```bash
|
||||
# 测试 HTML 下载器
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with markitdown \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
--with pyppeteer \
|
||||
--with selenium \
|
||||
pytest tests/test_readers/test_html_downloader.py
|
||||
```
|
||||
|
||||
#### 运行特定测试文件或方法
|
||||
```bash
|
||||
# 运行特定测试文件(CLI 测试无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_cli/test_main.py
|
||||
|
||||
# 运行特定测试类或方法
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
pytest tests/test_cli/test_main.py::TestCLIDefaultOutput::test_default_output_docx
|
||||
# 透传 pytest 参数
|
||||
python run_tests.py pdf -v
|
||||
python run_tests.py pdf --cov=scripts
|
||||
python run_tests.py pdf tests/test_readers/test_pdf/test_docling_pdf.py
|
||||
```
|
||||
|
||||
#### 查看测试覆盖率
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with pytest-cov \
|
||||
pytest --cov=scripts --cov-report=term-missing
|
||||
python run_tests.py all --with pytest-cov --cov=scripts --cov-report=term-missing
|
||||
```
|
||||
|
||||
### 代码规范
|
||||
|
||||
64
openspec/specs/test-runner/spec.md
Normal file
64
openspec/specs/test-runner/spec.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# Test Runner Specification
|
||||
|
||||
## Purpose
|
||||
|
||||
定义自动化测试运行器的功能规范,包括测试类型选择、依赖自动加载、pytest 参数透传等。
|
||||
|
||||
## Requirements
|
||||
|
||||
### Requirement: 测试运行器支持指定测试类型
|
||||
测试运行器 SHALL 支持通过命令行参数指定测试类型,自动加载对应依赖并运行 pytest。
|
||||
|
||||
#### Scenario: 运行 PDF 测试
|
||||
- **WHEN** 用户执行 `python run_tests.py pdf`
|
||||
- **THEN** 自动加载 config.DEPENDENCIES["pdf"] 中的依赖
|
||||
- **AND** 运行 tests/test_readers/test_pdf/ 目录下的测试
|
||||
|
||||
#### Scenario: 运行 DOCX 测试
|
||||
- **WHEN** 用户执行 `python run_tests.py docx`
|
||||
- **THEN** 自动加载 config.DEPENDENCIES["docx"] 中的依赖
|
||||
- **AND** 运行 tests/test_readers/test_docx/ 目录下的测试
|
||||
|
||||
#### Scenario: 运行 CLI 测试(无特殊依赖)
|
||||
- **WHEN** 用户执行 `python run_tests.py cli`
|
||||
- **THEN** 仅加载 pytest 依赖
|
||||
- **AND** 运行 tests/test_cli/ 目录下的测试
|
||||
|
||||
#### Scenario: 运行所有测试
|
||||
- **WHEN** 用户执行 `python run_tests.py all`
|
||||
- **THEN** 加载 config.DEPENDENCIES 中所有类型的依赖(去重)
|
||||
- **AND** 运行 tests/ 目录下的所有测试
|
||||
|
||||
### Requirement: 测试运行器支持透传 pytest 参数
|
||||
测试运行器 SHALL 支持将额外的命令行参数透传给 pytest。
|
||||
|
||||
#### Scenario: 传递 -v 参数
|
||||
- **WHEN** 用户执行 `python run_tests.py pdf -v`
|
||||
- **THEN** pytest 以 verbose 模式运行
|
||||
|
||||
#### Scenario: 传递 --cov 参数
|
||||
- **WHEN** 用户执行 `python run_tests.py pdf --cov=scripts`
|
||||
- **THEN** pytest 生成测试覆盖率报告
|
||||
|
||||
#### Scenario: 运行特定测试文件
|
||||
- **WHEN** 用户执行 `python run_tests.py pdf tests/test_readers/test_pdf/test_docling_pdf.py`
|
||||
- **THEN** 仅运行指定的测试文件
|
||||
|
||||
### Requirement: 测试运行器支持平台特定配置
|
||||
测试运行器 SHALL 根据当前平台自动选择对应的依赖配置(如 Darwin-x86_64)。
|
||||
|
||||
#### Scenario: 在 Darwin-x86_64 平台运行 PDF 测试
|
||||
- **WHEN** 用户在 Darwin-x86_64 平台执行 `python run_tests.py pdf`
|
||||
- **THEN** 使用 config.DEPENDENCIES["pdf"]["Darwin-x86_64"] 配置(如果存在)
|
||||
- **AND** 使用 python 3.12(如配置中指定)
|
||||
|
||||
### Requirement: advice_generator 包含完整 Reader 映射
|
||||
advice_generator.py 中的 _READER_KEY_MAP SHALL 包含所有 Reader 类的映射,包括 DocReader 和 PptReader。
|
||||
|
||||
#### Scenario: DocReader 映射存在
|
||||
- **WHEN** 查询 _READER_KEY_MAP[DocReader]
|
||||
- **THEN** 返回 "doc"
|
||||
|
||||
#### Scenario: PptReader 映射存在
|
||||
- **WHEN** 查询 _READER_KEY_MAP[PptReader]
|
||||
- **THEN** 返回 "ppt"
|
||||
220
run_tests.py
Normal file
220
run_tests.py
Normal file
@@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env python3
|
||||
"""测试运行器 - 自动根据测试类型加载依赖并运行 pytest"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Resolve the project root (the directory containing this script) and the
# scripts/ directory that sits directly beneath it.
script_file = Path(__file__).resolve()
project_root = script_file.parent
scripts_dir = project_root / "scripts"
bootstrap_path = str(scripts_dir.joinpath("bootstrap.py"))

# Register scripts/ on sys.path (once) so its modules can be imported.
if sys.path.count(str(scripts_dir)) == 0:
    sys.path.append(str(scripts_dir))

# Quieten third-party libraries (progress bars / telemetry switches).
os.environ.update(
    {
        "HF_HUB_DISABLE_PROGRESS_BARS": "1",
        "HF_HUB_DISABLE_TELEMETRY": "1",
        "TQDM_DISABLE": "1",
    }
)

# Test type -> {"key": dependency-config key or None, "path": pytest target}.
_TEST_TYPES = {
    # File-format reader tests: the type name is also the dependency key and
    # the suffix of the test directory.
    **{
        fmt: {"key": fmt, "path": f"tests/test_readers/test_{fmt}/"}
        for fmt in ("pdf", "docx", "xlsx", "pptx", "html", "xls", "doc", "ppt")
    },
    # Core suites: no dependency key.
    **{
        suite: {"key": None, "path": f"tests/test_{suite}/"}
        for suite in ("cli", "core", "utils")
    },
    # Everything: dependencies of all types are merged by the caller.
    "all": {"key": "all", "path": "tests/"},
}
|
||||
|
||||
|
||||
def _select_platform_config(type_config: dict, platform_id: str):
    """Return the entry for *platform_id*, else the "default" entry, else None."""
    if platform_id in type_config:
        return type_config[platform_id]
    return type_config.get("default")


def get_dependencies_for_type(test_type: str, platform_id: str):
    """
    Look up the dependency configuration for a test type.

    Args:
        test_type: Test type name (a key of ``_TEST_TYPES``, e.g. pdf/docx/.../all).
        platform_id: Platform identifier used to pick a platform-specific
            entry; the "default" entry is used when no such entry exists.

    Returns:
        A ``(python_version, dependencies)`` tuple. ``python_version`` is None
        when the selected configuration does not request one. ``dependencies``
        is a new list; for "all" it is the de-duplicated union of every type's
        dependencies in first-seen order. Unknown types, types without a
        dependency key, and types with no usable configuration yield
        ``(None, [])``.
    """
    from config import DEPENDENCIES

    entry = _TEST_TYPES.get(test_type)
    if not entry:
        return None, []

    key = entry["key"]
    if key is None:
        # Test types without special dependencies (cli/core/utils).
        return None, []

    if key == "all":
        python_version = None
        # A dict is used as an ordered set: a plain set() would make the
        # order of the generated ``--with`` options vary between runs.
        merged = {}
        for type_config in DEPENDENCIES.values():
            cfg = _select_platform_config(type_config, platform_id)
            if cfg is None:
                continue
            # Remember a pinned python version; a later type's pin replaces
            # an earlier one.
            if cfg.get("python"):
                python_version = cfg["python"]
            merged.update(dict.fromkeys(cfg.get("dependencies", [])))
        return python_version, list(merged)

    # Dependencies of a single type.
    if key not in DEPENDENCIES:
        return None, []

    cfg = _select_platform_config(DEPENDENCIES[key], platform_id)
    if cfg is None:
        return None, []

    # Copy so callers cannot mutate the shared configuration list.
    return cfg.get("python"), list(cfg.get("dependencies", []))
|
||||
|
||||
|
||||
# pytest options whose value is given as the NEXT argument. That value must
# not be mistaken for a test target (e.g. ``--deselect tests/a.py::test_x``).
_PYTEST_VALUE_OPTIONS = frozenset(
    {
        "-c", "-k", "-m", "-o", "-p", "-W",
        "--deselect", "--ignore", "--ignore-glob",
        "--rootdir", "--confcutdir", "--basetemp",
        "--junitxml", "--junit-xml",
        "--cov-config", "--cov-report",
        "--tb", "--maxfail", "--durations",
    }
)


def _split_extra_dependencies(pytest_args):
    """Split ``--with PKG`` / ``--with=PKG`` out of the forwarded arguments."""
    extras, remaining = [], []
    pending = iter(pytest_args)
    for arg in pending:
        if arg == "--with":
            package = next(pending, None)
            if package is None:
                # Dangling flag without a value: forward it untouched so the
                # mistake is reported instead of being silently dropped.
                remaining.append(arg)
            else:
                extras.append(package)
        elif arg.startswith("--with=") and len(arg) > len("--with="):
            extras.append(arg[len("--with="):])
        else:
            remaining.append(arg)
    return extras, remaining


def _requirement_name(requirement):
    """Return the normalized package name at the start of a requirement string."""
    chars = []
    for ch in requirement.strip():
        if not (ch.isalnum() or ch in "-_."):
            break
        chars.append(ch)
    return "".join(chars).lower().replace("_", "-").replace(".", "-")


def _uses_coverage(pytest_args):
    """Tell whether any forwarded argument is a pytest-cov option."""
    return any(
        arg == "--cov" or arg.startswith(("--cov=", "--cov-", "--no-cov"))
        for arg in pytest_args
    )


def _names_test_target(pytest_args):
    """Tell whether the forwarded arguments already select a test file or node id."""
    skip_next = False
    for arg in pytest_args:
        if skip_next:
            # Value belonging to the preceding option.
            skip_next = False
            continue
        if arg.startswith("-"):
            skip_next = arg in _PYTEST_VALUE_OPTIONS
            continue
        if "::" in arg or arg.endswith(".py"):
            return True
    return False


def generate_uv_args(
    dependencies: list,
    test_path: str,
    pytest_args: list,
    python_version: str = None,
):
    """
    Build the ``uv run`` argument list (for ``subprocess.run``).

    The result has the shape
    ``uv run [--python V] --with pytest [--with DEP]... pytest [PATH] ARGS...``.

    Args:
        dependencies: Packages to make available through ``--with``.
        test_path: Default pytest target for the selected test type.
        pytest_args: Arguments forwarded to pytest. Three cases are handled
            specially:

            * ``--with PKG`` / ``--with=PKG`` are removed from the pytest
              arguments and passed to ``uv run`` as extra dependencies.
            * If a pytest-cov option (``--cov``, ``--cov=...``, ``--cov-*``,
              ``--no-cov*``) is present and no requirement named
              ``pytest-cov`` is listed, ``pytest-cov`` is added.
            * If an argument names a test file (``*.py``) or node id
              (contains ``::``), ``test_path`` is omitted so only the
              requested tests run. Directory arguments are not detected and
              are run in addition to ``test_path``.
        python_version: Python version passed as ``--python``; None or empty
            means the option is not emitted.

    Returns:
        The argument list, starting with ``"uv"``.
    """
    extra_deps, forwarded = _split_extra_dependencies(pytest_args or [])

    # pytest itself always comes first, followed by the configured and the
    # user-requested packages.
    requirements = ["pytest", *(dependencies or []), *extra_deps]
    if _uses_coverage(forwarded):
        present = {_requirement_name(req) for req in requirements}
        if "pytest-cov" not in present:
            requirements.append("pytest-cov")

    args = ["uv", "run"]
    if python_version:
        args.extend(["--python", python_version])
    for requirement in requirements:
        args.extend(["--with", requirement])

    args.append("pytest")
    # An explicit target replaces the default path; otherwise pytest would
    # run the whole default directory as well.
    if not _names_test_target(forwarded):
        args.append(test_path)
    args.extend(forwarded)
    return args
|
||||
|
||||
|
||||
def main():
    """
    Parse the command line, build the ``uv run ... pytest`` command and run it.

    The first argument is the test type; everything after it is forwarded
    unchanged. The process exits with the return code of the launched
    command, with 0 after printing help for ``-h``/``--help``, or with 1 when
    no arguments are given, the test type is unknown, or ``uv`` is not found.
    """
    # The parser is only used to render the help text; see below for why the
    # arguments themselves are split by hand.
    parser = argparse.ArgumentParser(
        description="自动根据测试类型加载依赖并运行 pytest",
        usage="%(prog)s <test_type> [pytest_args...]",
    )
    parser.add_argument(
        "test_type",
        choices=list(_TEST_TYPES.keys()),
        help="测试类型: " + ", ".join(_TEST_TYPES.keys()),
    )
    parser.add_argument(
        "pytest_args",
        nargs=argparse.REMAINDER,
        help="透传给 pytest 的参数(如 -v, --cov 等)",
    )

    # No arguments at all: show help and signal a usage error.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Help is only honoured as the first argument; later occurrences are
    # forwarded like any other argument.
    if sys.argv[1] in ("-h", "--help"):
        parser.print_help()
        sys.exit(0)

    # argv is split manually because argparse.REMAINDER would swallow --help.
    test_type = sys.argv[1]
    pytest_args = sys.argv[2:]

    # Validate the test type; diagnostics go to stderr.
    if test_type not in _TEST_TYPES:
        print(f"错误: 未知的测试类型 '{test_type}'", file=sys.stderr)
        print(f"可用类型: {', '.join(_TEST_TYPES.keys())}", file=sys.stderr)
        sys.exit(1)

    # uv must be installed.
    uv_path = shutil.which("uv")
    if not uv_path:
        print("错误: 未找到 uv,请先安装 uv", file=sys.stderr)
        sys.exit(1)

    test_path = _TEST_TYPES[test_type]["path"]

    # Imported lazily: only needed once the arguments are known to be valid.
    from core.advice_generator import get_platform

    platform_id = get_platform()
    python_version, dependencies = get_dependencies_for_type(test_type, platform_id)

    uv_args = generate_uv_args(
        dependencies=dependencies,
        test_path=test_path,
        pytest_args=pytest_args,
        python_version=python_version,
    )
    # Launch the executable that shutil.which() actually resolved, rather
    # than relying on a second lookup of the bare name "uv".
    command = [uv_path, *uv_args[1:]]

    # Run from the project root with the project root as PYTHONPATH.
    env = os.environ.copy()
    env["PYTHONPATH"] = str(project_root)

    result = subprocess.run(command, env=env, cwd=str(project_root))
    sys.exit(result.returncode)


if __name__ == "__main__":
    main()
|
||||
@@ -13,6 +13,8 @@ from readers import (
|
||||
PptxReader,
|
||||
HtmlReader,
|
||||
XlsReader,
|
||||
DocReader,
|
||||
PptReader,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,6 +26,8 @@ _READER_KEY_MAP: Dict[Type[BaseReader], str] = {
|
||||
PptxReader: "pptx",
|
||||
HtmlReader: "html",
|
||||
XlsReader: "xls",
|
||||
DocReader: "doc",
|
||||
PptReader: "ppt",
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user