feat: 新增测试运行器脚本 run_tests.py
- 新增根目录 run_tests.py,自动根据测试类型加载依赖
- 支持所有测试类型:pdf/docx/xlsx/pptx/html/xls/doc/ppt/cli/core/utils/all
- 支持透传 pytest 参数(-v/--cov 等)
- 补全 advice_generator.py 中的 DocReader 和 PptReader 映射
- 更新 README.md,简化测试命令说明
This commit is contained in:
204
README.md
204
README.md
@@ -115,10 +115,8 @@ python scripts/lyxy_document_reader.py "https://example.com"
|
||||
### 运行基础测试
|
||||
|
||||
```bash
|
||||
# 运行 CLI 测试(验证项目基本功能)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_cli/ -v
|
||||
# 使用 run_tests.py 自动加载依赖并运行测试
|
||||
python run_tests.py cli -v
|
||||
```
|
||||
|
||||
## 开发指南
|
||||
@@ -138,7 +136,7 @@ uv run \
|
||||
|
||||
### 如何测试
|
||||
|
||||
项目包含完整的测试套件,覆盖 CLI、核心模块、工具函数和所有 Reader 实现。根据测试类型使用对应的 `uv run --with` 命令。
|
||||
项目包含完整的测试套件,覆盖 CLI、核心模块、工具函数和所有 Reader 实现。使用 `run_tests.py` 自动加载对应依赖并运行测试。
|
||||
|
||||
#### 测试目录结构
|
||||
- tests/test_cli/ - CLI 功能测试
|
||||
@@ -146,187 +144,37 @@ uv run \
|
||||
- tests/test_readers/ - 各格式 Reader 测试
|
||||
- tests/test_utils/ - 工具函数测试(file_detection, encoding_detection)
|
||||
|
||||
#### 运行所有测试
|
||||
#### run_tests.py 使用说明
|
||||
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with pytest-cov \
|
||||
--with docling \
|
||||
--with "unstructured[pdf]" \
|
||||
--with "unstructured[docx]" \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "unstructured[pptx]" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with "markitdown[docx]" \
|
||||
--with "markitdown[xlsx]" \
|
||||
--with "markitdown[pptx]" \
|
||||
--with "markitdown[xls]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
--with pypandoc-binary \
|
||||
--with python-docx \
|
||||
--with python-pptx \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
--with xlrd \
|
||||
--with olefile \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
--with pyppeteer \
|
||||
--with selenium \
|
||||
pytest
|
||||
```
|
||||
# 查看帮助
|
||||
python run_tests.py -h
|
||||
|
||||
注:由于依赖较多,也可以按测试类别分别运行(见下文)。
|
||||
# 运行所有测试
|
||||
python run_tests.py all
|
||||
|
||||
#### 测试 DOCX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[docx]" \
|
||||
--with "markitdown[docx]" \
|
||||
--with pypandoc-binary \
|
||||
--with python-docx \
|
||||
--with markdownify \
|
||||
pytest tests/test_readers/test_docx/
|
||||
```
|
||||
# 运行特定类型测试
|
||||
python run_tests.py pdf
|
||||
python run_tests.py docx
|
||||
python run_tests.py xlsx
|
||||
python run_tests.py pptx
|
||||
python run_tests.py html
|
||||
python run_tests.py xls
|
||||
python run_tests.py doc
|
||||
python run_tests.py ppt
|
||||
python run_tests.py cli
|
||||
python run_tests.py core
|
||||
python run_tests.py utils
|
||||
|
||||
#### 测试 XLSX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "markitdown[xlsx]" \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
pytest tests/test_readers/test_xlsx/
|
||||
```
|
||||
|
||||
#### 测试 PPTX reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[pptx]" \
|
||||
--with "markitdown[pptx]" \
|
||||
--with python-pptx \
|
||||
--with markdownify \
|
||||
pytest tests/test_readers/test_pptx/
|
||||
```
|
||||
|
||||
#### 测试 PDF reader
|
||||
```bash
|
||||
# 默认命令(macOS ARM、Linux、Windows)
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
--with "unstructured[pdf]" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
pytest tests/test_readers/test_pdf/
|
||||
|
||||
# macOS x86_64 (Intel) 特殊命令
|
||||
uv run \
|
||||
--python 3.12 \
|
||||
--with pytest \
|
||||
--with "docling==2.40.0" \
|
||||
--with "docling-parse==4.0.0" \
|
||||
--with "numpy<2" \
|
||||
--with "markitdown[pdf]" \
|
||||
--with pypdf \
|
||||
--with markdownify \
|
||||
--with reportlab \
|
||||
pytest tests/test_readers/test_pdf/
|
||||
```
|
||||
|
||||
#### 测试 HTML reader
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with markitdown \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
pytest tests/test_readers/test_html/
|
||||
```
|
||||
|
||||
#### 测试 XLS reader(旧格式,使用静态文件)
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with "unstructured[xlsx]" \
|
||||
--with "markitdown[xls]" \
|
||||
--with pandas \
|
||||
--with tabulate \
|
||||
--with xlrd \
|
||||
pytest tests/test_readers/test_xls/
|
||||
```
|
||||
|
||||
#### 测试 Core 模块
|
||||
```bash
|
||||
# 测试核心模块(无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_core/
|
||||
```
|
||||
|
||||
#### 测试 Utils 模块
|
||||
```bash
|
||||
# 测试工具函数(无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_utils/
|
||||
```
|
||||
|
||||
#### 测试 HTML 下载器
|
||||
```bash
|
||||
# 测试 HTML 下载器
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with trafilatura \
|
||||
--with domscribe \
|
||||
--with markitdown \
|
||||
--with html2text \
|
||||
--with beautifulsoup4 \
|
||||
--with httpx \
|
||||
--with chardet \
|
||||
--with pyppeteer \
|
||||
--with selenium \
|
||||
pytest tests/test_readers/test_html_downloader.py
|
||||
```
|
||||
|
||||
#### 运行特定测试文件或方法
|
||||
```bash
|
||||
# 运行特定测试文件(CLI 测试无需额外依赖)
|
||||
uv run \
|
||||
--with pytest \
|
||||
pytest tests/test_cli/test_main.py
|
||||
|
||||
# 运行特定测试类或方法
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with docling \
|
||||
pytest tests/test_cli/test_main.py::TestCLIDefaultOutput::test_default_output_docx
|
||||
# 透传 pytest 参数
|
||||
python run_tests.py pdf -v
|
||||
python run_tests.py pdf --cov=scripts
|
||||
python run_tests.py pdf tests/test_readers/test_pdf/test_docling_pdf.py
|
||||
```
|
||||
|
||||
#### 查看测试覆盖率
|
||||
```bash
|
||||
uv run \
|
||||
--with pytest \
|
||||
--with pytest-cov \
|
||||
pytest --cov=scripts --cov-report=term-missing
|
||||
python run_tests.py all --with pytest-cov --cov=scripts --cov-report=term-missing
|
||||
```
|
||||
|
||||
### 代码规范
|
||||
|
||||
Reference in New Issue
Block a user