- 更新 openspec/config.yaml 中 git 任务相关说明 - 将 scripts.core.* 改为 core.*,scripts.readers.* 改为 readers.* - 优化 lyxy_document_reader.py 中 sys.path 设置方式 - 同步更新所有测试文件的导入路径
230 lines
5.9 KiB
Python
230 lines
5.9 KiB
Python
"""测试配置和共享 fixtures。"""
|
||
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# Make the scripts/ directory importable. This has to run at the very top,
# before any import that relies on it.
# This file sits in tests/, so two levels up is the project root.
project_root = Path(__file__).resolve().parent.parent
scripts_dir = project_root.joinpath("scripts")
if str(scripts_dir) not in sys.path:
    # Prepend so modules under scripts/ are found ahead of later entries.
    sys.path.insert(0, str(scripts_dir))
|
||
|
||
import pytest
|
||
from readers import READERS
|
||
|
||
|
||
@pytest.fixture
def all_readers():
    """Instantiate every class listed in ``READERS``.

    Returns:
        list: One newly constructed instance per entry of ``READERS``, in
        the same order.
    """
    instances = [reader_cls() for reader_cls in READERS]
    return instances
|
||
|
||
|
||
@pytest.fixture
def sample_markdown():
    """Provide a short sample Markdown document.

    The text has three heading levels, two plain paragraphs and a two-item
    bullet list, and ends with a single trailing newline.

    Returns:
        str: The Markdown text.
    """
    markdown_lines = (
        "# 标题",
        "",
        "这是一段测试文本。",
        "",
        "## 子标题",
        "",
        "- 列表项 1",
        "- 列表项 2",
        "",
        "### 另一个标题",
        "",
        "这是更多的文本。",
        "",  # yields the final newline after the last paragraph
    )
    return "\n".join(markdown_lines)
|
||
|
||
|
||
@pytest.fixture
def temp_docx(tmp_path):
    """Fixture factory that builds a temporary DOCX file.

    The fixture returns a callable. Content is written in a fixed order:
    headings, then paragraphs, then the table, then the list items. Every
    call saves to the same ``test.docx`` path inside ``tmp_path``.

    Args:
        paragraphs: List of paragraph texts.
        headings: List of headings given as ``[(level, text), ...]``.
        table_data: Table rows given as ``[[cell1, cell2], [cell3, cell4]]``.
            Rows may have different lengths; the table is as wide as the
            longest row and missing cells are left empty. Cell values are
            converted with ``str``.
        list_items: List of texts added with the ``'List Bullet'`` style.

    Returns:
        str: Path of the temporary file.
    """
    def _create_docx(paragraphs=None, headings=None, table_data=None, list_items=None):
        try:
            from docx import Document
        except ImportError:
            pytest.skip("python-docx 未安装")

        doc = Document()

        # Headings
        if headings:
            for level, text in headings:
                doc.add_heading(text, level=level)

        # Paragraphs
        if paragraphs:
            for para_text in paragraphs:
                doc.add_paragraph(para_text)

        # Table
        if table_data:
            # Size the grid to the widest row. Using only the first row's
            # length raised IndexError when a later row had more cells.
            col_count = max(len(row_data) for row_data in table_data)
            table = doc.add_table(rows=len(table_data), cols=col_count)
            for i, row_data in enumerate(table_data):
                for j, cell_text in enumerate(row_data):
                    table.rows[i].cells[j].text = str(cell_text)

        # Bulleted list items
        if list_items:
            for item in list_items:
                doc.add_paragraph(item, style='List Bullet')

        file_path = tmp_path / "test.docx"
        doc.save(str(file_path))
        return str(file_path)

    return _create_docx
|
||
|
||
|
||
@pytest.fixture
def temp_pdf(tmp_path):
    """Fixture factory that builds a temporary single-page PDF file.

    The fixture returns a callable. Lines are drawn top-down at x=100,
    starting at y=750 and moving 20 points down per line. The lines of
    ``text`` come first, followed by the entries of ``lines``. No page
    break is inserted, so the y coordinate keeps decreasing however many
    lines are supplied.

    Args:
        text: PDF text content; split on ``'\\n'`` into separate lines.
        lines: List of text lines.

    Returns:
        str: Path of the temporary file (``test.pdf`` inside ``tmp_path``).
    """
    def _create_pdf(text=None, lines=None):
        try:
            from reportlab.pdfgen import canvas
            from reportlab.lib.pagesizes import letter
            from reportlab.pdfbase import pdfmetrics
            from reportlab.pdfbase.ttfonts import TTFont
        except ImportError:
            pytest.skip("reportlab 未安装")

        file_path = tmp_path / "test.pdf"
        c = canvas.Canvas(str(file_path), pagesize=letter)

        # Try to register a Chinese font from the system, if one is available.
        try:
            pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
            c.setFont('SimSun', 12)
        except Exception:
            # Best-effort fallback to a built-in font. `Exception` rather than
            # a bare `except` so KeyboardInterrupt/SystemExit still propagate.
            c.setFont('Helvetica', 12)

        # Collect everything to draw: the text block first, then the lines.
        rows = []
        if text:
            rows.extend(text.split('\n'))
        if lines:
            rows.extend(lines)

        y_position = 750
        for row in rows:
            c.drawString(100, y_position, row)
            y_position -= 20

        c.save()
        return str(file_path)

    return _create_pdf
|
||
|
||
|
||
@pytest.fixture
def temp_html(tmp_path):
    """Fixture factory that writes a temporary HTML file.

    The fixture returns a callable.

    Args:
        content: HTML content string. A fragment is wrapped in a minimal
            ``<html><head>...</head><body>...</body></html>`` skeleton whose
            ``<meta charset>`` is set to ``encoding``. Content that already
            begins with ``<html`` or a ``<!DOCTYPE`` declaration (ignoring
            case and leading whitespace) is written unchanged.
        encoding: File encoding, ``'utf-8'`` by default.

    Returns:
        str: Path of the temporary file (``test.html`` inside ``tmp_path``).
    """
    def _create_html(content="<html><body><p>Test</p></body></html>", encoding='utf-8'):
        file_path = tmp_path / "test.html"

        # Only wrap fragments. The check is case-insensitive and accepts a
        # doctype so that "<!DOCTYPE html><html>..." or "<HTML>..." is not
        # nested inside a second <html> element.
        leading = content.lstrip().lower()
        if not leading.startswith(('<html', '<!doctype')):
            content = f"<html><head><meta charset='{encoding}'></head><body>{content}</body></html>"

        with open(file_path, 'w', encoding=encoding) as f:
            f.write(content)

        return str(file_path)

    return _create_html
|
||
|
||
|
||
@pytest.fixture
def temp_pptx(tmp_path):
    """Fixture factory that builds a temporary PPTX file.

    The fixture returns a callable. Each slide is created from layout
    index 1 of the default presentation; the body placeholder is filled
    only when the slide's content is truthy.

    Args:
        slides: List of slide contents, each a ``(title, content)`` tuple.

    Returns:
        str: Path of the temporary file (``test.pptx`` inside ``tmp_path``).
    """
    def _create_pptx(slides=None):
        try:
            from pptx import Presentation
        except ImportError:
            pytest.skip("python-pptx 未安装")

        presentation = Presentation()

        # A falsy `slides` (None or empty) produces a deck with no slides.
        for heading, body in slides or ():
            # Layout 1 is the "Title and Content" layout.
            new_slide = presentation.slides.add_slide(presentation.slide_layouts[1])
            new_slide.shapes.title.text = heading
            if not body:
                continue
            new_slide.shapes.placeholders[1].text_frame.text = body

        target = str(tmp_path / "test.pptx")
        presentation.save(target)
        return target

    return _create_pptx
|
||
|
||
|
||
@pytest.fixture
def temp_xlsx(tmp_path):
    """Fixture factory that builds a temporary XLSX file.

    The fixture returns a callable.

    Args:
        data: Table data given as ``[[cell1, cell2], [cell3, cell4]]``.
            When truthy it is written without index and without a header
            row; otherwise an empty ``DataFrame`` is written without index.

    Returns:
        str: Path of the temporary file (``test.xlsx`` inside ``tmp_path``).
    """
    def _create_xlsx(data=None):
        try:
            import pandas as pd
        except ImportError:
            pytest.skip("pandas 未安装")

        target = str(tmp_path / "test.xlsx")

        if not data:
            # No data supplied: write an empty Excel file.
            pd.DataFrame().to_excel(target, index=False)
            return target

        pd.DataFrame(data).to_excel(target, index=False, header=False)
        return target

    return _create_xlsx
|