# lyxy-document/tests/conftest.py

"""测试配置和共享 fixtures。"""

import sys
from pathlib import Path

# Put the scripts/ directory on sys.path. This must stay at the very top of
# the module, before the remaining imports.
project_root = Path(__file__).resolve().parent.parent  # parent of tests/ is the project root
scripts_dir = project_root.joinpath("scripts")
if sys.path.count(str(scripts_dir)) == 0:
    sys.path.insert(0, str(scripts_dir))

import pytest
from readers import READERS


@pytest.fixture
def all_readers():
    """Return a list with one new instance of every class listed in READERS."""
    instances = []
    for reader_cls in READERS:
        # Each class is instantiated without arguments.
        instances.append(reader_cls())
    return instances


@pytest.fixture
def sample_markdown():
    """Return a sample Markdown text.

    The sample contains three heading levels, two paragraphs and a
    two-item bullet list.
    """
    md_lines = [
        "# 标题",
        "",
        "这是一段测试文本。",
        "",
        "## 子标题",
        "",
        "- 列表项 1",
        "- 列表项 2",
        "",
        "### 另一个标题",
        "",
        "这是更多的文本。",
    ]
    # The sample ends with a single trailing newline.
    return "\n".join(md_lines) + "\n"


@pytest.fixture
def temp_docx(tmp_path):
    """Provide a factory that writes a temporary DOCX file.

    The fixture itself returns the factory ``_create_docx``; the arguments
    and return value below describe that factory. Content is appended in a
    fixed order: headings, paragraphs, table, list items. The calling test
    is skipped when python-docx cannot be imported.

    Args:
        paragraphs: List of paragraph texts.
        headings: List of headings, formatted as [(level, text), ...].
        table_data: Table data, formatted as [[cell1, cell2], [cell3, cell4]].
            Rows may differ in length; the table is as wide as the longest
            row and cells without data are left empty.
        list_items: List of bullet-list item texts.

    Returns:
        str: Path of the saved file, always ``tmp_path / "test.docx"``, so
        calling the factory again within one test overwrites the file.
    """
    def _create_docx(paragraphs=None, headings=None, table_data=None, list_items=None):
        try:
            from docx import Document
        except ImportError:
            pytest.skip("python-docx 未安装")

        doc = Document()

        # Headings
        if headings:
            for level, text in headings:
                doc.add_heading(text, level=level)

        # Paragraphs
        if paragraphs:
            for para_text in paragraphs:
                doc.add_paragraph(para_text)

        # Table
        if table_data:
            # Size the grid to the widest row: taking the width from the first
            # row only made a longer later row index past the last column.
            col_count = max(len(row_data) for row_data in table_data)
            table = doc.add_table(rows=len(table_data), cols=col_count)
            for i, row_data in enumerate(table_data):
                for j, cell_text in enumerate(row_data):
                    table.cell(i, j).text = str(cell_text)

        # Bullet-list items
        if list_items:
            for item in list_items:
                doc.add_paragraph(item, style='List Bullet')

        file_path = tmp_path / "test.docx"
        doc.save(str(file_path))
        return str(file_path)

    return _create_docx


@pytest.fixture
def temp_pdf(tmp_path):
    """Provide a factory that writes a temporary single-page PDF file.

    The fixture itself returns the factory ``_create_pdf``; the arguments
    and return value below describe that factory. The calling test is
    skipped when reportlab cannot be imported.

    Args:
        text: PDF text content; it is split on '\\n' and drawn one line at
            a time.
        lines: List of text lines, drawn after the lines of ``text``.

    Returns:
        str: Path of the saved file, always ``tmp_path / "test.pdf"``.
    """
    def _create_pdf(text=None, lines=None):
        try:
            from reportlab.pdfgen import canvas
            from reportlab.lib.pagesizes import letter
            from reportlab.pdfbase import pdfmetrics
            from reportlab.pdfbase.ttfonts import TTFont
        except ImportError:
            pytest.skip("reportlab 未安装")

        file_path = tmp_path / "test.pdf"
        c = canvas.Canvas(str(file_path), pagesize=letter)

        # Try to register a Chinese font from the system, if one is available.
        try:
            pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
            c.setFont('SimSun', 12)
        except Exception:
            # Best-effort fallback to the default font. Catching Exception
            # rather than using a bare except keeps KeyboardInterrupt and
            # SystemExit propagating.
            c.setFont('Helvetica', 12)

        # Collect everything to draw: the lines of `text` first, then `lines`.
        rows = []
        if text:
            rows.extend(text.split('\n'))
        if lines:
            rows.extend(lines)

        # Draw top-down starting at y=750, moving 20 points down per line.
        y_position = 750
        for row in rows:
            c.drawString(100, y_position, row)
            y_position -= 20

        c.save()
        return str(file_path)

    return _create_pdf


@pytest.fixture
def temp_html(tmp_path):
    """Provide a factory that writes a temporary HTML file.

    The fixture itself returns the factory ``_create_html``; the arguments
    and return value below describe that factory.

    Args:
        content: HTML content string. Content that does not already start
            with an ``<html`` tag or a ``<!DOCTYPE`` declaration (checked
            case-insensitively, ignoring leading whitespace) is wrapped in
            a minimal document whose ``<meta charset>`` names ``encoding``.
        encoding: File encoding, 'utf-8' by default.

    Returns:
        str: Path of the written file, always ``tmp_path / "test.html"``.
    """
    def _create_html(content="<html><body><p>Test</p></body></html>", encoding='utf-8'):
        file_path = tmp_path / "test.html"

        # Wrap fragments only. A complete document may open with a doctype or
        # an upper-case <HTML> tag; a case-sensitive '<html' check would nest
        # such documents inside a second <html> shell.
        leading = content.lstrip().lower()
        if not leading.startswith(('<html', '<!doctype')):
            content = f"<html><head><meta charset='{encoding}'></head><body>{content}</body></html>"

        with open(file_path, 'w', encoding=encoding) as f:
            f.write(content)

        return str(file_path)

    return _create_html


@pytest.fixture
def temp_pptx(tmp_path):
    """Provide a factory that writes a temporary PPTX file.

    The fixture itself returns the factory ``_create_pptx``; the arguments
    and return value below describe that factory. The calling test is
    skipped when python-pptx cannot be imported.

    Args:
        slides: List of slide contents, each element a (title, content)
            tuple. A falsy content leaves the body placeholder untouched.

    Returns:
        str: Path of the saved file, always ``tmp_path / "test.pptx"``.
    """
    def _create_pptx(slides=None):
        try:
            from pptx import Presentation
        except ImportError:
            pytest.skip("python-pptx 未安装")

        deck = Presentation()

        for heading, body in slides or ():
            # Layout index 1 is used as the "Title and Content" layout.
            new_slide = deck.slides.add_slide(deck.slide_layouts[1])
            new_slide.shapes.title.text = heading
            if not body:
                continue
            new_slide.shapes.placeholders[1].text_frame.text = body

        out_path = str(tmp_path / "test.pptx")
        deck.save(out_path)
        return out_path

    return _create_pptx


@pytest.fixture
def temp_xlsx(tmp_path):
    """Provide a factory that writes a temporary XLSX file.

    The fixture itself returns the factory ``_create_xlsx``; the arguments
    and return value below describe that factory. The calling test is
    skipped when pandas cannot be imported.

    Args:
        data: Table data, formatted as [[cell1, cell2], [cell3, cell4]].
            When falsy, an empty workbook is written instead.

    Returns:
        str: Path of the written file, always ``tmp_path / "test.xlsx"``.
    """
    def _create_xlsx(data=None):
        try:
            import pandas as pd
        except ImportError:
            pytest.skip("pandas 未安装")

        target = str(tmp_path / "test.xlsx")

        if not data:
            # No data given: write an empty Excel file.
            pd.DataFrame().to_excel(target, index=False)
            return target

        # Rows are written as-is, without an index column or a header row.
        pd.DataFrame(data).to_excel(target, index=False, header=False)
        return target

    return _create_xlsx