# lyxy-document/tests/test_readers/test_xlsx/test_unstructured_xlsx.py

"""测试 Unstructured XLSX Reader 的解析功能。"""

import pytest
from scripts.readers.xlsx import unstructured


class TestUnstructuredXlsxReaderParse:
    """Tests for ``unstructured.parse`` of the Unstructured XLSX reader.

    Every test calls ``unstructured.parse(path)`` and unpacks the result as a
    ``(content, error)`` pair. Workbooks are created through the ``temp_xlsx``
    fixture, which is called with an optional ``data`` keyword (a list of
    rows) and whose return value is passed to ``parse`` as the file path.
    """

    def test_normal_file(self, temp_xlsx):
        """Parse a workbook holding a header row and two data rows."""
        file_path = temp_xlsx(data=[["列1", "列2"], ["数据1", "数据2"], ["数据3", "数据4"]])
        content, error = unstructured.parse(file_path)
        if content is None:
            # Surface the unchecked case as a skip rather than letting the
            # test pass without asserting anything.
            pytest.skip(f"unstructured.parse returned no content: {error}")
        expected_fragments = ("列1", "列2", "数据")
        assert any(fragment in content for fragment in expected_fragments), (
            f"none of {expected_fragments} found in parsed content"
        )

    def test_file_not_exists(self, tmp_path):
        """A path that does not exist yields no content and an error."""
        non_existent_file = str(tmp_path / "non_existent.xlsx")
        content, error = unstructured.parse(non_existent_file)
        assert content is None, "expected no content for a missing file"
        assert error is not None, "expected an error for a missing file"

    def test_empty_file(self, temp_xlsx):
        """A workbook created without data yields no content or blank content."""
        # temp_xlsx() is called without rows; presumably this produces an
        # empty workbook -- confirm against the fixture definition.
        file_path = temp_xlsx()
        content, _ = unstructured.parse(file_path)
        assert content is None or content.strip() == "", (
            "expected None or whitespace-only content for an empty workbook"
        )

    def test_corrupted_file(self, temp_xlsx, tmp_path):
        """A file overwritten with arbitrary bytes yields no content and an error."""
        # tmp_path is requested but not used directly in this test body.
        file_path = temp_xlsx(data=[["测试", "数据"]])
        # Replace the generated workbook's bytes with non-XLSX content.
        with open(file_path, "wb") as f:
            f.write(b"corrupted content")
        content, error = unstructured.parse(file_path)
        assert content is None, "expected no content for a corrupted file"
        assert error is not None, "expected an error for a corrupted file"

    def test_special_chars(self, temp_xlsx):
        """Parse a workbook containing CJK text, an emoji and symbol characters."""
        file_path = temp_xlsx(data=[["中文", "😀"], ["©®", "特殊符号"]])
        content, error = unstructured.parse(file_path)
        if content is None:
            # Surface the unchecked case as a skip rather than letting the
            # test pass without asserting anything.
            pytest.skip(f"unstructured.parse returned no content: {error}")
        assert "中文" in content or "😀" in content, (
            "neither the CJK text nor the emoji was found in parsed content"
        )