"""XLSX 文件阅读器,支持多种解析方法。""" from typing import List, Optional, Tuple from readers.base import BaseReader from utils import is_valid_xlsx from . import docling from . import unstructured from . import markitdown from . import pandas from . import native_xml PARSERS = [ ("docling", docling.parse), ("unstructured", unstructured.parse), ("MarkItDown", markitdown.parse), ("pandas", pandas.parse), ("XML 原生解析", native_xml.parse), ] class XlsxReader(BaseReader): """XLSX 文件阅读器""" @property def supported_extensions(self) -> List[str]: return [".xlsx"] def supports(self, file_path: str) -> bool: return is_valid_xlsx(file_path) def parse(self, file_path: str) -> Tuple[Optional[str], List[str]]: failures = [] content = None for parser_name, parser_func in PARSERS: content, error = parser_func(file_path) if content is not None: return content, failures else: failures.append(f"- {parser_name}: {error}") return None, failures