feat: 新增 LibreOffice soffice DOCX 解析器

- 新增 scripts/readers/docx/libreoffice.py
- 在 PARSERS 列表中将 LibreOffice 解析器插入到 MarkItDown 之后、python-docx 之前
- 新增 tests/test_readers/test_docx/test_libreoffice.py
- 更新 openspec/specs/docx-reader/spec.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-15 22:04:39 +08:00
parent 3b2b368db2
commit 0dd7aa221c
4 changed files with 148 additions and 1 deletion

View File

@@ -10,6 +10,7 @@ from . import docling
from . import unstructured
from . import markitdown
from . import pypandoc
from . import libreoffice
from . import python_docx
from . import native_xml
@@ -19,6 +20,7 @@ PARSERS = [
("unstructured", unstructured.parse),
("pypandoc-binary", pypandoc.parse),
("MarkItDown", markitdown.parse),
("LibreOffice", libreoffice.parse),
("python-docx", python_docx.parse),
("XML 原生解析", native_xml.parse),
]