Files
lyxy-document/pyproject.toml
lanyuanxiaoyao 7eab1dcef1 test: 添加全面的测试套件,覆盖所有 Reader 实现
- 测试数量从 83 个增加到 193 个 (+132%)
- 代码覆盖率从 48% 提升到 69% (+44%)
- 为每种文档格式的所有 Reader 实现创建独立测试
- 添加跨 Reader 的一致性验证测试
- 新增 4 个测试规范 (cli-testing, exception-testing, reader-testing, test-fixtures)
- 更新 README 测试统计信息

测试覆盖:
- DOCX: python-docx, markitdown, docling, native-xml, pypandoc, unstructured
- PDF: pypdf, markitdown, docling, docling-ocr, unstructured, unstructured-ocr
- HTML: html2text, markitdown, trafilatura, domscribe
- PPTX: python-pptx, markitdown, docling, native-xml, unstructured
- XLSX: pandas, markitdown, docling, native-xml, unstructured
- CLI: 所有命令行选项和错误处理

所有 193 个测试通过。
2026-03-08 22:20:21 +08:00

68 lines
1.3 KiB
TOML

# Core package metadata (PEP 621 `[project]` table).
[project]
name = "lyxy-document"
version = "0.1.0"
description = "帮助AI工具读取转换文档到markdown的skill"
readme = "README.md"
requires-python = ">=3.11"
# chardet is the only unconditional runtime dependency; every other
# library is pulled in through an optional extra.
dependencies = ["chardet>=5.0.0"]
# Opt-in extras. Each per-format extra lists the libraries installed for
# that format; `office`, `web` and `full` are aggregates built from
# self-references to this package's own extras.
#
# NOTE(review): markitdown >= 0.1.0 appears to ship its format converters
# behind extras of its own (e.g. `markitdown[docx]`) — confirm that the
# bare `markitdown` requirement used below installs everything needed.
[project.optional-dependencies]

# Libraries installed for the `docx` extra.
docx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "pypandoc-binary>=1.13.0",
    "python-docx>=1.1.0",
    "markdownify>=0.12.0",
]

# Libraries installed for the `xlsx` extra.
xlsx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "pandas>=2.0.0",
    "tabulate>=0.9.0",
]

# Libraries installed for the `pptx` extra.
pptx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "python-pptx>=0.6.0",
    "markdownify>=0.12.0",
]

# Libraries installed for the `pdf` extra. This is the only group that
# adds `unstructured-paddleocr` (presumably for the OCR path — confirm).
pdf = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "unstructured-paddleocr>=0.1.0",
    "markitdown>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]

# Libraries installed for the `html` extra.
html = [
    "trafilatura>=1.10.0",
    "domscribe>=0.1.0",
    "markitdown>=0.1.0",
    "html2text>=2024.2.26",
    "beautifulsoup4>=4.12.0",
]

# Libraries installed for the `http` extra: an HTTP client (httpx) and
# two browser-automation packages (pyppeteer, selenium).
http = [
    "httpx>=0.27.0",
    "pyppeteer>=2.0.0",
    "selenium>=4.18.0",
]

# Aggregate: all four document-format extras.
office = [
    "lyxy-document[docx,xlsx,pptx,pdf]",
]

# Aggregate: the `html` and `http` extras.
web = [
    "lyxy-document[html,http]",
]

# Aggregate: everything in `office` and `web` (excludes `dev`).
full = [
    "lyxy-document[office,web]",
]

# Test tooling. reportlab is presumably used to generate PDF fixtures —
# confirm against the test suite.
dev = [
    "pytest>=8.0.0",
    "pytest-cov>=4.1.0",
    "reportlab>=4.0.0",
]