- 测试数量从 83 个增加到 193 个 (+132%)
- 代码覆盖率从 48% 提升到 69% (+44%)
- 为每种文档格式的所有 Reader 实现创建独立测试
- 添加跨 Reader 的一致性验证测试
- 新增 4 个测试规范 (cli-testing, exception-testing, reader-testing, test-fixtures)
- 更新 README 测试统计信息

测试覆盖:
- DOCX: python-docx, markitdown, docling, native-xml, pypandoc, unstructured
- PDF: pypdf, markitdown, docling, docling-ocr, unstructured, unstructured-ocr
- HTML: html2text, markitdown, trafilatura, domscribe
- PPTX: python-pptx, markitdown, docling, native-xml, unstructured
- XLSX: pandas, markitdown, docling, native-xml, unstructured
- CLI: 所有命令行选项和错误处理

所有 193 个测试通过。
68 lines
1.3 KiB
TOML
68 lines
1.3 KiB
TOML
[project]
# Core package metadata.
name = "lyxy-document"
version = "0.1.0"
description = "帮助AI工具读取转换文档到markdown的skill"
readme = "README.md"
requires-python = ">=3.11"
# Base requirements, installed regardless of which extras are selected.
dependencies = [
    "chardet>=5.0.0",
]

[project.optional-dependencies]
# Per-format extras: each group lists the libraries needed to read one
# document format, so users install only the formats they need.
docx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "pypandoc-binary>=1.13.0",
    "python-docx>=1.1.0",
    "markdownify>=0.12.0",
]
xlsx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "pandas>=2.0.0",
    "tabulate>=0.9.0",
]
pptx = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "markitdown>=0.1.0",
    "python-pptx>=0.6.0",
    "markdownify>=0.12.0",
]
pdf = [
    "docling>=2.0.0",
    "unstructured>=0.12.0",
    "unstructured-paddleocr>=0.1.0",
    "markitdown>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]
html = [
    "trafilatura>=1.10.0",
    "domscribe>=0.1.0",
    "markitdown>=0.1.0",
    "html2text>=2024.2.26",
    "beautifulsoup4>=4.12.0",
]

# HTTP client and browser-automation libraries.
# NOTE(review): presumably used to fetch remote pages before HTML
# conversion - confirm against the code that imports them.
http = [
    "httpx>=0.27.0",
    "pyppeteer>=2.0.0",
    "selenium>=4.18.0",
]

# Aggregate extras. Each one refers back to this package's own extras, so
# the installer must support self-referential extras.
office = [
    "lyxy-document[docx,xlsx,pptx,pdf]",
]
web = [
    "lyxy-document[html,http]",
]
full = [
    "lyxy-document[office,web]",
]

# Test tooling.
# NOTE(review): reportlab is presumably used to generate PDF test
# fixtures - confirm.
dev = [
    "pytest>=8.0.0",
    "pytest-cov>=4.1.0",
    "reportlab>=4.0.0",
]