feat: 添加多平台依赖支持

为不同平台提供特定的依赖 extras,解决 macOS x86_64 的依赖兼容性问题。

- 添加平台特定的 PDF 解析 extras:pdf-win, pdf-macos-intel, pdf-macos-arm, pdf-linux
- 添加平台特定的 Office 文档 extras:office-win, office-macos-intel, office-macos-arm, office-linux
- macOS x86_64 使用硬编码版本:docling==2.40.0, docling-parse==4.0.0
- 移除通用的 docx、xlsx、pptx、pdf 和 office extras,强制用户选择平台
- 更新 SKILL.md 添加详细的多平台依赖安装指南
- 更新 README.md 添加平台特定安装说明
- 在 .gitignore 中添加 uv.lock
- 删除现有的 uv.lock 文件
- 创建 multi-platform-dependencies 规范文档
This commit is contained in:
2026-03-09 10:49:53 +08:00
parent b2fb418a06
commit dfe6904f4c
7 changed files with 264 additions and 4875 deletions

View File

@@ -9,36 +9,104 @@ dependencies = [
]
[project.optional-dependencies]
docx = [
# 平台特定的 DOCX 解析 extras
docx-win = [
"docling>=2.0.0",
"unstructured>=0.12.0",
"markitdown>=0.1.0",
"unstructured[docx]>=0.12.0",
"markitdown[docx]>=0.1.0",
"pypandoc-binary>=1.13.0",
"python-docx>=1.1.0",
"markdownify>=0.12.0",
]
xlsx = [
docx-unix = [
"docling>=2.0.0",
"unstructured>=0.12.0",
"markitdown>=0.1.0",
"unstructured[docx]>=0.12.0",
"markitdown[docx]>=0.1.0",
"pypandoc-binary>=1.13.0",
"python-docx>=1.1.0",
"markdownify>=0.12.0",
]
# 平台特定的 XLSX 解析 extras
xlsx-win = [
"docling>=2.0.0",
"unstructured[xlsx]>=0.12.0",
"markitdown[xlsx]>=0.1.0",
"pandas>=2.0.0",
"tabulate>=0.9.0",
]
pptx = [
xlsx-unix = [
"docling>=2.0.0",
"unstructured>=0.12.0",
"markitdown>=0.1.0",
"unstructured[xlsx]>=0.12.0",
"markitdown[xlsx]>=0.1.0",
"pandas>=2.0.0",
"tabulate>=0.9.0",
]
# 平台特定的 PPTX 解析 extras
pptx-win = [
"docling>=2.0.0",
"unstructured[pptx]>=0.12.0",
"markitdown[pptx]>=0.1.0",
"python-pptx>=0.6.0",
"markdownify>=0.12.0",
]
pdf = [
pptx-unix = [
"docling>=2.0.0",
"unstructured>=0.12.0",
"unstructured[pptx]>=0.12.0",
"markitdown[pptx]>=0.1.0",
"python-pptx>=0.6.0",
"markdownify>=0.12.0",
]
# 平台特定的 PDF 解析 extras
pdf-win = [
"docling>=2.0.0",
"unstructured[pdf]>=0.12.0",
"unstructured-paddleocr>=0.1.0",
"markitdown>=0.1.0",
"paddlepaddle==2.6.2",
"ml-dtypes>=0.3.0",
"markitdown[pdf]>=0.1.0",
"pypdf>=4.0.0",
"markdownify>=0.12.0",
]
# PDF parsing extra for macOS on Intel (x86_64). Unlike the other pdf-* extras,
# docling and docling-parse are pinned to exact versions here and
# unstructured[pdf] is not listed.
# NOTE(review): the pins are presumably the newest releases that still install
# on macOS x86_64 — confirm before bumping either version.
pdf-macos-intel = [
"docling==2.40.0",
"docling-parse==4.0.0",
"markitdown[pdf]>=0.1.0",
"pypdf>=4.0.0",
"markdownify>=0.12.0",
]
# PDF parsing extra for macOS on ARM. Uses open-ended lower bounds only; the
# requirement list is the same as the one in pdf-linux.
pdf-macos-arm = [
"docling>=2.0.0",
"unstructured[pdf]>=0.12.0",
"markitdown[pdf]>=0.1.0",
"pypdf>=4.0.0",
"markdownify>=0.12.0",
]
# PDF parsing extra for Linux. Uses open-ended lower bounds only; the
# requirement list is the same as the one in pdf-macos-arm.
pdf-linux = [
"docling>=2.0.0",
"unstructured[pdf]>=0.12.0",
"markitdown[pdf]>=0.1.0",
"pypdf>=4.0.0",
"markdownify>=0.12.0",
]
# Platform-specific bundles of the Office-document extras. Each bundle is a
# self-reference to this package (lyxy-document) that selects the DOCX, XLSX,
# PPTX and PDF extras for one platform.
# Windows: the *-win extras.
office-win = [
"lyxy-document[docx-win,xlsx-win,pptx-win,pdf-win]",
]
# macOS on Intel: the shared *-unix extras plus the version-pinned PDF extra.
office-macos-intel = [
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-intel]",
]
# macOS on ARM: the shared *-unix extras plus pdf-macos-arm.
office-macos-arm = [
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-arm]",
]
# Linux: the shared *-unix extras plus pdf-linux.
office-linux = [
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-linux]",
]
# 其他 extras（非平台特定）
html = [
"trafilatura>=1.10.0",
"domscribe>=0.1.0",
@@ -51,14 +119,11 @@ http = [
"pyppeteer>=2.0.0",
"selenium>=4.18.0",
]
office = [
"lyxy-document[docx,xlsx,pptx,pdf]",
]
# Aggregate extra: a self-reference that pulls in the html and http extras.
web = [
"lyxy-document[html,http]",
]
# Everything: the web extras plus the Office bundle for the installing platform.
# PEP 508 environment markers select the office-* bundle, so `full` does not
# force the macOS ARM set onto every machine; macOS x86_64 in particular needs
# the pinned docling versions that office-macos-intel brings in.
full = [
"lyxy-document[web]",
"lyxy-document[office-win]; sys_platform == 'win32'",
"lyxy-document[office-macos-intel]; sys_platform == 'darwin' and platform_machine == 'x86_64'",
"lyxy-document[office-macos-arm]; sys_platform == 'darwin' and platform_machine != 'x86_64'",
# Any platform that is neither Windows nor macOS falls back to the Linux bundle.
"lyxy-document[office-linux]; sys_platform != 'win32' and sys_platform != 'darwin'",
]
dev = [
"pytest>=8.0.0",