feat: 添加多平台依赖支持
为不同平台提供特定的依赖 extras,解决 macOS x86_64 的依赖兼容性问题。

- 添加平台特定的 PDF 解析 extras:pdf-win, pdf-macos-intel, pdf-macos-arm, pdf-linux
- 添加平台特定的 Office 文档 extras:office-win, office-macos-intel, office-macos-arm, office-linux
- macOS x86_64 使用硬编码版本:docling==2.40.0, docling-parse==4.0.0
- 移除通用的 pdf 和 office extras,强制用户选择平台
- 更新 SKILL.md 添加详细的多平台依赖安装指南
- 更新 README.md 添加平台特定安装说明
- 在 .gitignore 中添加 uv.lock
- 删除现有的 uv.lock 文件
- 创建 multi-platform-dependencies 规范文档
This commit is contained in:
@@ -9,36 +9,104 @@ dependencies = [
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
docx = [
|
||||
# 平台特定的 DOCX 解析 extras
|
||||
docx-win = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured>=0.12.0",
|
||||
"markitdown>=0.1.0",
|
||||
"unstructured[docx]>=0.12.0",
|
||||
"markitdown[docx]>=0.1.0",
|
||||
"pypandoc-binary>=1.13.0",
|
||||
"python-docx>=1.1.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
xlsx = [
|
||||
docx-unix = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured>=0.12.0",
|
||||
"markitdown>=0.1.0",
|
||||
"unstructured[docx]>=0.12.0",
|
||||
"markitdown[docx]>=0.1.0",
|
||||
"pypandoc-binary>=1.13.0",
|
||||
"python-docx>=1.1.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
|
||||
# 平台特定的 XLSX 解析 extras
|
||||
xlsx-win = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured[xlsx]>=0.12.0",
|
||||
"markitdown[xlsx]>=0.1.0",
|
||||
"pandas>=2.0.0",
|
||||
"tabulate>=0.9.0",
|
||||
]
|
||||
pptx = [
|
||||
xlsx-unix = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured>=0.12.0",
|
||||
"markitdown>=0.1.0",
|
||||
"unstructured[xlsx]>=0.12.0",
|
||||
"markitdown[xlsx]>=0.1.0",
|
||||
"pandas>=2.0.0",
|
||||
"tabulate>=0.9.0",
|
||||
]
|
||||
|
||||
# 平台特定的 PPTX 解析 extras
|
||||
pptx-win = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured[pptx]>=0.12.0",
|
||||
"markitdown[pptx]>=0.1.0",
|
||||
"python-pptx>=0.6.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
pdf = [
|
||||
pptx-unix = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured>=0.12.0",
|
||||
"unstructured[pptx]>=0.12.0",
|
||||
"markitdown[pptx]>=0.1.0",
|
||||
"python-pptx>=0.6.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
|
||||
# 平台特定的 PDF 解析 extras
|
||||
pdf-win = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured[pdf]>=0.12.0",
|
||||
"unstructured-paddleocr>=0.1.0",
|
||||
"markitdown>=0.1.0",
|
||||
"paddlepaddle==2.6.2",
|
||||
"ml-dtypes>=0.3.0",
|
||||
"markitdown[pdf]>=0.1.0",
|
||||
"pypdf>=4.0.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
pdf-macos-intel = [
|
||||
"docling==2.40.0",
|
||||
"docling-parse==4.0.0",
|
||||
"markitdown[pdf]>=0.1.0",
|
||||
"pypdf>=4.0.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
pdf-macos-arm = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured[pdf]>=0.12.0",
|
||||
"markitdown[pdf]>=0.1.0",
|
||||
"pypdf>=4.0.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
pdf-linux = [
|
||||
"docling>=2.0.0",
|
||||
"unstructured[pdf]>=0.12.0",
|
||||
"markitdown[pdf]>=0.1.0",
|
||||
"pypdf>=4.0.0",
|
||||
"markdownify>=0.12.0",
|
||||
]
|
||||
|
||||
# 平台特定的 Office 文档组合 extras
|
||||
office-win = [
|
||||
"lyxy-document[docx-win,xlsx-win,pptx-win,pdf-win]",
|
||||
]
|
||||
office-macos-intel = [
|
||||
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-intel]",
|
||||
]
|
||||
office-macos-arm = [
|
||||
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-arm]",
|
||||
]
|
||||
office-linux = [
|
||||
"lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-linux]",
|
||||
]
|
||||
|
||||
# 其他 extras(非平台特定)
|
||||
html = [
|
||||
"trafilatura>=1.10.0",
|
||||
"domscribe>=0.1.0",
|
||||
@@ -51,14 +119,11 @@ http = [
|
||||
"pyppeteer>=2.0.0",
|
||||
"selenium>=4.18.0",
|
||||
]
|
||||
office = [
|
||||
"lyxy-document[docx,xlsx,pptx,pdf]",
|
||||
]
|
||||
web = [
|
||||
"lyxy-document[html,http]",
|
||||
]
|
||||
full = [
|
||||
"lyxy-document[office,web]",
|
||||
"lyxy-document[office-macos-arm,web]",
|
||||
]
|
||||
dev = [
|
||||
"pytest>=8.0.0",
|
||||
|
||||
Reference in New Issue
Block a user