[project]
name = "lyxy-document"
version = "0.1.0"
description = "帮助AI工具读取转换文档到markdown的skill"
readme = "README.md"
requires-python = ">=3.11"
# The only unconditional runtime dependency; every parser backend is
# opt-in through the extras declared below.
dependencies = [
    "chardet>=5.0.0",
]

[project.optional-dependencies]
# Platform-specific DOCX parsing extras.
# The -win and -unix lists are currently identical; they are kept as separate
# names so the per-platform office-* bundles can reference them.
docx-win = [
    "docling>=2.0.0",
    "unstructured[docx]>=0.12.0",
    "markitdown[docx]>=0.1.0",
    "pypandoc-binary>=1.13.0",
    "python-docx>=1.1.0",
    "markdownify>=0.12.0",
]
docx-unix = [
    "docling>=2.0.0",
    "unstructured[docx]>=0.12.0",
    "markitdown[docx]>=0.1.0",
    "pypandoc-binary>=1.13.0",
    "python-docx>=1.1.0",
    "markdownify>=0.12.0",
]

# Platform-specific XLSX parsing extras (the two lists are currently identical).
xlsx-win = [
    "docling>=2.0.0",
    "unstructured[xlsx]>=0.12.0",
    "markitdown[xlsx]>=0.1.0",
    "pandas>=2.0.0",
    "tabulate>=0.9.0",
]
xlsx-unix = [
    "docling>=2.0.0",
    "unstructured[xlsx]>=0.12.0",
    "markitdown[xlsx]>=0.1.0",
    "pandas>=2.0.0",
    "tabulate>=0.9.0",
]

# Platform-specific PPTX parsing extras (the two lists are currently identical).
pptx-win = [
    "docling>=2.0.0",
    "unstructured[pptx]>=0.12.0",
    "markitdown[pptx]>=0.1.0",
    "python-pptx>=0.6.0",
    "markdownify>=0.12.0",
]
pptx-unix = [
    "docling>=2.0.0",
    "unstructured[pptx]>=0.12.0",
    "markitdown[pptx]>=0.1.0",
    "python-pptx>=0.6.0",
    "markdownify>=0.12.0",
]

# Platform-specific PDF parsing extras.
# Unlike the other formats, the PDF lists genuinely differ per platform.

# Windows adds the PaddleOCR backend for unstructured, with paddlepaddle
# pinned to an exact version.
# NOTE(review): the reason for the exact paddlepaddle pin and the explicit
# ml-dtypes entry is not recorded here - confirm before bumping either.
pdf-win = [
    "docling>=2.0.0",
    "unstructured[pdf]>=0.12.0",
    "unstructured-paddleocr>=0.1.0",
    "paddlepaddle==2.6.2",
    "ml-dtypes>=0.3.0",
    "markitdown[pdf]>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]
# Intel macOS pins docling and docling-parse to exact versions and leaves out
# unstructured[pdf].
# NOTE(review): presumably a wheel-availability workaround for x86_64 macOS -
# confirm before relaxing the pins.
pdf-macos-intel = [
    "docling==2.40.0",
    "docling-parse==4.0.0",
    "markitdown[pdf]>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]
# The Apple Silicon and Linux lists are currently identical.
pdf-macos-arm = [
    "docling>=2.0.0",
    "unstructured[pdf]>=0.12.0",
    "markitdown[pdf]>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]
pdf-linux = [
    "docling>=2.0.0",
    "unstructured[pdf]>=0.12.0",
    "markitdown[pdf]>=0.1.0",
    "pypdf>=4.0.0",
    "markdownify>=0.12.0",
]

# Platform-specific Office document bundle extras.
# Each bundle is a self-reference that pulls in the DOCX, XLSX, PPTX and PDF
# extras matching one platform.
office-win = [
    "lyxy-document[docx-win,xlsx-win,pptx-win,pdf-win]",
]
office-macos-intel = [
    "lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-intel]",
]
office-macos-arm = [
    "lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-macos-arm]",
]
office-linux = [
    "lyxy-document[docx-unix,xlsx-unix,pptx-unix,pdf-linux]",
]

# Other extras (not platform-specific).
html = [
    "trafilatura>=1.10.0",
    "domscribe>=0.1.0",
    "markitdown>=0.1.0",
    "html2text>=2024.2.26",
    "beautifulsoup4>=4.12.0",
]
http = [
    "httpx>=0.27.0",
    "pyppeteer>=2.0.0",
    "selenium>=4.18.0",
]
web = [
    "lyxy-document[html,http]",
]
# NOTE(review): "full" is hard-wired to the macOS ARM office bundle, so it
# selects that bundle on every platform. On Windows or Intel macOS, combine the
# matching office-* extra with "web" instead.
full = [
    "lyxy-document[office-macos-arm,web]",
]
dev = [
    "pytest>=8.0.0",
    "pytest-cov>=4.1.0",
    "reportlab>=4.0.0",
]