Files
lyxy-document/scripts/config.py
lanyuanxiaoyao 5cc347589b refactor: 重新梳理 DEPENDENCIES 版本和 python 版本
- default.python 全部改为 None(使用默认 python)
- 所有依赖都指定版本号(截止 2026-03-17 最新版)
- 为 unstructured[...]、domscribe 等未指定版本的依赖添加版本
- 更新 markdownify、pypandoc-binary、tabulate、trafilatura、html2text、chardet、xlrd 等依赖版本
- html 的 selenium 降级到 4.25.0 解决 urllib3 冲突
- 为 pdf/docx/xlsx/pptx/html/xls/ppt 添加 Darwin-x86_64 配置(python 3.12 + docling 2.40.0 + docling-parse 4.0.0 + numpy<2)
- 更新测试期望 python_ver 为 None
2026-03-17 13:15:00 +08:00

207 lines
5.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""统一配置类,集中管理所有配置项。"""
class Config:
    """Single place that holds every configuration constant of the tool.

    All values are plain class attributes; nothing here is computed at
    runtime.
    """

    # ------------------------------------------------------------------
    # Encoding detection
    # ------------------------------------------------------------------
    # Encodings to try, in this order, when chardet fails to detect one.
    FALLBACK_ENCODINGS = ["utf-8", "gbk", "gb2312", "latin-1"]

    # ------------------------------------------------------------------
    # HTML download
    # ------------------------------------------------------------------
    # Download timeout, in seconds.
    DOWNLOAD_TIMEOUT = 30

    # Value sent as the HTTP User-Agent header.
    USER_AGENT = "lyxy-document/0.1.0"

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    # Log level. Only ERROR is emitted by default so that log lines do not
    # get mixed into the Markdown output.
    LOG_LEVEL = "ERROR"

    # ------------------------------------------------------------------
    # Dependencies, organised as  file type -> platform -> settings
    # ------------------------------------------------------------------
    # Each platform entry carries:
    #   "python":       required Python version (None means "use the default")
    #   "dependencies": list of pip-style requirement strings
    #
    # "default" is the generic entry. The other platform keys look like
    # "<system>-<machine>" (e.g. "Darwin-x86_64") -- presumably matched
    # against the host platform by the caller; confirm there.
    DEPENDENCIES = {
        "pdf": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pdf]==0.21.5",
                    "markitdown[pdf]==0.1.5",
                    "pypdf==6.9.0",
                    "markdownify==1.2.2",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pdf]==0.1.5",
                    "pypdf==6.9.0",
                    "markdownify==1.2.2",
                ],
            },
        },
        "docx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[docx]==0.21.5",
                    "markitdown[docx]==0.1.5",
                    "pypandoc-binary==1.17",
                    "python-docx==1.2.0",
                    "markdownify==1.2.2",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[docx]==0.1.5",
                    "pypandoc-binary==1.17",
                    "python-docx==1.2.0",
                    "markdownify==1.2.2",
                ],
            },
        },
        "xlsx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[xlsx]==0.21.5",
                    "markitdown[xlsx]==0.1.5",
                    "pandas==3.0.1",
                    "tabulate==0.10.0",
                    "openpyxl==3.1.5",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[xlsx]==0.1.5",
                    "pandas<3.0.0",
                    "tabulate==0.10.0",
                    "openpyxl==3.1.5",
                ],
            },
        },
        "pptx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pptx]==0.21.5",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==1.2.2",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==1.2.2",
                ],
            },
        },
        # The two html entries list the same packages; only the Python
        # version requirement differs.
        "html": {
            "default": {
                "python": None,
                "dependencies": [
                    "trafilatura==2.0.0",
                    "domscribe==0.1.3",
                    "markitdown==0.1.5",
                    "html2text==2025.4.15",
                    "beautifulsoup4==4.14.3",
                    "httpx==0.28.1",
                    "chardet==7.1.0",
                    "pyppeteer==2.0.0",
                    "selenium==4.25.0",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "trafilatura==2.0.0",
                    "domscribe==0.1.3",
                    "markitdown==0.1.5",
                    "html2text==2025.4.15",
                    "beautifulsoup4==4.14.3",
                    "httpx==0.28.1",
                    "chardet==7.1.0",
                    "pyppeteer==2.0.0",
                    "selenium==4.25.0",
                ],
            },
        },
        "xls": {
            "default": {
                "python": None,
                "dependencies": [
                    "unstructured[xlsx]==0.21.5",
                    "markitdown[xls]==0.1.5",
                    "pandas==3.0.1",
                    "tabulate==0.10.0",
                    "xlrd==2.0.2",
                    "olefile==0.47",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "markitdown[xls]==0.1.5",
                    "pandas<3.0.0",
                    "tabulate==0.10.0",
                    "xlrd==2.0.2",
                    "olefile==0.47",
                    "openpyxl==3.1.5",
                ],
            },
        },
        # "doc" has only a default entry, and it lists no packages.
        "doc": {
            "default": {
                "python": None,
                "dependencies": [],
            },
        },
        "ppt": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pptx]==0.21.5",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==1.2.2",
                    "olefile==0.47",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==1.2.2",
                    "olefile==0.47",
                ],
            },
        },
    }