"""统一配置类,集中管理所有配置项。""" class Config: """统一配置类""" # 编码检测 # 回退编码列表,当 chardet 检测失败时依次尝试 FALLBACK_ENCODINGS = ['utf-8', 'gbk', 'gb2312', 'latin-1'] # HTML 下载 # 下载超时时间(秒) DOWNLOAD_TIMEOUT = 30 # HTTP User-Agent 标识 USER_AGENT = "lyxy-document/0.1.0" # 日志 # 日志等级,默认只输出 ERROR 级别避免干扰 Markdown 输出 LOG_LEVEL = "ERROR" # 依赖配置:按文件类型和平台组织 # 每个平台配置包含 python 版本要求(None 表示使用默认)和依赖列表 DEPENDENCIES = { "pdf": { "default": { "python": None, "dependencies": [ "docling==2.80.0", "unstructured[pdf]==0.21.5", "markitdown[pdf]==0.1.5", "pypdf==6.9.0", "markdownify==1.2.2" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "docling==2.40.0", "docling-parse==4.0.0", "numpy<2", "markitdown[pdf]==0.1.5", "pypdf==6.9.0", "markdownify==1.2.2" ] } }, "docx": { "default": { "python": None, "dependencies": [ "docling==2.80.0", "unstructured[docx]==0.21.5", "markitdown[docx]==0.1.5", "pypandoc-binary==1.17", "python-docx==1.2.0", "markdownify==1.2.2" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "docling==2.40.0", "docling-parse==4.0.0", "numpy<2", "markitdown[docx]==0.1.5", "pypandoc-binary==1.17", "python-docx==1.2.0", "markdownify==1.2.2" ] } }, "xlsx": { "default": { "python": None, "dependencies": [ "docling==2.80.0", "unstructured[xlsx]==0.21.5", "markitdown[xlsx]==0.1.5", "pandas==3.0.1", "tabulate==0.10.0", "openpyxl==3.1.5" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "docling==2.40.0", "docling-parse==4.0.0", "numpy<2", "markitdown[xlsx]==0.1.5", "pandas<3.0.0", "tabulate==0.10.0", "openpyxl==3.1.5" ] } }, "pptx": { "default": { "python": None, "dependencies": [ "docling==2.80.0", "unstructured[pptx]==0.21.5", "markitdown[pptx]==0.1.5", "python-pptx==1.0.2", "markdownify==1.2.2" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "docling==2.40.0", "docling-parse==4.0.0", "numpy<2", "markitdown[pptx]==0.1.5", "python-pptx==1.0.2", "markdownify==1.2.2" ] } }, "html": { "default": { "python": None, "dependencies": [ "trafilatura==2.0.0", "domscribe==0.1.3", "markitdown==0.1.5", "html2text==2025.4.15", "beautifulsoup4==4.14.3", "httpx==0.28.1", "chardet==7.1.0", "pyppeteer==2.0.0", "selenium==4.25.0" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "trafilatura==2.0.0", "domscribe==0.1.3", "markitdown==0.1.5", "html2text==2025.4.15", "beautifulsoup4==4.14.3", "httpx==0.28.1", "chardet==7.1.0", "pyppeteer==2.0.0", "selenium==4.25.0" ] } }, "xls": { "default": { "python": None, "dependencies": [ "unstructured[xlsx]==0.21.5", "markitdown[xls]==0.1.5", "pandas==3.0.1", "tabulate==0.10.0", "xlrd==2.0.2", "olefile==0.47" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "markitdown[xls]==0.1.5", "pandas<3.0.0", "tabulate==0.10.0", "xlrd==2.0.2", "olefile==0.47", "openpyxl==3.1.5" ] } }, "doc": { "default": { "python": None, "dependencies": [] } }, "ppt": { "default": { "python": None, "dependencies": [ "docling==2.80.0", "unstructured[pptx]==0.21.5", "markitdown[pptx]==0.1.5", "python-pptx==1.0.2", "markdownify==1.2.2", "olefile==0.47" ] }, "Darwin-x86_64": { "python": "3.12", "dependencies": [ "docling==2.40.0", "docling-parse==4.0.0", "numpy<2", "markitdown[pptx]==0.1.5", "python-pptx==1.0.2", "markdownify==1.2.2", "olefile==0.47" ] } } }