"""Unified configuration class that centralizes all configuration items."""


class Config:
    """Unified configuration holder; every setting is a class-level constant."""

    # ------------------------------------------------------------------
    # Encoding detection
    # ------------------------------------------------------------------
    # Fallback encodings, tried in order when chardet detection fails.
    FALLBACK_ENCODINGS = ["utf-8", "gbk", "gb2312", "latin-1"]

    # ------------------------------------------------------------------
    # HTML download
    # ------------------------------------------------------------------
    # Download timeout, in seconds.
    DOWNLOAD_TIMEOUT = 30

    # HTTP User-Agent identifier.
    USER_AGENT = "lyxy-document/0.1.0"

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    # Log level; by default only ERROR is emitted so that log lines do not
    # interfere with the Markdown output.
    LOG_LEVEL = "ERROR"

    # ------------------------------------------------------------------
    # Dependencies, organized by file type and then by platform
    # ------------------------------------------------------------------
    # Each platform entry carries a "python" version requirement (None means
    # "use the default") and a "dependencies" list of requirement strings.
    # NOTE(review): platform keys such as "Darwin-x86_64" look like
    # "<system>-<machine>" identifiers, with "default" as the fallback entry;
    # confirm against the code that reads this table.
    DEPENDENCIES = {
        "pdf": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pdf]",
                    "markitdown[pdf]==0.1.5",
                    "pypdf==6.9.0",
                    "markdownify==0.13.1",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pdf]==0.1.5",
                    "pypdf==6.9.0",
                    "markdownify==0.13.1",
                ],
            },
        },
        "docx": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[docx]",
                    "markitdown[docx]==0.1.5",
                    "pypandoc-binary==1.13",
                    "python-docx==1.2.0",
                    "markdownify==0.13.1",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[docx]==0.1.5",
                    "pypandoc-binary==1.13",
                    "python-docx==1.2.0",
                    "markdownify==0.13.1",
                ],
            },
        },
        "xlsx": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[xlsx]",
                    "markitdown[xlsx]==0.1.5",
                    "pandas==3.0.1",
                    "tabulate==0.9.0",
                    "openpyxl==3.1.5",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[xlsx]==0.1.5",
                    "pandas<3.0.0",
                    "tabulate==0.9.0",
                    "openpyxl==3.1.5",
                ],
            },
        },
        "pptx": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pptx]",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==0.13.1",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==0.13.1",
                ],
            },
        },
        "html": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "trafilatura==1.12.2",
                    "domscribe",
                    "markitdown==0.1.5",
                    "html2text==2024.2.26",
                    "beautifulsoup4==4.14.3",
                    "httpx==0.28.1",
                    "chardet==5.2.0",
                    "pyppeteer==2.0.0",
                    "selenium==4.25.0",
                ],
            },
        },
        "xls": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "unstructured[xlsx]",
                    "markitdown[xls]==0.1.5",
                    "pandas==3.0.1",
                    "tabulate==0.9.0",
                    "xlrd==2.0.1",
                    "olefile==0.47",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "markitdown[xls]==0.1.5",
                    "pandas<3.0.0",
                    "tabulate==0.9.0",
                    "xlrd==2.0.1",
                    "olefile==0.47",
                    "openpyxl==3.1.5",
                ],
            },
        },
        # "doc" declares no Python package dependencies.
        "doc": {
            "default": {
                "python": "3.12",
                "dependencies": [],
            },
        },
        "ppt": {
            "default": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.80.0",
                    "unstructured[pptx]",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==0.13.1",
                    "olefile==0.47",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pptx]==0.1.5",
                    "python-pptx==1.0.2",
                    "markdownify==0.13.1",
                    "olefile==0.47",
                ],
            },
        },
    }