Files
lyxy-document/scripts/config.py
lanyuanxiaoyao 65c746c639 refactor: 移除 doc 和 ppt reader 支持
移除对旧版 .doc 和 .ppt 格式的支持,以简化项目架构和减少维护负担。

变更内容:
- 删除 scripts/readers/doc/ 目录
- 删除 scripts/readers/ppt/ 目录
- 从 readers/__init__.py 中移除 DocReader 和 PptReader
- 从 utils/file_detection.py 中移除 is_valid_doc 和 is_valid_ppt
- 从 config.py 中移除 doc 和 ppt 依赖配置
- 从 advice_generator.py 中移除相关映射
- 更新 CLI 帮助文档
- 更新 README.md 文档
- 删除相关测试用例
- 删除相关规范文档
2026-03-11 00:55:15 +08:00

115 lines
2.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""统一配置类,集中管理所有配置项。"""
class Config:
    """Unified configuration class that keeps all settings in one place.

    Every setting is a plain class attribute, so callers read values
    directly (for example ``Config.DOWNLOAD_TIMEOUT``) without creating
    an instance.
    """

    # --- Encoding detection ---
    # Fallback encodings, tried in order when chardet detection fails.
    FALLBACK_ENCODINGS = ["utf-8", "gbk", "gb2312", "latin-1"]

    # --- HTML download ---
    # Download timeout, in seconds.
    DOWNLOAD_TIMEOUT = 30
    # HTTP User-Agent identifier.
    USER_AGENT = "lyxy-document/0.1.0"

    # --- Logging ---
    # Log level. Defaults to ERROR only, so log output does not interfere
    # with the Markdown output.
    LOG_LEVEL = "ERROR"

    # --- Dependencies, organized by file type and then by platform ---
    # Each platform entry holds:
    #   "python":       required Python version (None means use the default)
    #   "dependencies": list of package requirement strings
    # "default" is the generic entry for a file type. Other keys such as
    # "Darwin-x86_64" are platform-specific entries.
    # NOTE(review): the platform key presumably follows the
    # "<system>-<machine>" form produced from the `platform` module, and
    # presumably takes precedence over "default" -- confirm against the
    # code that consumes this table.
    DEPENDENCIES = {
        "pdf": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling",
                    "unstructured[pdf]",
                    "markitdown[pdf]",
                    "pypdf",
                    "markdownify",
                ],
            },
            "Darwin-x86_64": {
                "python": "3.12",
                "dependencies": [
                    "docling==2.40.0",
                    "docling-parse==4.0.0",
                    "numpy<2",
                    "markitdown[pdf]",
                    "pypdf",
                    "markdownify",
                ],
            },
        },
        "docx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling",
                    "unstructured[docx]",
                    "markitdown[docx]",
                    "pypandoc-binary",
                    "python-docx",
                    "markdownify",
                ],
            },
        },
        "xlsx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling",
                    "unstructured[xlsx]",
                    "markitdown[xlsx]",
                    "pandas",
                    "tabulate",
                ],
            },
        },
        "pptx": {
            "default": {
                "python": None,
                "dependencies": [
                    "docling",
                    "unstructured[pptx]",
                    "markitdown[pptx]",
                    "python-pptx",
                    "markdownify",
                ],
            },
        },
        "html": {
            "default": {
                "python": None,
                "dependencies": [
                    "trafilatura",
                    "domscribe",
                    "markitdown",
                    "html2text",
                    "beautifulsoup4",
                    "httpx",
                    "chardet",
                    "pyppeteer",
                    "selenium",
                ],
            },
        },
        "xls": {
            "default": {
                "python": None,
                "dependencies": [
                    "unstructured[xlsx]",
                    "markitdown[xls]",
                    "pandas",
                    "tabulate",
                    "xlrd",
                    "olefile",
                ],
            },
        },
    }