refactor: 将核心代码迁移到 scripts 目录

- 创建 scripts/ 目录作为核心代码根目录
- 移动 core/, readers/, utils/ 到 scripts/ 下
- 移动 config.py, lyxy_document_reader.py 到 scripts/
- 移动 encoding_detection.py 到 scripts/utils/
- 更新 pyproject.toml 中的入口点路径和 pytest 配置
- 更新所有内部导入语句为 scripts.* 模块
- 更新 README.md 目录结构说明
- 更新 openspec/config.yaml 添加目录结构说明
- 删除无用的 main.py

此变更使项目结构更清晰，便于区分核心代码与测试、文档等支撑文件。
This commit is contained in:
2026-03-08 17:41:03 +08:00
parent 750ef50a8d
commit 15b63800a8
50 changed files with 66 additions and 60 deletions

View File

@@ -108,21 +108,26 @@ else:
```
lyxy-document/
├── lyxy_document_reader.py    # 统一 CLI 入口
├── core/                      # 核心模块
│   ├── exceptions.py          # 自定义异常体系
│   ├── markdown.py            # Markdown 工具函数
│   └── parser.py              # 统一解析调度器
├── readers/                   # 格式阅读器
│   ├── base.py                # Reader 基类
│   ├── docx/                  # DOCX 阅读器
│   ├── xlsx/                  # XLSX 阅读器
│   ├── pptx/                  # PPTX 阅读器
│   ├── pdf/                   # PDF 阅读器
│   └── html/                  # HTML/URL 阅读器
├── utils/                     # 工具函数
│   └── file_detection.py      # 文件类型检测
└── tests/                     # 测试
├── scripts/                       # 核心代码目录
│   ├── lyxy_document_reader.py    # 统一 CLI 入口
│   ├── config.py                  # 统一配置类
│   ├── core/                      # 核心模块
│   │   ├── exceptions.py          # 自定义异常体系
│   │   ├── markdown.py            # Markdown 工具函数
│   │   └── parser.py              # 统一解析调度器
│   ├── readers/                   # 格式阅读器
│   │   ├── base.py                # Reader 基类
│   │   ├── docx/                  # DOCX 阅读器
│   │   ├── xlsx/                  # XLSX 阅读器
│   │   ├── pptx/                  # PPTX 阅读器
│   │   ├── pdf/                   # PDF 阅读器
│   │   └── html/                  # HTML/URL 阅读器
│   └── utils/                     # 工具函数
│       ├── file_detection.py      # 文件类型检测
│       └── encoding_detection.py  # 编码检测
├── tests/                         # 测试
├── openspec/                      # 规范文档
└── README.md                      # 项目文档
```
## 解析器优先级

View File

@@ -1,6 +0,0 @@
def main():
    """Print the project's greeting line to standard output."""
    print("Hello from lyxy-document!")


# Run the greeting only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

View File

@@ -5,7 +5,6 @@ context: |
- 语言: 仅中文(交流/注释/文档/代码)
- Python: 始终用uv运行(脚本/临时命令uv run python -c); 禁用主机python/禁主机安装包
- 依赖: pyproject.toml声明,使用uv安装
- 临时文件: 统一放temp目录
- 主机环境: 禁止污染配置,需操作须请求用户
- 文档: README.md,每次迭代按需更新用户文档和开发文档; 禁emoji/特殊字符
- 测试: 所有需求必须设计全面测试
@@ -13,3 +12,11 @@ context: |
- 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文
- 项目阶段: 未上线,无用户,破坏性变更无需迁移说明
- Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明
# 项目目录结构
- scripts/: 核心代码目录
- tests/: 测试目录
- openspec/: 规范文档目录
- temp/: 开发临时文件目录
- pyproject.toml: 项目配置
- README.md: 项目文档

View File

@@ -69,7 +69,7 @@ dev = [
]
[project.scripts]
lyxy-document-reader = "lyxy_document_reader:main"
lyxy-document-reader = "scripts.lyxy_document_reader:main"
[build-system]
requires = ["hatchling"]
@@ -91,4 +91,4 @@ disallow_untyped_defs = true
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["."]
pythonpath = ["scripts", "."]

View File

@@ -4,12 +4,12 @@ import argparse
import sys
from typing import List, Optional, Tuple
from core.exceptions import FileDetectionError, ReaderNotFoundError
from core.markdown import (
from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError
from scripts.core.markdown import (
normalize_markdown_whitespace,
remove_markdown_images,
)
from readers import BaseReader
from scripts.readers import BaseReader
def parse_input(

View File

@@ -20,14 +20,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')
logging.getLogger('docling').setLevel(logging.ERROR)
logging.getLogger('unstructured').setLevel(logging.ERROR)
from core import (
from scripts.core import (
FileDetectionError,
ReaderNotFoundError,
output_result,
parse_input,
process_content,
)
from readers import READERS
from scripts.readers import READERS
def main() -> None:

View File

@@ -3,8 +3,8 @@
import os
from typing import List, Optional, Tuple
from readers.base import BaseReader
from utils import is_valid_docx
from scripts.readers.base import BaseReader
from scripts.utils import is_valid_docx
from . import docling
from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_docling
from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_markitdown
from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
import zipfile
from typing import Any, Dict, List, Optional, Tuple
from core import build_markdown_table, safe_open_zip
from scripts.core import build_markdown_table, safe_open_zip
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Any, List, Optional, Tuple
from core import build_markdown_table
from scripts.core import build_markdown_table
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown
from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,9 +3,9 @@
import os
from typing import List, Optional, Tuple
from readers.base import BaseReader
from utils import is_url
import encoding_detection
from scripts.readers.base import BaseReader
from scripts.utils import is_url
from scripts.utils import encoding_detection
from . import cleaner
from . import downloader

View File

@@ -3,8 +3,8 @@
import os
from typing import List, Optional, Tuple
from readers.base import BaseReader
from utils import is_valid_pdf
from scripts.readers.base import BaseReader
from scripts.utils import is_valid_pdf
from . import docling_ocr
from . import unstructured_ocr

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_markitdown
from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown
from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown
from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,8 +3,8 @@
import os
from typing import List, Optional, Tuple
from readers.base import BaseReader
from utils import is_valid_pptx
from scripts.readers.base import BaseReader
from scripts.utils import is_valid_pptx
from . import docling
from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_docling
from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_markitdown
from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
import zipfile
from typing import Any, List, Optional, Tuple
from core import build_markdown_table, flush_list_stack
from scripts.core import build_markdown_table, flush_list_stack
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Any, List, Optional, Tuple
from core import build_markdown_table, flush_list_stack
from scripts.core import build_markdown_table, flush_list_stack
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown
from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,8 +3,8 @@
import os
from typing import List, Optional, Tuple
from readers.base import BaseReader
from utils import is_valid_xlsx
from scripts.readers.base import BaseReader
from scripts.utils import is_valid_xlsx
from . import docling
from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_docling
from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import parse_with_markitdown
from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
import zipfile
from typing import List, Optional, Tuple
from core import build_markdown_table, safe_open_zip
from scripts.core import build_markdown_table, safe_open_zip
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown
from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple
from config import Config
from scripts.config import Config
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -1,6 +1,6 @@
"""测试 Markdown 工具函数。"""
from core import (
from scripts.core import (
get_heading_level,
extract_titles,
normalize_markdown_whitespace,

View File

@@ -1,6 +1,6 @@
"""测试文件检测工具函数。"""
from utils import is_url, is_html_file
from scripts.utils import is_url, is_html_file
class TestIsUrl: