refactor: 将核心代码迁移到 scripts 目录

- 创建 scripts/ 目录作为核心代码根目录
- 移动 core/, readers/, utils/ 到 scripts/ 下
- 移动 config.py, lyxy_document_reader.py 到 scripts/
- 移动 encoding_detection.py 到 scripts/utils/
- 更新 pyproject.toml 中的入口点路径和 pytest 配置
- 更新所有内部导入语句为 scripts.* 模块
- 更新 README.md 目录结构说明
- 更新 openspec/config.yaml 添加目录结构说明
- 删除无用的 main.py

此变更使项目结构更清晰,便于区分核心代码与测试、文档等支撑文件。
This commit is contained in:
2026-03-08 17:41:03 +08:00
parent 750ef50a8d
commit 15b63800a8
50 changed files with 66 additions and 60 deletions

View File

@@ -108,21 +108,26 @@ else:
``` ```
lyxy-document/ lyxy-document/
├── lyxy_document_reader.py # 统一 CLI 入口 ├── scripts/ # 核心代码目录
├── core/ # 核心模块 │ ├── lyxy_document_reader.py # 统一 CLI 入口
│ ├── exceptions.py # 自定义异常体系 │ ├── config.py # 统一配置类
│ ├── markdown.py # Markdown 工具函数 │ ├── core/ # 核心模块
└── parser.py # 统一解析调度器 │ ├── exceptions.py # 自定义异常体系
├── readers/ # 格式阅读器 ├── markdown.py # Markdown 工具函数
├── base.py # Reader 基类 │ └── parser.py # 统一解析调度器
│ ├── docx/ # DOCX 阅读器 │ ├── readers/ # 格式阅读器
│ ├── xlsx/ # XLSX 阅读器 │ ├── base.py # Reader 基类
│ ├── pptx/ # PPTX 阅读器 │ ├── docx/ # DOCX 阅读器
│ ├── pdf/ # PDF 阅读器 │ ├── xlsx/ # XLSX 阅读器
└── html/ # HTML/URL 阅读器 │ ├── pptx/ # PPTX 阅读器
├── utils/ # 工具函数 ├── pdf/ # PDF 阅读器
│ └── file_detection.py # 文件类型检测 │ └── html/ # HTML/URL 阅读器
└── tests/ # 测试 │ └── utils/ # 工具函数
│ ├── file_detection.py # 文件类型检测
│ └── encoding_detection.py # 编码检测
├── tests/ # 测试
├── openspec/ # 规范文档
└── README.md # 项目文档
``` ```
## 解析器优先级 ## 解析器优先级

View File

@@ -1,6 +0,0 @@
def main():
print("Hello from lyxy-document!")
if __name__ == "__main__":
main()

View File

@@ -5,7 +5,6 @@ context: |
- 语言: 仅中文(交流/注释/文档/代码) - 语言: 仅中文(交流/注释/文档/代码)
- Python: 始终用uv运行(脚本/临时命令uv run python -c); 禁用主机python/禁主机安装包 - Python: 始终用uv运行(脚本/临时命令uv run python -c); 禁用主机python/禁主机安装包
- 依赖: pyproject.toml声明,使用uv安装 - 依赖: pyproject.toml声明,使用uv安装
- 临时文件: 统一放temp目录
- 主机环境: 禁止污染配置,需操作须请求用户 - 主机环境: 禁止污染配置,需操作须请求用户
- 文档: README.md,每次迭代按需更新用户文档和开发文档; 禁emoji/特殊字符 - 文档: README.md,每次迭代按需更新用户文档和开发文档; 禁emoji/特殊字符
- 测试: 所有需求必须设计全面测试 - 测试: 所有需求必须设计全面测试
@@ -13,3 +12,11 @@ context: |
- 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文 - 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文
- 项目阶段: 未上线,无用户,破坏性变更无需迁移说明 - 项目阶段: 未上线,无用户,破坏性变更无需迁移说明
- Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明 - Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明
# 项目目录结构
- scripts/: 核心代码目录
- tests/: 测试目录
- openspec/: 规范文档目录
- temp/: 开发临时文件目录
- pyproject.toml: 项目配置
- README.md: 项目文档

View File

@@ -69,7 +69,7 @@ dev = [
] ]
[project.scripts] [project.scripts]
lyxy-document-reader = "lyxy_document_reader:main" lyxy-document-reader = "scripts.lyxy_document_reader:main"
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
@@ -91,4 +91,4 @@ disallow_untyped_defs = true
[tool.pytest.ini_options] [tool.pytest.ini_options]
testpaths = ["tests"] testpaths = ["tests"]
pythonpath = ["."] pythonpath = ["scripts", "."]

View File

@@ -4,12 +4,12 @@ import argparse
import sys import sys
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from core.exceptions import FileDetectionError, ReaderNotFoundError from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError
from core.markdown import ( from scripts.core.markdown import (
normalize_markdown_whitespace, normalize_markdown_whitespace,
remove_markdown_images, remove_markdown_images,
) )
from readers import BaseReader from scripts.readers import BaseReader
def parse_input( def parse_input(

View File

@@ -20,14 +20,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')
logging.getLogger('docling').setLevel(logging.ERROR) logging.getLogger('docling').setLevel(logging.ERROR)
logging.getLogger('unstructured').setLevel(logging.ERROR) logging.getLogger('unstructured').setLevel(logging.ERROR)
from core import ( from scripts.core import (
FileDetectionError, FileDetectionError,
ReaderNotFoundError, ReaderNotFoundError,
output_result, output_result,
parse_input, parse_input,
process_content, process_content,
) )
from readers import READERS from scripts.readers import READERS
def main() -> None: def main() -> None:

View File

@@ -3,8 +3,8 @@
import os import os
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from readers.base import BaseReader from scripts.readers.base import BaseReader
from utils import is_valid_docx from scripts.utils import is_valid_docx
from . import docling from . import docling
from . import unstructured from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_docling from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_markitdown from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
import zipfile import zipfile
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
from core import build_markdown_table, safe_open_zip from scripts.core import build_markdown_table, safe_open_zip
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Any, List, Optional, Tuple from typing import Any, List, Optional, Tuple
from core import build_markdown_table from scripts.core import build_markdown_table
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,9 +3,9 @@
import os import os
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from readers.base import BaseReader from scripts.readers.base import BaseReader
from utils import is_url from scripts.utils import is_url
import encoding_detection from scripts.utils import encoding_detection
from . import cleaner from . import cleaner
from . import downloader from . import downloader

View File

@@ -3,8 +3,8 @@
import os import os
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from readers.base import BaseReader from scripts.readers.base import BaseReader
from utils import is_valid_pdf from scripts.utils import is_valid_pdf
from . import docling_ocr from . import docling_ocr
from . import unstructured_ocr from . import unstructured_ocr

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_markitdown from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,8 +3,8 @@
import os import os
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from readers.base import BaseReader from scripts.readers.base import BaseReader
from utils import is_valid_pptx from scripts.utils import is_valid_pptx
from . import docling from . import docling
from . import unstructured from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_docling from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_markitdown from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
import zipfile import zipfile
from typing import Any, List, Optional, Tuple from typing import Any, List, Optional, Tuple
from core import build_markdown_table, flush_list_stack from scripts.core import build_markdown_table, flush_list_stack
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Any, List, Optional, Tuple from typing import Any, List, Optional, Tuple
from core import build_markdown_table, flush_list_stack from scripts.core import build_markdown_table, flush_list_stack
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -3,8 +3,8 @@
import os import os
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from readers.base import BaseReader from scripts.readers.base import BaseReader
from utils import is_valid_xlsx from scripts.utils import is_valid_xlsx
from . import docling from . import docling
from . import unstructured from . import unstructured

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_docling from scripts.core import parse_with_docling
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import parse_with_markitdown from scripts.core import parse_with_markitdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
import zipfile import zipfile
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
from core import build_markdown_table, safe_open_zip from scripts.core import build_markdown_table, safe_open_zip
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from core import _unstructured_elements_to_markdown from scripts.core import _unstructured_elements_to_markdown
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -2,7 +2,7 @@
from typing import Optional, Tuple from typing import Optional, Tuple
from config import Config from scripts.config import Config
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]: def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:

View File

@@ -1,6 +1,6 @@
"""测试 Markdown 工具函数。""" """测试 Markdown 工具函数。"""
from core import ( from scripts.core import (
get_heading_level, get_heading_level,
extract_titles, extract_titles,
normalize_markdown_whitespace, normalize_markdown_whitespace,

View File

@@ -1,6 +1,6 @@
"""测试文件检测工具函数。""" """测试文件检测工具函数。"""
from utils import is_url, is_html_file from scripts.utils import is_url, is_html_file
class TestIsUrl: class TestIsUrl: