refactor: 将核心代码迁移到 scripts 目录

- 创建 scripts/ 目录作为核心代码根目录
- 移动 core/, readers/, utils/ 到 scripts/ 下
- 移动 config.py, lyxy_document_reader.py 到 scripts/
- 移动 encoding_detection.py 到 scripts/utils/
- 更新 pyproject.toml 中的入口点路径和 pytest 配置
- 更新所有内部导入语句为 scripts.* 模块
- 更新 README.md 目录结构说明
- 更新 openspec/config.yaml 添加目录结构说明
- 删除无用的 main.py

此变更使项目结构更清晰，便于区分核心代码与测试、文档等支撑文件。
This commit is contained in:
35
README.md
35
README.md
@@ -108,21 +108,26 @@ else:
|
||||
|
||||
```
|
||||
lyxy-document/
|
||||
├── lyxy_document_reader.py # 统一 CLI 入口
|
||||
├── core/ # 核心模块
|
||||
│ ├── exceptions.py # 自定义异常体系
|
||||
│ ├── markdown.py # Markdown 工具函数
|
||||
│ └── parser.py # 统一解析调度器
|
||||
├── readers/ # 格式阅读器
|
||||
│ ├── base.py # Reader 基类
|
||||
│ ├── docx/ # DOCX 阅读器
|
||||
│ ├── xlsx/ # XLSX 阅读器
|
||||
│ ├── pptx/ # PPTX 阅读器
|
||||
│ ├── pdf/ # PDF 阅读器
|
||||
│ └── html/ # HTML/URL 阅读器
|
||||
├── utils/ # 工具函数
|
||||
│ └── file_detection.py # 文件类型检测
|
||||
└── tests/ # 测试
|
||||
├── scripts/ # 核心代码目录
|
||||
│ ├── lyxy_document_reader.py # 统一 CLI 入口
|
||||
│ ├── config.py # 统一配置类
|
||||
│ ├── core/ # 核心模块
|
||||
│ │ ├── exceptions.py # 自定义异常体系
|
||||
│ │ ├── markdown.py # Markdown 工具函数
|
||||
│ │ └── parser.py # 统一解析调度器
|
||||
│ ├── readers/ # 格式阅读器
|
||||
│ │ ├── base.py # Reader 基类
|
||||
│ │ ├── docx/ # DOCX 阅读器
|
||||
│ │ ├── xlsx/ # XLSX 阅读器
|
||||
│ │ ├── pptx/ # PPTX 阅读器
|
||||
│ │ ├── pdf/ # PDF 阅读器
|
||||
│ │ └── html/ # HTML/URL 阅读器
|
||||
│ └── utils/ # 工具函数
|
||||
│ ├── file_detection.py # 文件类型检测
|
||||
│ └── encoding_detection.py # 编码检测
|
||||
├── tests/ # 测试
|
||||
├── openspec/ # 规范文档
|
||||
└── README.md # 项目文档
|
||||
```
|
||||
|
||||
## 解析器优先级
|
||||
|
||||
6
main.py
6
main.py
@@ -1,6 +0,0 @@
|
||||
def main():
|
||||
print("Hello from lyxy-document!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -5,7 +5,6 @@ context: |
|
||||
- 语言: 仅中文(交流/注释/文档/代码)
|
||||
- Python: 始终用uv运行(脚本/临时命令uv run python -c); 禁用主机python/禁主机安装包
|
||||
- 依赖: pyproject.toml声明,使用uv安装
|
||||
- 临时文件: 统一放temp目录
|
||||
- 主机环境: 禁止污染配置,需操作须请求用户
|
||||
- 文档: README.md,每次迭代按需更新用户文档和开发文档; 禁emoji/特殊字符
|
||||
- 测试: 所有需求必须设计全面测试
|
||||
@@ -13,3 +12,11 @@ context: |
|
||||
- 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文
|
||||
- 项目阶段: 未上线,无用户,破坏性变更无需迁移说明
|
||||
- Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明
|
||||
|
||||
# 项目目录结构
|
||||
- scripts/: 核心代码目录
|
||||
- tests/: 测试目录
|
||||
- openspec/: 规范文档目录
|
||||
- temp/: 开发临时文件目录
|
||||
- pyproject.toml: 项目配置
|
||||
- README.md: 项目文档
|
||||
|
||||
@@ -69,7 +69,7 @@ dev = [
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
lyxy-document-reader = "lyxy_document_reader:main"
|
||||
lyxy-document-reader = "scripts.lyxy_document_reader:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
@@ -91,4 +91,4 @@ disallow_untyped_defs = true
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
pythonpath = ["."]
|
||||
pythonpath = ["scripts", "."]
|
||||
|
||||
@@ -4,12 +4,12 @@ import argparse
|
||||
import sys
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
from core.markdown import (
|
||||
from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
from scripts.core.markdown import (
|
||||
normalize_markdown_whitespace,
|
||||
remove_markdown_images,
|
||||
)
|
||||
from readers import BaseReader
|
||||
from scripts.readers import BaseReader
|
||||
|
||||
|
||||
def parse_input(
|
||||
@@ -20,14 +20,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')
|
||||
logging.getLogger('docling').setLevel(logging.ERROR)
|
||||
logging.getLogger('unstructured').setLevel(logging.ERROR)
|
||||
|
||||
from core import (
|
||||
from scripts.core import (
|
||||
FileDetectionError,
|
||||
ReaderNotFoundError,
|
||||
output_result,
|
||||
parse_input,
|
||||
process_content,
|
||||
)
|
||||
from readers import READERS
|
||||
from scripts.readers import READERS
|
||||
|
||||
|
||||
def main() -> None:
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_docx
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_docx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_docling
|
||||
from scripts.core import parse_with_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_markitdown
|
||||
from scripts.core import parse_with_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from core import build_markdown_table, safe_open_zip
|
||||
from scripts.core import build_markdown_table, safe_open_zip
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from core import build_markdown_table
|
||||
from scripts.core import build_markdown_table
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import _unstructured_elements_to_markdown
|
||||
from scripts.core import _unstructured_elements_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -3,9 +3,9 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from readers.base import BaseReader
|
||||
from utils import is_url
|
||||
import encoding_detection
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_url
|
||||
from scripts.utils import encoding_detection
|
||||
|
||||
from . import cleaner
|
||||
from . import downloader
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_pdf
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_pdf
|
||||
|
||||
from . import docling_ocr
|
||||
from . import unstructured_ocr
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_markitdown
|
||||
from scripts.core import parse_with_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import _unstructured_elements_to_markdown
|
||||
from scripts.core import _unstructured_elements_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import _unstructured_elements_to_markdown
|
||||
from scripts.core import _unstructured_elements_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_pptx
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_pptx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_docling
|
||||
from scripts.core import parse_with_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_markitdown
|
||||
from scripts.core import parse_with_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from core import build_markdown_table, flush_list_stack
|
||||
from scripts.core import build_markdown_table, flush_list_stack
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from core import build_markdown_table, flush_list_stack
|
||||
from scripts.core import build_markdown_table, flush_list_stack
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import _unstructured_elements_to_markdown
|
||||
from scripts.core import _unstructured_elements_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_xlsx
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_xlsx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_docling
|
||||
from scripts.core import parse_with_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import parse_with_markitdown
|
||||
from scripts.core import parse_with_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from core import build_markdown_table, safe_open_zip
|
||||
from scripts.core import build_markdown_table, safe_open_zip
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from core import _unstructured_elements_to_markdown
|
||||
from scripts.core import _unstructured_elements_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from config import Config
|
||||
from scripts.config import Config
|
||||
|
||||
|
||||
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
@@ -1,6 +1,6 @@
|
||||
"""测试 Markdown 工具函数。"""
|
||||
|
||||
from core import (
|
||||
from scripts.core import (
|
||||
get_heading_level,
|
||||
extract_titles,
|
||||
normalize_markdown_whitespace,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"""测试文件检测工具函数。"""
|
||||
|
||||
from utils import is_url, is_html_file
|
||||
from scripts.utils import is_url, is_html_file
|
||||
|
||||
|
||||
class TestIsUrl:
|
||||
|
||||
Reference in New Issue
Block a user