- 新增 scripts/readers/_utils.py 作为 Reader 内部共享工具模块
- 将 parse_with_markitdown 等函数从 core/markdown.py 迁移到 _utils.py
- 函数重命名:parse_with_xxx → parse_via_xxx,_unstructured_elements_to_markdown → convert_unstructured_to_markdown
- 更新 17 个 Reader 实现文件的 import 路径
- 从 core/__init__.py 移除已迁移函数的导出
- 新增测试文件 tests/test_readers/test_utils.py
- 新增 spec 文档 openspec/specs/reader-internal-utils/spec.md

这次重构明确了模块边界:core/ 提供公共 API,readers/_utils.py 提供 Reader 内部工具。
36 lines
778 B
Python
36 lines
778 B
Python
"""Core module for lyxy-document."""
|
|
|
|
# Exception types defined in the sibling ``exceptions`` module.
from .exceptions import (
    LyxyDocumentError,
    FileDetectionError,
    ReaderNotFoundError,
    ParseError,
    DownloadError,
)

# Markdown helpers defined in the sibling ``markdown`` module.
from .markdown import (
    normalize_markdown_whitespace,
    remove_markdown_images,
    get_heading_level,
    extract_titles,
    extract_title_content,
    search_markdown,
)

# Functions defined in the sibling ``parser`` module.
from .parser import parse_input, process_content, output_result

# Explicit public API of the package: exactly the names re-exported above,
# listed in the same order as the import statements.
__all__ = [
    # exceptions
    "LyxyDocumentError",
    "FileDetectionError",
    "ReaderNotFoundError",
    "ParseError",
    "DownloadError",
    # markdown
    "normalize_markdown_whitespace",
    "remove_markdown_images",
    "get_heading_level",
    "extract_titles",
    "extract_title_content",
    "search_markdown",
    # parser
    "parse_input",
    "process_content",
    "output_result",
]