refactor: 调整模块导入路径,简化引用结构
- 更新 openspec/config.yaml 中 git 任务相关说明
- 将 scripts.core.* 改为 core.*,scripts.readers.* 改为 readers.*
- 优化 lyxy_document_reader.py 中 sys.path 设置方式
- 同步更新所有测试文件的导入路径
This commit is contained in:
@@ -4,12 +4,12 @@ import argparse
|
||||
import sys
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
from scripts.core.markdown import (
|
||||
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||
from core.markdown import (
|
||||
normalize_markdown_whitespace,
|
||||
remove_markdown_images,
|
||||
)
|
||||
from scripts.readers import BaseReader
|
||||
from readers import BaseReader
|
||||
|
||||
|
||||
def parse_input(
|
||||
|
||||
@@ -6,12 +6,12 @@ import logging
|
||||
import os
|
||||
import sys
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
# 将项目根目录添加到 sys.path,支持从任意位置执行脚本
|
||||
_current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
_project_root = os.path.dirname(_current_dir)
|
||||
if _project_root not in sys.path:
|
||||
sys.path.insert(0, _project_root)
|
||||
# 将 scripts/ 目录添加到 sys.path,支持从任意位置执行脚本
|
||||
scripts_dir = Path(__file__).resolve().parent
|
||||
if str(scripts_dir) not in sys.path:
|
||||
sys.path.append(str(scripts_dir))
|
||||
|
||||
# 抑制第三方库的进度条和日志,仅保留解析结果输出
|
||||
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
||||
@@ -26,14 +26,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')
|
||||
logging.getLogger('docling').setLevel(logging.ERROR)
|
||||
logging.getLogger('unstructured').setLevel(logging.ERROR)
|
||||
|
||||
from scripts.core import (
|
||||
from core import (
|
||||
FileDetectionError,
|
||||
ReaderNotFoundError,
|
||||
output_result,
|
||||
parse_input,
|
||||
process_content,
|
||||
)
|
||||
from scripts.readers import READERS
|
||||
from readers import READERS
|
||||
|
||||
|
||||
def main() -> None:
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_docx
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_docx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_docling
|
||||
from readers._utils import parse_via_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_markitdown
|
||||
from readers._utils import parse_via_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import build_markdown_table, safe_open_zip
|
||||
from readers._utils import build_markdown_table, safe_open_zip
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import build_markdown_table
|
||||
from readers._utils import build_markdown_table
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
||||
from readers._utils import convert_unstructured_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -4,9 +4,9 @@ import os
|
||||
import tempfile
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_url
|
||||
from scripts.utils import encoding_detection
|
||||
from readers.base import BaseReader
|
||||
from utils import is_url
|
||||
from utils import encoding_detection
|
||||
|
||||
from . import cleaner
|
||||
from .downloader import download_html
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_pdf
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_pdf
|
||||
|
||||
from . import docling_ocr
|
||||
from . import unstructured_ocr
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_markitdown
|
||||
from readers._utils import parse_via_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
||||
from readers._utils import convert_unstructured_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
||||
from readers._utils import convert_unstructured_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_pptx
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_pptx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_docling
|
||||
from readers._utils import parse_via_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_markitdown
|
||||
from readers._utils import parse_via_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import build_markdown_table, flush_list_stack
|
||||
from readers._utils import build_markdown_table, flush_list_stack
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Any, List, Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import build_markdown_table, flush_list_stack
|
||||
from readers._utils import build_markdown_table, flush_list_stack
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
||||
from readers._utils import convert_unstructured_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers.base import BaseReader
|
||||
from scripts.utils import is_valid_xlsx
|
||||
from readers.base import BaseReader
|
||||
from utils import is_valid_xlsx
|
||||
|
||||
from . import docling
|
||||
from . import unstructured
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_docling
|
||||
from readers._utils import parse_via_docling
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import parse_via_markitdown
|
||||
from readers._utils import parse_via_markitdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
||||
import zipfile
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import build_markdown_table, safe_open_zip
|
||||
from readers._utils import build_markdown_table, safe_open_zip
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
||||
from readers._utils import convert_unstructured_to_markdown
|
||||
|
||||
|
||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from scripts.config import Config
|
||||
from config import Config
|
||||
|
||||
|
||||
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||
|
||||
Reference in New Issue
Block a user