From 9daff735897ec70819fe03a8d1956f6a5444b6cd Mon Sep 17 00:00:00 2001 From: lanyuanxiaoyao Date: Mon, 9 Mar 2026 15:44:51 +0800 Subject: [PATCH] =?UTF-8?q?refactor:=20=E8=B0=83=E6=95=B4=E6=A8=A1?= =?UTF-8?q?=E5=9D=97=E5=AF=BC=E5=85=A5=E8=B7=AF=E5=BE=84=EF=BC=8C=E7=AE=80?= =?UTF-8?q?=E5=8C=96=E5=BC=95=E7=94=A8=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 更新 openspec/config.yaml 中 git 任务相关说明 - 将 scripts.core.* 改为 core.*,scripts.readers.* 改为 readers.* - 优化 lyxy_document_reader.py 中 sys.path 设置方式 - 同步更新所有测试文件的导入路径 --- openspec/config.yaml | 2 +- scripts/core/parser.py | 6 +++--- scripts/lyxy_document_reader.py | 14 +++++++------- scripts/readers/docx/__init__.py | 4 ++-- scripts/readers/docx/docling.py | 2 +- scripts/readers/docx/markitdown.py | 2 +- scripts/readers/docx/native_xml.py | 2 +- scripts/readers/docx/python_docx.py | 2 +- scripts/readers/docx/unstructured.py | 2 +- scripts/readers/html/__init__.py | 6 +++--- scripts/readers/pdf/__init__.py | 4 ++-- scripts/readers/pdf/markitdown.py | 2 +- scripts/readers/pdf/unstructured.py | 2 +- scripts/readers/pdf/unstructured_ocr.py | 2 +- scripts/readers/pptx/__init__.py | 4 ++-- scripts/readers/pptx/docling.py | 2 +- scripts/readers/pptx/markitdown.py | 2 +- scripts/readers/pptx/native_xml.py | 2 +- scripts/readers/pptx/python_pptx.py | 2 +- scripts/readers/pptx/unstructured.py | 2 +- scripts/readers/xlsx/__init__.py | 4 ++-- scripts/readers/xlsx/docling.py | 2 +- scripts/readers/xlsx/markitdown.py | 2 +- scripts/readers/xlsx/native_xml.py | 2 +- scripts/readers/xlsx/unstructured.py | 2 +- scripts/utils/encoding_detection.py | 2 +- tests/__init__.py | 11 +++++++++++ tests/conftest.py | 11 ++++++++++- tests/test_cli/conftest.py | 9 ++++++++- tests/test_core/test_markdown.py | 2 +- tests/test_readers/test_docx/test_consistency.py | 2 +- tests/test_readers/test_docx/test_docling_docx.py | 2 +- .../test_readers/test_docx/test_markitdown_docx.py | 2 +- .../test_readers/test_docx/test_native_xml_docx.py | 2 +- tests/test_readers/test_docx/test_pypandoc_docx.py | 2 +- tests/test_readers/test_docx/test_python_docx.py | 2 +- .../test_docx/test_unstructured_docx.py | 2 +- tests/test_readers/test_html/test_consistency.py | 2 +- .../test_readers/test_html/test_domscribe_html.py | 2 +- tests/test_readers/test_html/test_html2text.py | 2 +- .../test_readers/test_html/test_markitdown_html.py | 2 +- .../test_html/test_trafilatura_html.py | 2 +- tests/test_readers/test_pdf/test_consistency.py | 2 +- .../test_readers/test_pdf/test_docling_ocr_pdf.py | 2 +- tests/test_readers/test_pdf/test_docling_pdf.py | 2 +- tests/test_readers/test_pdf/test_markitdown_pdf.py | 2 +- tests/test_readers/test_pdf/test_pypdf.py | 2 +- .../test_pdf/test_unstructured_ocr_pdf.py | 2 +- .../test_readers/test_pdf/test_unstructured_pdf.py | 2 +- tests/test_readers/test_pptx/test_consistency.py | 2 +- tests/test_readers/test_pptx/test_docling_pptx.py | 2 +- .../test_readers/test_pptx/test_markitdown_pptx.py | 2 +- .../test_readers/test_pptx/test_native_xml_pptx.py | 2 +- tests/test_readers/test_pptx/test_python_pptx.py | 2 +- .../test_pptx/test_unstructured_pptx.py | 2 +- tests/test_readers/test_utils.py | 2 +- tests/test_readers/test_xlsx/test_consistency.py | 2 +- tests/test_readers/test_xlsx/test_docling_xlsx.py | 2 +- .../test_readers/test_xlsx/test_markitdown_xlsx.py | 2 +- .../test_readers/test_xlsx/test_native_xml_xlsx.py | 2 +- tests/test_readers/test_xlsx/test_pandas_xlsx.py | 2 +- .../test_xlsx/test_unstructured_xlsx.py | 2 +- tests/test_utils/test_file_detection.py | 2 +- 63 files changed, 103 insertions(+), 76 deletions(-) diff --git a/openspec/config.yaml b/openspec/config.yaml index a9da0e8..6c3e6cf 100644 --- a/openspec/config.yaml +++ b/openspec/config.yaml @@ -9,7 +9,7 @@ context: | - 开发文档: README.md,每次迭代按需更新开发文档; 禁emoji/特殊字符 - skill文档: SKILL.md,每次迭代按需更新skill文档 - 测试: 所有需求必须设计全面测试 - - 任务: 禁止创建git变更任务(push/commit等); git读取允许(status/log/diff等) + - 任务: 除非用户直接要求,禁止创建git变更任务(push/commit等); git读取允许(status/log/diff等) - 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文 - 项目阶段: 未上线,无用户,破坏性变更无需迁移说明 - Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明 diff --git a/scripts/core/parser.py b/scripts/core/parser.py index 9027e38..490445b 100644 --- a/scripts/core/parser.py +++ b/scripts/core/parser.py @@ -4,12 +4,12 @@ import argparse import sys from typing import List, Optional, Tuple -from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError -from scripts.core.markdown import ( +from core.exceptions import FileDetectionError, ReaderNotFoundError +from core.markdown import ( normalize_markdown_whitespace, remove_markdown_images, ) -from scripts.readers import BaseReader +from readers import BaseReader def parse_input( diff --git a/scripts/lyxy_document_reader.py b/scripts/lyxy_document_reader.py index 83e5ae4..722fa85 100644 --- a/scripts/lyxy_document_reader.py +++ b/scripts/lyxy_document_reader.py @@ -6,12 +6,12 @@ import logging import os import sys import warnings +from pathlib import Path -# 将项目根目录添加到 sys.path,支持从任意位置执行脚本 -_current_dir = os.path.dirname(os.path.abspath(__file__)) -_project_root = os.path.dirname(_current_dir) -if _project_root not in sys.path: - sys.path.insert(0, _project_root) +# 将 scripts/ 目录添加到 sys.path,支持从任意位置执行脚本 +scripts_dir = Path(__file__).resolve().parent +if str(scripts_dir) not in sys.path: + sys.path.append(str(scripts_dir)) # 抑制第三方库的进度条和日志,仅保留解析结果输出 os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" @@ -26,14 +26,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s') logging.getLogger('docling').setLevel(logging.ERROR) logging.getLogger('unstructured').setLevel(logging.ERROR) -from scripts.core import ( +from core import ( FileDetectionError, ReaderNotFoundError, output_result, parse_input, process_content, ) -from scripts.readers import READERS +from readers import READERS def main() -> None: diff --git a/scripts/readers/docx/__init__.py b/scripts/readers/docx/__init__.py index be6ab5a..f7e35dc 100644 --- a/scripts/readers/docx/__init__.py +++ b/scripts/readers/docx/__init__.py @@ -3,8 +3,8 @@ import os from typing import List, Optional, Tuple -from scripts.readers.base import BaseReader -from scripts.utils import is_valid_docx +from readers.base import BaseReader +from utils import is_valid_docx from . import docling from . import unstructured diff --git a/scripts/readers/docx/docling.py b/scripts/readers/docx/docling.py index 5fc20d1..e59e1fc 100644 --- a/scripts/readers/docx/docling.py +++ b/scripts/readers/docx/docling.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_docling +from readers._utils import parse_via_docling def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/docx/markitdown.py b/scripts/readers/docx/markitdown.py index 392b65c..f552e59 100644 --- a/scripts/readers/docx/markitdown.py +++ b/scripts/readers/docx/markitdown.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_markitdown +from readers._utils import parse_via_markitdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/docx/native_xml.py b/scripts/readers/docx/native_xml.py index bd8f3e2..3b5b8d1 100644 --- a/scripts/readers/docx/native_xml.py +++ b/scripts/readers/docx/native_xml.py @@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET import zipfile from typing import Any, Dict, List, Optional, Tuple -from scripts.readers._utils import build_markdown_table, safe_open_zip +from readers._utils import build_markdown_table, safe_open_zip def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/docx/python_docx.py b/scripts/readers/docx/python_docx.py index a631493..78ff4bd 100644 --- a/scripts/readers/docx/python_docx.py +++ b/scripts/readers/docx/python_docx.py @@ -2,7 +2,7 @@ from typing import Any, List, Optional, Tuple -from scripts.readers._utils import build_markdown_table +from readers._utils import build_markdown_table def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/docx/unstructured.py b/scripts/readers/docx/unstructured.py index e27d48d..20df3dc 100644 --- a/scripts/readers/docx/unstructured.py +++ b/scripts/readers/docx/unstructured.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import convert_unstructured_to_markdown +from readers._utils import convert_unstructured_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/html/__init__.py b/scripts/readers/html/__init__.py index 0e8a27b..b2ec273 100644 --- a/scripts/readers/html/__init__.py +++ b/scripts/readers/html/__init__.py @@ -4,9 +4,9 @@ import os import tempfile from typing import List, Optional, Tuple -from scripts.readers.base import BaseReader -from scripts.utils import is_url -from scripts.utils import encoding_detection +from readers.base import BaseReader +from utils import is_url +from utils import encoding_detection from . import cleaner from .downloader import download_html diff --git a/scripts/readers/pdf/__init__.py b/scripts/readers/pdf/__init__.py index bf54a1b..f9ca753 100644 --- a/scripts/readers/pdf/__init__.py +++ b/scripts/readers/pdf/__init__.py @@ -3,8 +3,8 @@ import os from typing import List, Optional, Tuple -from scripts.readers.base import BaseReader -from scripts.utils import is_valid_pdf +from readers.base import BaseReader +from utils import is_valid_pdf from . import docling_ocr from . import unstructured_ocr diff --git a/scripts/readers/pdf/markitdown.py b/scripts/readers/pdf/markitdown.py index f430571..c95e626 100644 --- a/scripts/readers/pdf/markitdown.py +++ b/scripts/readers/pdf/markitdown.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_markitdown +from readers._utils import parse_via_markitdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pdf/unstructured.py b/scripts/readers/pdf/unstructured.py index d02f25e..5c8e98d 100644 --- a/scripts/readers/pdf/unstructured.py +++ b/scripts/readers/pdf/unstructured.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import convert_unstructured_to_markdown +from readers._utils import convert_unstructured_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pdf/unstructured_ocr.py b/scripts/readers/pdf/unstructured_ocr.py index 6c8ae81..8dbfbd0 100644 --- a/scripts/readers/pdf/unstructured_ocr.py +++ b/scripts/readers/pdf/unstructured_ocr.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import convert_unstructured_to_markdown +from readers._utils import convert_unstructured_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pptx/__init__.py b/scripts/readers/pptx/__init__.py index be12f43..4a23b03 100644 --- a/scripts/readers/pptx/__init__.py +++ b/scripts/readers/pptx/__init__.py @@ -3,8 +3,8 @@ import os from typing import List, Optional, Tuple -from scripts.readers.base import BaseReader -from scripts.utils import is_valid_pptx +from readers.base import BaseReader +from utils import is_valid_pptx from . import docling from . import unstructured diff --git a/scripts/readers/pptx/docling.py b/scripts/readers/pptx/docling.py index 463b802..0c708aa 100644 --- a/scripts/readers/pptx/docling.py +++ b/scripts/readers/pptx/docling.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_docling +from readers._utils import parse_via_docling def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pptx/markitdown.py b/scripts/readers/pptx/markitdown.py index 7f31dba..bfa59f3 100644 --- a/scripts/readers/pptx/markitdown.py +++ b/scripts/readers/pptx/markitdown.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_markitdown +from readers._utils import parse_via_markitdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pptx/native_xml.py b/scripts/readers/pptx/native_xml.py index 679fc4c..ce74230 100644 --- a/scripts/readers/pptx/native_xml.py +++ b/scripts/readers/pptx/native_xml.py @@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET import zipfile from typing import Any, List, Optional, Tuple -from scripts.readers._utils import build_markdown_table, flush_list_stack +from readers._utils import build_markdown_table, flush_list_stack def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pptx/python_pptx.py b/scripts/readers/pptx/python_pptx.py index df787c5..80297f0 100644 --- a/scripts/readers/pptx/python_pptx.py +++ b/scripts/readers/pptx/python_pptx.py @@ -2,7 +2,7 @@ from typing import Any, List, Optional, Tuple -from scripts.readers._utils import build_markdown_table, flush_list_stack +from readers._utils import build_markdown_table, flush_list_stack def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/pptx/unstructured.py b/scripts/readers/pptx/unstructured.py index 5340b51..5c20100 100644 --- a/scripts/readers/pptx/unstructured.py +++ b/scripts/readers/pptx/unstructured.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import convert_unstructured_to_markdown +from readers._utils import convert_unstructured_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/xlsx/__init__.py b/scripts/readers/xlsx/__init__.py index f86b122..fd557e0 100644 --- a/scripts/readers/xlsx/__init__.py +++ b/scripts/readers/xlsx/__init__.py @@ -3,8 +3,8 @@ import os from typing import List, Optional, Tuple -from scripts.readers.base import BaseReader -from scripts.utils import is_valid_xlsx +from readers.base import BaseReader +from utils import is_valid_xlsx from . import docling from . import unstructured diff --git a/scripts/readers/xlsx/docling.py b/scripts/readers/xlsx/docling.py index d7d2d51..9e8a5bc 100644 --- a/scripts/readers/xlsx/docling.py +++ b/scripts/readers/xlsx/docling.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_docling +from readers._utils import parse_via_docling def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/xlsx/markitdown.py b/scripts/readers/xlsx/markitdown.py index ecbc903..00a7e78 100644 --- a/scripts/readers/xlsx/markitdown.py +++ b/scripts/readers/xlsx/markitdown.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import parse_via_markitdown +from readers._utils import parse_via_markitdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/xlsx/native_xml.py b/scripts/readers/xlsx/native_xml.py index 9939e5b..b9d3b08 100644 --- a/scripts/readers/xlsx/native_xml.py +++ b/scripts/readers/xlsx/native_xml.py @@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET import zipfile from typing import List, Optional, Tuple -from scripts.readers._utils import build_markdown_table, safe_open_zip +from readers._utils import build_markdown_table, safe_open_zip def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/readers/xlsx/unstructured.py b/scripts/readers/xlsx/unstructured.py index fba861a..098933b 100644 --- a/scripts/readers/xlsx/unstructured.py +++ b/scripts/readers/xlsx/unstructured.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.readers._utils import convert_unstructured_to_markdown +from readers._utils import convert_unstructured_to_markdown def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/scripts/utils/encoding_detection.py b/scripts/utils/encoding_detection.py index 32ccb01..60eeb72 100644 --- a/scripts/utils/encoding_detection.py +++ b/scripts/utils/encoding_detection.py @@ -2,7 +2,7 @@ from typing import Optional, Tuple -from scripts.config import Config +from config import Config def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]: diff --git a/tests/__init__.py b/tests/__init__.py index e5fbfcf..a0dd646 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1 +1,12 @@ """Tests package for lyxy-document.""" + +import sys +from pathlib import Path + +# 将 scripts/ 目录添加到 sys.path +project_root = Path(__file__).resolve().parent.parent +scripts_dir = project_root / "scripts" +if str(scripts_dir) not in sys.path: + sys.path.insert(0, str(scripts_dir)) + + diff --git a/tests/conftest.py b/tests/conftest.py index a81172a..2bc9ada 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,16 @@ """测试配置和共享 fixtures。""" +import sys +from pathlib import Path + +# 将 scripts/ 目录添加到 sys.path(必须在最顶部,在其他导入之前) +project_root = Path(__file__).resolve().parent.parent # tests/ 的父目录是项目根目录 +scripts_dir = project_root / "scripts" +if str(scripts_dir) not in sys.path: + sys.path.insert(0, str(scripts_dir)) + import pytest -from scripts.readers import READERS +from readers import READERS @pytest.fixture diff --git a/tests/test_cli/conftest.py b/tests/test_cli/conftest.py index 6ad4034..83ad0d1 100644 --- a/tests/test_cli/conftest.py +++ b/tests/test_cli/conftest.py @@ -2,6 +2,7 @@ import pytest import sys +from pathlib import Path from io import StringIO from contextlib import redirect_stdout, redirect_stderr @@ -22,7 +23,13 @@ def cli_runner(): Returns: tuple: (stdout, stderr, exit_code) """ - from scripts.lyxy_document_reader import main + # 将 scripts/ 目录添加到 sys.path + project_root = Path(__file__).resolve().parent.parent.parent # tests/test_cli/ 的父目录是 tests/,再父目录是项目根目录 + scripts_dir = project_root / "scripts" + if str(scripts_dir) not in sys.path: + sys.path.insert(0, str(scripts_dir)) + + from lyxy_document_reader import main # 保存原始 sys.argv 和 sys.exit original_argv = sys.argv diff --git a/tests/test_core/test_markdown.py b/tests/test_core/test_markdown.py index 047e6f4..e21fe6e 100644 --- a/tests/test_core/test_markdown.py +++ b/tests/test_core/test_markdown.py @@ -1,6 +1,6 @@ """测试 Markdown 工具函数。""" -from scripts.core import ( +from core import ( get_heading_level, extract_titles, normalize_markdown_whitespace, diff --git a/tests/test_readers/test_docx/test_consistency.py b/tests/test_readers/test_docx/test_consistency.py index 30318a3..a2c6d4c 100644 --- a/tests/test_readers/test_docx/test_consistency.py +++ b/tests/test_readers/test_docx/test_consistency.py @@ -1,7 +1,7 @@ """测试所有 DOCX Readers 的一致性。""" import pytest -from scripts.readers.docx import ( +from readers.docx import ( docling, unstructured, pypandoc, diff --git a/tests/test_readers/test_docx/test_docling_docx.py b/tests/test_readers/test_docx/test_docling_docx.py index fe9fcac..2b57878 100644 --- a/tests/test_readers/test_docx/test_docling_docx.py +++ b/tests/test_readers/test_docx/test_docling_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import docling +from readers.docx import docling class TestDoclingDocxReaderParse: diff --git a/tests/test_readers/test_docx/test_markitdown_docx.py b/tests/test_readers/test_docx/test_markitdown_docx.py index d09eafa..e791b58 100644 --- a/tests/test_readers/test_docx/test_markitdown_docx.py +++ b/tests/test_readers/test_docx/test_markitdown_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import markitdown +from readers.docx import markitdown class TestMarkitdownDocxReaderParse: diff --git a/tests/test_readers/test_docx/test_native_xml_docx.py b/tests/test_readers/test_docx/test_native_xml_docx.py index 0ee1fe5..419699f 100644 --- a/tests/test_readers/test_docx/test_native_xml_docx.py +++ b/tests/test_readers/test_docx/test_native_xml_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import native_xml +from readers.docx import native_xml class TestNativeXmlDocxReaderParse: diff --git a/tests/test_readers/test_docx/test_pypandoc_docx.py b/tests/test_readers/test_docx/test_pypandoc_docx.py index f6b3b09..d5b7985 100644 --- a/tests/test_readers/test_docx/test_pypandoc_docx.py +++ b/tests/test_readers/test_docx/test_pypandoc_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import pypandoc +from readers.docx import pypandoc class TestPypandocDocxReaderParse: diff --git a/tests/test_readers/test_docx/test_python_docx.py b/tests/test_readers/test_docx/test_python_docx.py index ce0c11b..1bc8962 100644 --- a/tests/test_readers/test_docx/test_python_docx.py +++ b/tests/test_readers/test_docx/test_python_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import DocxReader +from readers.docx import DocxReader class TestPythonDocxReaderParse: diff --git a/tests/test_readers/test_docx/test_unstructured_docx.py b/tests/test_readers/test_docx/test_unstructured_docx.py index 2b38aaa..18e71f8 100644 --- a/tests/test_readers/test_docx/test_unstructured_docx.py +++ b/tests/test_readers/test_docx/test_unstructured_docx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.docx import unstructured +from readers.docx import unstructured class TestUnstructuredDocxReaderParse: diff --git a/tests/test_readers/test_html/test_consistency.py b/tests/test_readers/test_html/test_consistency.py index b9d17e2..dfbe00a 100644 --- a/tests/test_readers/test_html/test_consistency.py +++ b/tests/test_readers/test_html/test_consistency.py @@ -1,7 +1,7 @@ """测试所有 HTML Readers 的一致性。""" import pytest -from scripts.readers.html import ( +from readers.html import ( html2text, markitdown, trafilatura, diff --git a/tests/test_readers/test_html/test_domscribe_html.py b/tests/test_readers/test_html/test_domscribe_html.py index 9b7923b..868a916 100644 --- a/tests/test_readers/test_html/test_domscribe_html.py +++ b/tests/test_readers/test_html/test_domscribe_html.py @@ -1,7 +1,7 @@ """测试 Domscribe HTML Reader 的解析功能。""" import pytest -from scripts.readers.html import domscribe +from readers.html import domscribe class TestDomscribeHtmlReaderParse: diff --git a/tests/test_readers/test_html/test_html2text.py b/tests/test_readers/test_html/test_html2text.py index c8d0266..07a4845 100644 --- a/tests/test_readers/test_html/test_html2text.py +++ b/tests/test_readers/test_html/test_html2text.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.html import HtmlReader +from readers.html import HtmlReader class TestHtml2TextReaderParse: diff --git a/tests/test_readers/test_html/test_markitdown_html.py b/tests/test_readers/test_html/test_markitdown_html.py index 9592a7b..d4cd427 100644 --- a/tests/test_readers/test_html/test_markitdown_html.py +++ b/tests/test_readers/test_html/test_markitdown_html.py @@ -1,7 +1,7 @@ """测试 MarkItDown HTML Reader 的解析功能。""" import pytest -from scripts.readers.html import markitdown +from readers.html import markitdown class TestMarkitdownHtmlReaderParse: diff --git a/tests/test_readers/test_html/test_trafilatura_html.py b/tests/test_readers/test_html/test_trafilatura_html.py index 12de5ed..62b08e5 100644 --- a/tests/test_readers/test_html/test_trafilatura_html.py +++ b/tests/test_readers/test_html/test_trafilatura_html.py @@ -1,7 +1,7 @@ """测试 Trafilatura HTML Reader 的解析功能。""" import pytest -from scripts.readers.html import trafilatura +from readers.html import trafilatura class TestTrafilaturaHtmlReaderParse: diff --git a/tests/test_readers/test_pdf/test_consistency.py b/tests/test_readers/test_pdf/test_consistency.py index 184082e..a22f75f 100644 --- a/tests/test_readers/test_pdf/test_consistency.py +++ b/tests/test_readers/test_pdf/test_consistency.py @@ -1,7 +1,7 @@ """测试所有 PDF Readers 的一致性。""" import pytest -from scripts.readers.pdf import ( +from readers.pdf import ( docling, docling_ocr, markitdown, diff --git a/tests/test_readers/test_pdf/test_docling_ocr_pdf.py b/tests/test_readers/test_pdf/test_docling_ocr_pdf.py index 0f27066..d0c6eb5 100644 --- a/tests/test_readers/test_pdf/test_docling_ocr_pdf.py +++ b/tests/test_readers/test_pdf/test_docling_ocr_pdf.py @@ -1,7 +1,7 @@ """测试 Docling OCR PDF Reader 的解析功能。""" import pytest -from scripts.readers.pdf import docling_ocr +from readers.pdf import docling_ocr class TestDoclingOcrPdfReaderParse: diff --git a/tests/test_readers/test_pdf/test_docling_pdf.py b/tests/test_readers/test_pdf/test_docling_pdf.py index e4a6113..f8aaffb 100644 --- a/tests/test_readers/test_pdf/test_docling_pdf.py +++ b/tests/test_readers/test_pdf/test_docling_pdf.py @@ -1,7 +1,7 @@ """测试 Docling PDF Reader 的解析功能。""" import pytest -from scripts.readers.pdf import docling +from readers.pdf import docling class TestDoclingPdfReaderParse: diff --git a/tests/test_readers/test_pdf/test_markitdown_pdf.py b/tests/test_readers/test_pdf/test_markitdown_pdf.py index cddd898..07c7b37 100644 --- a/tests/test_readers/test_pdf/test_markitdown_pdf.py +++ b/tests/test_readers/test_pdf/test_markitdown_pdf.py @@ -1,7 +1,7 @@ """测试 MarkItDown PDF Reader 的解析功能。""" import pytest -from scripts.readers.pdf import markitdown +from readers.pdf import markitdown class TestMarkitdownPdfReaderParse: diff --git a/tests/test_readers/test_pdf/test_pypdf.py b/tests/test_readers/test_pdf/test_pypdf.py index 97dc3f8..7fd827a 100644 --- a/tests/test_readers/test_pdf/test_pypdf.py +++ b/tests/test_readers/test_pdf/test_pypdf.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.pdf import PdfReader +from readers.pdf import PdfReader class TestPypdfReaderParse: diff --git a/tests/test_readers/test_pdf/test_unstructured_ocr_pdf.py b/tests/test_readers/test_pdf/test_unstructured_ocr_pdf.py index 6092410..43ecca8 100644 --- a/tests/test_readers/test_pdf/test_unstructured_ocr_pdf.py +++ b/tests/test_readers/test_pdf/test_unstructured_ocr_pdf.py @@ -1,7 +1,7 @@ """测试 Unstructured OCR PDF Reader 的解析功能。""" import pytest -from scripts.readers.pdf import unstructured_ocr +from readers.pdf import unstructured_ocr class TestUnstructuredOcrPdfReaderParse: diff --git a/tests/test_readers/test_pdf/test_unstructured_pdf.py b/tests/test_readers/test_pdf/test_unstructured_pdf.py index d097366..798d7ed 100644 --- a/tests/test_readers/test_pdf/test_unstructured_pdf.py +++ b/tests/test_readers/test_pdf/test_unstructured_pdf.py @@ -1,7 +1,7 @@ """测试 Unstructured PDF Reader 的解析功能。""" import pytest -from scripts.readers.pdf import unstructured +from readers.pdf import unstructured class TestUnstructuredPdfReaderParse: diff --git a/tests/test_readers/test_pptx/test_consistency.py b/tests/test_readers/test_pptx/test_consistency.py index 5f3e00c..b094dd0 100644 --- a/tests/test_readers/test_pptx/test_consistency.py +++ b/tests/test_readers/test_pptx/test_consistency.py @@ -1,7 +1,7 @@ """测试所有 PPTX Readers 的一致性。""" import pytest -from scripts.readers.pptx import ( +from readers.pptx import ( docling, markitdown, native_xml, diff --git a/tests/test_readers/test_pptx/test_docling_pptx.py b/tests/test_readers/test_pptx/test_docling_pptx.py index 815bacf..01832b1 100644 --- a/tests/test_readers/test_pptx/test_docling_pptx.py +++ b/tests/test_readers/test_pptx/test_docling_pptx.py @@ -1,7 +1,7 @@ """测试 Docling PPTX Reader 的解析功能。""" import pytest -from scripts.readers.pptx import docling +from readers.pptx import docling class TestDoclingPptxReaderParse: diff --git a/tests/test_readers/test_pptx/test_markitdown_pptx.py b/tests/test_readers/test_pptx/test_markitdown_pptx.py index 996d9a1..1a03400 100644 --- a/tests/test_readers/test_pptx/test_markitdown_pptx.py +++ b/tests/test_readers/test_pptx/test_markitdown_pptx.py @@ -1,7 +1,7 @@ """测试 MarkItDown PPTX Reader 的解析功能。""" import pytest -from scripts.readers.pptx import markitdown +from readers.pptx import markitdown class TestMarkitdownPptxReaderParse: diff --git a/tests/test_readers/test_pptx/test_native_xml_pptx.py b/tests/test_readers/test_pptx/test_native_xml_pptx.py index 61785b1..edbcb83 100644 --- a/tests/test_readers/test_pptx/test_native_xml_pptx.py +++ b/tests/test_readers/test_pptx/test_native_xml_pptx.py @@ -1,7 +1,7 @@ """测试 Native XML PPTX Reader 的解析功能。""" import pytest -from scripts.readers.pptx import native_xml +from readers.pptx import native_xml class TestNativeXmlPptxReaderParse: diff --git a/tests/test_readers/test_pptx/test_python_pptx.py b/tests/test_readers/test_pptx/test_python_pptx.py index a4c00cd..05d7217 100644 --- a/tests/test_readers/test_pptx/test_python_pptx.py +++ b/tests/test_readers/test_pptx/test_python_pptx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.pptx import PptxReader +from readers.pptx import PptxReader class TestPythonPptxReaderParse: diff --git a/tests/test_readers/test_pptx/test_unstructured_pptx.py b/tests/test_readers/test_pptx/test_unstructured_pptx.py index c05a47e..e10d9c0 100644 --- a/tests/test_readers/test_pptx/test_unstructured_pptx.py +++ b/tests/test_readers/test_pptx/test_unstructured_pptx.py @@ -1,7 +1,7 @@ """测试 Unstructured PPTX Reader 的解析功能。""" import pytest -from scripts.readers.pptx import unstructured +from readers.pptx import unstructured class TestUnstructuredPptxReaderParse: diff --git a/tests/test_readers/test_utils.py b/tests/test_readers/test_utils.py index 8443f8d..7aa3d63 100644 --- a/tests/test_readers/test_utils.py +++ b/tests/test_readers/test_utils.py @@ -2,7 +2,7 @@ import zipfile import pytest -from scripts.readers._utils import ( +from readers._utils import ( parse_via_markitdown, parse_via_docling, build_markdown_table, diff --git a/tests/test_readers/test_xlsx/test_consistency.py b/tests/test_readers/test_xlsx/test_consistency.py index a9e2256..8b7445c 100644 --- a/tests/test_readers/test_xlsx/test_consistency.py +++ b/tests/test_readers/test_xlsx/test_consistency.py @@ -1,7 +1,7 @@ """测试所有 XLSX Readers 的一致性。""" import pytest -from scripts.readers.xlsx import ( +from readers.xlsx import ( docling, markitdown, native_xml, diff --git a/tests/test_readers/test_xlsx/test_docling_xlsx.py b/tests/test_readers/test_xlsx/test_docling_xlsx.py index 1c688fe..0423e39 100644 --- a/tests/test_readers/test_xlsx/test_docling_xlsx.py +++ b/tests/test_readers/test_xlsx/test_docling_xlsx.py @@ -1,7 +1,7 @@ """测试 Docling XLSX Reader 的解析功能。""" import pytest -from scripts.readers.xlsx import docling +from readers.xlsx import docling class TestDoclingXlsxReaderParse: diff --git a/tests/test_readers/test_xlsx/test_markitdown_xlsx.py b/tests/test_readers/test_xlsx/test_markitdown_xlsx.py index 54b2c16..dddeb0c 100644 --- a/tests/test_readers/test_xlsx/test_markitdown_xlsx.py +++ b/tests/test_readers/test_xlsx/test_markitdown_xlsx.py @@ -1,7 +1,7 @@ """测试 MarkItDown XLSX Reader 的解析功能。""" import pytest -from scripts.readers.xlsx import markitdown +from readers.xlsx import markitdown class TestMarkitdownXlsxReaderParse: diff --git a/tests/test_readers/test_xlsx/test_native_xml_xlsx.py b/tests/test_readers/test_xlsx/test_native_xml_xlsx.py index 23a7d97..7049a80 100644 --- a/tests/test_readers/test_xlsx/test_native_xml_xlsx.py +++ b/tests/test_readers/test_xlsx/test_native_xml_xlsx.py @@ -1,7 +1,7 @@ """测试 Native XML XLSX Reader 的解析功能。""" import pytest -from scripts.readers.xlsx import native_xml +from readers.xlsx import native_xml class TestNativeXmlXlsxReaderParse: diff --git a/tests/test_readers/test_xlsx/test_pandas_xlsx.py b/tests/test_readers/test_xlsx/test_pandas_xlsx.py index 77edb51..bc72d2e 100644 --- a/tests/test_readers/test_xlsx/test_pandas_xlsx.py +++ b/tests/test_readers/test_xlsx/test_pandas_xlsx.py @@ -2,7 +2,7 @@ import pytest import os -from scripts.readers.xlsx import XlsxReader +from readers.xlsx import XlsxReader class TestPandasXlsxReaderParse: diff --git a/tests/test_readers/test_xlsx/test_unstructured_xlsx.py b/tests/test_readers/test_xlsx/test_unstructured_xlsx.py index 201ccac..c59578a 100644 --- a/tests/test_readers/test_xlsx/test_unstructured_xlsx.py +++ b/tests/test_readers/test_xlsx/test_unstructured_xlsx.py @@ -1,7 +1,7 @@ """测试 Unstructured XLSX Reader 的解析功能。""" import pytest -from scripts.readers.xlsx import unstructured +from readers.xlsx import unstructured class TestUnstructuredXlsxReaderParse: diff --git a/tests/test_utils/test_file_detection.py b/tests/test_utils/test_file_detection.py index 17d3fc1..9996052 100644 --- a/tests/test_utils/test_file_detection.py +++ b/tests/test_utils/test_file_detection.py @@ -1,6 +1,6 @@ """测试文件检测工具函数。""" -from scripts.utils import is_url, is_html_file +from utils import is_url, is_html_file class TestIsUrl: