refactor: 调整模块导入路径,简化引用结构

- 更新 openspec/config.yaml 中 git 任务相关说明
- 将 scripts.core.* 改为 core.*,scripts.readers.* 改为 readers.*
- 优化 lyxy_document_reader.py 中 sys.path 设置方式
- 同步更新所有测试文件的导入路径
This commit is contained in:
@@ -9,7 +9,7 @@ context: |
|
|||||||
- 开发文档: README.md,每次迭代按需更新开发文档; 禁emoji/特殊字符
|
- 开发文档: README.md,每次迭代按需更新开发文档; 禁emoji/特殊字符
|
||||||
- skill文档: SKILL.md,每次迭代按需更新skill文档
|
- skill文档: SKILL.md,每次迭代按需更新skill文档
|
||||||
- 测试: 所有需求必须设计全面测试
|
- 测试: 所有需求必须设计全面测试
|
||||||
- 任务: 禁止创建git变更任务(push/commit等); git读取允许(status/log/diff等)
|
- 任务: 除非用户直接要求,禁止创建git变更任务(push/commit等); git读取允许(status/log/diff等)
|
||||||
- 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文
|
- 代码: 模块文件150-300行; 错误需自定义异常+清晰信息+位置上下文
|
||||||
- 项目阶段: 未上线,无用户,破坏性变更无需迁移说明
|
- 项目阶段: 未上线,无用户,破坏性变更无需迁移说明
|
||||||
- Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明
|
- Git提交: 仅中文; 格式为"类型: 简短描述",类型可选: feat(新功能)/fix(修复)/refactor(重构)/docs(文档)/style(格式)/test(测试)/chore(构建/工具); 多行描述空行后加详细说明
|
||||||
|
|||||||
@@ -4,12 +4,12 @@ import argparse
|
|||||||
import sys
|
import sys
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.core.exceptions import FileDetectionError, ReaderNotFoundError
|
from core.exceptions import FileDetectionError, ReaderNotFoundError
|
||||||
from scripts.core.markdown import (
|
from core.markdown import (
|
||||||
normalize_markdown_whitespace,
|
normalize_markdown_whitespace,
|
||||||
remove_markdown_images,
|
remove_markdown_images,
|
||||||
)
|
)
|
||||||
from scripts.readers import BaseReader
|
from readers import BaseReader
|
||||||
|
|
||||||
|
|
||||||
def parse_input(
|
def parse_input(
|
||||||
|
|||||||
@@ -6,12 +6,12 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
# 将项目根目录添加到 sys.path,支持从任意位置执行脚本
|
# 将 scripts/ 目录添加到 sys.path,支持从任意位置执行脚本
|
||||||
_current_dir = os.path.dirname(os.path.abspath(__file__))
|
scripts_dir = Path(__file__).resolve().parent
|
||||||
_project_root = os.path.dirname(_current_dir)
|
if str(scripts_dir) not in sys.path:
|
||||||
if _project_root not in sys.path:
|
sys.path.append(str(scripts_dir))
|
||||||
sys.path.insert(0, _project_root)
|
|
||||||
|
|
||||||
# 抑制第三方库的进度条和日志,仅保留解析结果输出
|
# 抑制第三方库的进度条和日志,仅保留解析结果输出
|
||||||
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
|
||||||
@@ -26,14 +26,14 @@ logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')
|
|||||||
logging.getLogger('docling').setLevel(logging.ERROR)
|
logging.getLogger('docling').setLevel(logging.ERROR)
|
||||||
logging.getLogger('unstructured').setLevel(logging.ERROR)
|
logging.getLogger('unstructured').setLevel(logging.ERROR)
|
||||||
|
|
||||||
from scripts.core import (
|
from core import (
|
||||||
FileDetectionError,
|
FileDetectionError,
|
||||||
ReaderNotFoundError,
|
ReaderNotFoundError,
|
||||||
output_result,
|
output_result,
|
||||||
parse_input,
|
parse_input,
|
||||||
process_content,
|
process_content,
|
||||||
)
|
)
|
||||||
from scripts.readers import READERS
|
from readers import READERS
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
import os
|
import os
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers.base import BaseReader
|
from readers.base import BaseReader
|
||||||
from scripts.utils import is_valid_docx
|
from utils import is_valid_docx
|
||||||
|
|
||||||
from . import docling
|
from . import docling
|
||||||
from . import unstructured
|
from . import unstructured
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_docling
|
from readers._utils import parse_via_docling
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_markitdown
|
from readers._utils import parse_via_markitdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
|||||||
import zipfile
|
import zipfile
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import build_markdown_table, safe_open_zip
|
from readers._utils import build_markdown_table, safe_open_zip
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Any, List, Optional, Tuple
|
from typing import Any, List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import build_markdown_table
|
from readers._utils import build_markdown_table
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
from readers._utils import convert_unstructured_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -4,9 +4,9 @@ import os
|
|||||||
import tempfile
|
import tempfile
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers.base import BaseReader
|
from readers.base import BaseReader
|
||||||
from scripts.utils import is_url
|
from utils import is_url
|
||||||
from scripts.utils import encoding_detection
|
from utils import encoding_detection
|
||||||
|
|
||||||
from . import cleaner
|
from . import cleaner
|
||||||
from .downloader import download_html
|
from .downloader import download_html
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
import os
|
import os
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers.base import BaseReader
|
from readers.base import BaseReader
|
||||||
from scripts.utils import is_valid_pdf
|
from utils import is_valid_pdf
|
||||||
|
|
||||||
from . import docling_ocr
|
from . import docling_ocr
|
||||||
from . import unstructured_ocr
|
from . import unstructured_ocr
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_markitdown
|
from readers._utils import parse_via_markitdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
from readers._utils import convert_unstructured_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
from readers._utils import convert_unstructured_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
import os
|
import os
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers.base import BaseReader
|
from readers.base import BaseReader
|
||||||
from scripts.utils import is_valid_pptx
|
from utils import is_valid_pptx
|
||||||
|
|
||||||
from . import docling
|
from . import docling
|
||||||
from . import unstructured
|
from . import unstructured
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_docling
|
from readers._utils import parse_via_docling
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_markitdown
|
from readers._utils import parse_via_markitdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
|
|||||||
import zipfile
|
import zipfile
|
||||||
from typing import Any, List, Optional, Tuple
|
from typing import Any, List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import build_markdown_table, flush_list_stack
|
from readers._utils import build_markdown_table, flush_list_stack
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Any, List, Optional, Tuple
|
from typing import Any, List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import build_markdown_table, flush_list_stack
|
from readers._utils import build_markdown_table, flush_list_stack
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
from readers._utils import convert_unstructured_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -3,8 +3,8 @@
|
|||||||
import os
|
import os
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers.base import BaseReader
|
from readers.base import BaseReader
|
||||||
from scripts.utils import is_valid_xlsx
|
from utils import is_valid_xlsx
|
||||||
|
|
||||||
from . import docling
|
from . import docling
|
||||||
from . import unstructured
|
from . import unstructured
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_docling
|
from readers._utils import parse_via_docling
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import parse_via_markitdown
|
from readers._utils import parse_via_markitdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import xml.etree.ElementTree as ET
|
|||||||
import zipfile
|
import zipfile
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import build_markdown_table, safe_open_zip
|
from readers._utils import build_markdown_table, safe_open_zip
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.readers._utils import convert_unstructured_to_markdown
|
from readers._utils import convert_unstructured_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def parse(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
from scripts.config import Config
|
from config import Config
|
||||||
|
|
||||||
|
|
||||||
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
def detect_encoding(file_path: str) -> Tuple[Optional[str], Optional[str]]:
|
||||||
|
|||||||
@@ -1 +1,12 @@
|
|||||||
"""Tests package for lyxy-document."""
|
"""Tests package for lyxy-document."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 将 scripts/ 目录添加到 sys.path
|
||||||
|
project_root = Path(__file__).resolve().parent.parent
|
||||||
|
scripts_dir = project_root / "scripts"
|
||||||
|
if str(scripts_dir) not in sys.path:
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,16 @@
|
|||||||
"""测试配置和共享 fixtures。"""
|
"""测试配置和共享 fixtures。"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# 将 scripts/ 目录添加到 sys.path(必须在最顶部,在其他导入之前)
|
||||||
|
project_root = Path(__file__).resolve().parent.parent # tests/ 的父目录是项目根目录
|
||||||
|
scripts_dir = project_root / "scripts"
|
||||||
|
if str(scripts_dir) not in sys.path:
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers import READERS
|
from readers import READERS
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import sys
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from contextlib import redirect_stdout, redirect_stderr
|
from contextlib import redirect_stdout, redirect_stderr
|
||||||
|
|
||||||
@@ -22,7 +23,13 @@ def cli_runner():
|
|||||||
Returns:
|
Returns:
|
||||||
tuple: (stdout, stderr, exit_code)
|
tuple: (stdout, stderr, exit_code)
|
||||||
"""
|
"""
|
||||||
from scripts.lyxy_document_reader import main
|
# 将 scripts/ 目录添加到 sys.path
|
||||||
|
project_root = Path(__file__).resolve().parent.parent.parent # tests/test_cli/ 的父目录是 tests/,再父目录是项目根目录
|
||||||
|
scripts_dir = project_root / "scripts"
|
||||||
|
if str(scripts_dir) not in sys.path:
|
||||||
|
sys.path.insert(0, str(scripts_dir))
|
||||||
|
|
||||||
|
from lyxy_document_reader import main
|
||||||
|
|
||||||
# 保存原始 sys.argv 和 sys.exit
|
# 保存原始 sys.argv 和 sys.exit
|
||||||
original_argv = sys.argv
|
original_argv = sys.argv
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""测试 Markdown 工具函数。"""
|
"""测试 Markdown 工具函数。"""
|
||||||
|
|
||||||
from scripts.core import (
|
from core import (
|
||||||
get_heading_level,
|
get_heading_level,
|
||||||
extract_titles,
|
extract_titles,
|
||||||
normalize_markdown_whitespace,
|
normalize_markdown_whitespace,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试所有 DOCX Readers 的一致性。"""
|
"""测试所有 DOCX Readers 的一致性。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.docx import (
|
from readers.docx import (
|
||||||
docling,
|
docling,
|
||||||
unstructured,
|
unstructured,
|
||||||
pypandoc,
|
pypandoc,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import docling
|
from readers.docx import docling
|
||||||
|
|
||||||
|
|
||||||
class TestDoclingDocxReaderParse:
|
class TestDoclingDocxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import markitdown
|
from readers.docx import markitdown
|
||||||
|
|
||||||
|
|
||||||
class TestMarkitdownDocxReaderParse:
|
class TestMarkitdownDocxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import native_xml
|
from readers.docx import native_xml
|
||||||
|
|
||||||
|
|
||||||
class TestNativeXmlDocxReaderParse:
|
class TestNativeXmlDocxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import pypandoc
|
from readers.docx import pypandoc
|
||||||
|
|
||||||
|
|
||||||
class TestPypandocDocxReaderParse:
|
class TestPypandocDocxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import DocxReader
|
from readers.docx import DocxReader
|
||||||
|
|
||||||
|
|
||||||
class TestPythonDocxReaderParse:
|
class TestPythonDocxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.docx import unstructured
|
from readers.docx import unstructured
|
||||||
|
|
||||||
|
|
||||||
class TestUnstructuredDocxReaderParse:
|
class TestUnstructuredDocxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试所有 HTML Readers 的一致性。"""
|
"""测试所有 HTML Readers 的一致性。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.html import (
|
from readers.html import (
|
||||||
html2text,
|
html2text,
|
||||||
markitdown,
|
markitdown,
|
||||||
trafilatura,
|
trafilatura,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Domscribe HTML Reader 的解析功能。"""
|
"""测试 Domscribe HTML Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.html import domscribe
|
from readers.html import domscribe
|
||||||
|
|
||||||
|
|
||||||
class TestDomscribeHtmlReaderParse:
|
class TestDomscribeHtmlReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.html import HtmlReader
|
from readers.html import HtmlReader
|
||||||
|
|
||||||
|
|
||||||
class TestHtml2TextReaderParse:
|
class TestHtml2TextReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 MarkItDown HTML Reader 的解析功能。"""
|
"""测试 MarkItDown HTML Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.html import markitdown
|
from readers.html import markitdown
|
||||||
|
|
||||||
|
|
||||||
class TestMarkitdownHtmlReaderParse:
|
class TestMarkitdownHtmlReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Trafilatura HTML Reader 的解析功能。"""
|
"""测试 Trafilatura HTML Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.html import trafilatura
|
from readers.html import trafilatura
|
||||||
|
|
||||||
|
|
||||||
class TestTrafilaturaHtmlReaderParse:
|
class TestTrafilaturaHtmlReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试所有 PDF Readers 的一致性。"""
|
"""测试所有 PDF Readers 的一致性。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import (
|
from readers.pdf import (
|
||||||
docling,
|
docling,
|
||||||
docling_ocr,
|
docling_ocr,
|
||||||
markitdown,
|
markitdown,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Docling OCR PDF Reader 的解析功能。"""
|
"""测试 Docling OCR PDF Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import docling_ocr
|
from readers.pdf import docling_ocr
|
||||||
|
|
||||||
|
|
||||||
class TestDoclingOcrPdfReaderParse:
|
class TestDoclingOcrPdfReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Docling PDF Reader 的解析功能。"""
|
"""测试 Docling PDF Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import docling
|
from readers.pdf import docling
|
||||||
|
|
||||||
|
|
||||||
class TestDoclingPdfReaderParse:
|
class TestDoclingPdfReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 MarkItDown PDF Reader 的解析功能。"""
|
"""测试 MarkItDown PDF Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import markitdown
|
from readers.pdf import markitdown
|
||||||
|
|
||||||
|
|
||||||
class TestMarkitdownPdfReaderParse:
|
class TestMarkitdownPdfReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.pdf import PdfReader
|
from readers.pdf import PdfReader
|
||||||
|
|
||||||
|
|
||||||
class TestPypdfReaderParse:
|
class TestPypdfReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Unstructured OCR PDF Reader 的解析功能。"""
|
"""测试 Unstructured OCR PDF Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import unstructured_ocr
|
from readers.pdf import unstructured_ocr
|
||||||
|
|
||||||
|
|
||||||
class TestUnstructuredOcrPdfReaderParse:
|
class TestUnstructuredOcrPdfReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Unstructured PDF Reader 的解析功能。"""
|
"""测试 Unstructured PDF Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pdf import unstructured
|
from readers.pdf import unstructured
|
||||||
|
|
||||||
|
|
||||||
class TestUnstructuredPdfReaderParse:
|
class TestUnstructuredPdfReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试所有 PPTX Readers 的一致性。"""
|
"""测试所有 PPTX Readers 的一致性。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pptx import (
|
from readers.pptx import (
|
||||||
docling,
|
docling,
|
||||||
markitdown,
|
markitdown,
|
||||||
native_xml,
|
native_xml,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Docling PPTX Reader 的解析功能。"""
|
"""测试 Docling PPTX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pptx import docling
|
from readers.pptx import docling
|
||||||
|
|
||||||
|
|
||||||
class TestDoclingPptxReaderParse:
|
class TestDoclingPptxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 MarkItDown PPTX Reader 的解析功能。"""
|
"""测试 MarkItDown PPTX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pptx import markitdown
|
from readers.pptx import markitdown
|
||||||
|
|
||||||
|
|
||||||
class TestMarkitdownPptxReaderParse:
|
class TestMarkitdownPptxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Native XML PPTX Reader 的解析功能。"""
|
"""测试 Native XML PPTX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pptx import native_xml
|
from readers.pptx import native_xml
|
||||||
|
|
||||||
|
|
||||||
class TestNativeXmlPptxReaderParse:
|
class TestNativeXmlPptxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.pptx import PptxReader
|
from readers.pptx import PptxReader
|
||||||
|
|
||||||
|
|
||||||
class TestPythonPptxReaderParse:
|
class TestPythonPptxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Unstructured PPTX Reader 的解析功能。"""
|
"""测试 Unstructured PPTX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.pptx import unstructured
|
from readers.pptx import unstructured
|
||||||
|
|
||||||
|
|
||||||
class TestUnstructuredPptxReaderParse:
|
class TestUnstructuredPptxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import zipfile
|
import zipfile
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers._utils import (
|
from readers._utils import (
|
||||||
parse_via_markitdown,
|
parse_via_markitdown,
|
||||||
parse_via_docling,
|
parse_via_docling,
|
||||||
build_markdown_table,
|
build_markdown_table,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试所有 XLSX Readers 的一致性。"""
|
"""测试所有 XLSX Readers 的一致性。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.xlsx import (
|
from readers.xlsx import (
|
||||||
docling,
|
docling,
|
||||||
markitdown,
|
markitdown,
|
||||||
native_xml,
|
native_xml,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Docling XLSX Reader 的解析功能。"""
|
"""测试 Docling XLSX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.xlsx import docling
|
from readers.xlsx import docling
|
||||||
|
|
||||||
|
|
||||||
class TestDoclingXlsxReaderParse:
|
class TestDoclingXlsxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 MarkItDown XLSX Reader 的解析功能。"""
|
"""测试 MarkItDown XLSX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.xlsx import markitdown
|
from readers.xlsx import markitdown
|
||||||
|
|
||||||
|
|
||||||
class TestMarkitdownXlsxReaderParse:
|
class TestMarkitdownXlsxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Native XML XLSX Reader 的解析功能。"""
|
"""测试 Native XML XLSX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.xlsx import native_xml
|
from readers.xlsx import native_xml
|
||||||
|
|
||||||
|
|
||||||
class TestNativeXmlXlsxReaderParse:
|
class TestNativeXmlXlsxReaderParse:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
from scripts.readers.xlsx import XlsxReader
|
from readers.xlsx import XlsxReader
|
||||||
|
|
||||||
|
|
||||||
class TestPandasXlsxReaderParse:
|
class TestPandasXlsxReaderParse:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
"""测试 Unstructured XLSX Reader 的解析功能。"""
|
"""测试 Unstructured XLSX Reader 的解析功能。"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from scripts.readers.xlsx import unstructured
|
from readers.xlsx import unstructured
|
||||||
|
|
||||||
|
|
||||||
class TestUnstructuredXlsxReaderParse:
|
class TestUnstructuredXlsxReaderParse:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""测试文件检测工具函数。"""
|
"""测试文件检测工具函数。"""
|
||||||
|
|
||||||
from scripts.utils import is_url, is_html_file
|
from utils import is_url, is_html_file
|
||||||
|
|
||||||
|
|
||||||
class TestIsUrl:
|
class TestIsUrl:
|
||||||
|
|||||||
Reference in New Issue
Block a user