1
0

优化路径

This commit is contained in:
2026-02-25 18:28:45 +08:00
parent 1362972630
commit d296f97a71
6 changed files with 29 additions and 29 deletions

View File

@@ -4,46 +4,46 @@
```bash
# DOCX
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py /path/to/report.docx
uv run --with "markitdown[docx]" scripts/parser.py /path/to/report.docx
# PPTX
uv run --with "markitdown[pptx]" skills/lyxy-reader-office/scripts/parser.py /path/to/slides.pptx
uv run --with "markitdown[pptx]" scripts/parser.py /path/to/slides.pptx
# XLSX
uv run --with "markitdown[xlsx]" skills/lyxy-reader-office/scripts/parser.py /path/to/data.xlsx
uv run --with "markitdown[xlsx]" scripts/parser.py /path/to/data.xlsx
# PDF
uv run --with "markitdown[pdf]" --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/doc.pdf
uv run --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/doc.pdf
```
## 获取文档字数
```bash
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -c /path/to/report.docx
uv run --with "markitdown[docx]" scripts/parser.py -c /path/to/report.docx
```
## 提取所有标题
```bash
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -t /path/to/report.docx
uv run --with "markitdown[docx]" scripts/parser.py -t /path/to/report.docx
```
## 提取指定章节
```bash
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -tc "第一章" /path/to/report.docx
uv run --with "markitdown[docx]" scripts/parser.py -tc "第一章" /path/to/report.docx
```
## 搜索关键词
```bash
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -s "关键词" -n 3 /path/to/report.docx
uv run --with "markitdown[docx]" scripts/parser.py -s "关键词" -n 3 /path/to/report.docx
```
## PDF OCR 高精度解析
```bash
uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/scanned.pdf --high-res
uv run --with docling --with pypdf scripts/parser.py /path/to/scanned.pdf --high-res
```
## 降级到直接 Python 执行
@@ -51,5 +51,5 @@ uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /
仅当 lyxy-runner-python skill 不存在时使用:
```bash
python3 skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
python3 scripts/parser.py /path/to/file.docx
```

View File

@@ -11,23 +11,23 @@
### 使用 uv推荐
```bash
# DOCX - 推荐依赖
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
# DOCX - 依赖
uv run --with docling --with "unstructured[docx]" --with markdownify --with pypandoc-binary --with "markitdown[docx]" --with python-docx scripts/parser.py /path/to/file.docx
# PPTX - 推荐依赖
uv run --with "markitdown[pptx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.pptx
# PPTX - 依赖
uv run --with docling --with "unstructured[pptx]" --with markdownify --with "markitdown[pptx]" --with python-pptx scripts/parser.py /path/to/file.pptx
# XLSX - 推荐依赖
uv run --with "markitdown[xlsx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.xlsx
# XLSX - 依赖
uv run --with docling --with "unstructured[xlsx]" --with markdownify --with "markitdown[xlsx]" --with pandas --with tabulate scripts/parser.py /path/to/file.xlsx
# PDF - 推荐依赖
uv run --with "markitdown[pdf]" --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/file.pdf
# PDF - 全依赖(基础文本提取)
uv run --with docling --with "unstructured[pdf]" --with markdownify --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/file.pdf
# PDF OCR 高精度模式
uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/file.pdf --high-res
# PDF OCR 高精度模式(全依赖)
uv run --with docling --with "unstructured[pdf]" --with unstructured-paddleocr --with "paddlepaddle==2.6.2" --with ml-dtypes --with markdownify --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/file.pdf --high-res
```
> **注意**:以上为最小推荐依赖,更多解析器依赖和完整安装命令请查阅 `scripts/README.md` 的安装部分
> **说明**:以上为全依赖安装命令,包含所有解析器以获得最佳兼容性。详细的解析器优先级和对比请查阅 `scripts/README.md`。
## 各格式输出特点