优化路径
This commit is contained in:
@@ -55,10 +55,10 @@ compatibility: Requires Python 3.6+. DOCX/PPTX/XLSX 无需额外依赖(XML 原
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 使用 lyxy-runner-python(推荐)
|
# 使用 lyxy-runner-python(推荐)
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
|
uv run --with "markitdown[docx]" scripts/parser.py /path/to/file.docx
|
||||||
|
|
||||||
# 降级到直接执行
|
# 降级到直接执行
|
||||||
python3 skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
|
python3 scripts/parser.py /path/to/file.docx
|
||||||
```
|
```
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|||||||
@@ -4,46 +4,46 @@
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# DOCX
|
# DOCX
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py /path/to/report.docx
|
uv run --with "markitdown[docx]" scripts/parser.py /path/to/report.docx
|
||||||
|
|
||||||
# PPTX
|
# PPTX
|
||||||
uv run --with "markitdown[pptx]" skills/lyxy-reader-office/scripts/parser.py /path/to/slides.pptx
|
uv run --with "markitdown[pptx]" scripts/parser.py /path/to/slides.pptx
|
||||||
|
|
||||||
# XLSX
|
# XLSX
|
||||||
uv run --with "markitdown[xlsx]" skills/lyxy-reader-office/scripts/parser.py /path/to/data.xlsx
|
uv run --with "markitdown[xlsx]" scripts/parser.py /path/to/data.xlsx
|
||||||
|
|
||||||
# PDF
|
# PDF
|
||||||
uv run --with "markitdown[pdf]" --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/doc.pdf
|
uv run --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/doc.pdf
|
||||||
```
|
```
|
||||||
|
|
||||||
## 获取文档字数
|
## 获取文档字数
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -c /path/to/report.docx
|
uv run --with "markitdown[docx]" scripts/parser.py -c /path/to/report.docx
|
||||||
```
|
```
|
||||||
|
|
||||||
## 提取所有标题
|
## 提取所有标题
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -t /path/to/report.docx
|
uv run --with "markitdown[docx]" scripts/parser.py -t /path/to/report.docx
|
||||||
```
|
```
|
||||||
|
|
||||||
## 提取指定章节
|
## 提取指定章节
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -tc "第一章" /path/to/report.docx
|
uv run --with "markitdown[docx]" scripts/parser.py -tc "第一章" /path/to/report.docx
|
||||||
```
|
```
|
||||||
|
|
||||||
## 搜索关键词
|
## 搜索关键词
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py -s "关键词" -n 3 /path/to/report.docx
|
uv run --with "markitdown[docx]" scripts/parser.py -s "关键词" -n 3 /path/to/report.docx
|
||||||
```
|
```
|
||||||
|
|
||||||
## PDF OCR 高精度解析
|
## PDF OCR 高精度解析
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/scanned.pdf --high-res
|
uv run --with docling --with pypdf scripts/parser.py /path/to/scanned.pdf --high-res
|
||||||
```
|
```
|
||||||
|
|
||||||
## 降级到直接 Python 执行
|
## 降级到直接 Python 执行
|
||||||
@@ -51,5 +51,5 @@ uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /
|
|||||||
仅当 lyxy-runner-python skill 不存在时使用:
|
仅当 lyxy-runner-python skill 不存在时使用:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
|
python3 scripts/parser.py /path/to/file.docx
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -11,23 +11,23 @@
|
|||||||
### 使用 uv(推荐)
|
### 使用 uv(推荐)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# DOCX - 推荐依赖
|
# DOCX - 全依赖
|
||||||
uv run --with "markitdown[docx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.docx
|
uv run --with docling --with "unstructured[docx]" --with markdownify --with pypandoc-binary --with "markitdown[docx]" --with python-docx scripts/parser.py /path/to/file.docx
|
||||||
|
|
||||||
# PPTX - 推荐依赖
|
# PPTX - 全依赖
|
||||||
uv run --with "markitdown[pptx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.pptx
|
uv run --with docling --with "unstructured[pptx]" --with markdownify --with "markitdown[pptx]" --with python-pptx scripts/parser.py /path/to/file.pptx
|
||||||
|
|
||||||
# XLSX - 推荐依赖
|
# XLSX - 全依赖
|
||||||
uv run --with "markitdown[xlsx]" skills/lyxy-reader-office/scripts/parser.py /path/to/file.xlsx
|
uv run --with docling --with "unstructured[xlsx]" --with markdownify --with "markitdown[xlsx]" --with pandas --with tabulate scripts/parser.py /path/to/file.xlsx
|
||||||
|
|
||||||
# PDF - 推荐依赖
|
# PDF - 全依赖(基础文本提取)
|
||||||
uv run --with "markitdown[pdf]" --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/file.pdf
|
uv run --with docling --with "unstructured[pdf]" --with markdownify --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/file.pdf
|
||||||
|
|
||||||
# PDF OCR 高精度模式
|
# PDF OCR 高精度模式(全依赖)
|
||||||
uv run --with docling --with pypdf skills/lyxy-reader-office/scripts/parser.py /path/to/file.pdf --high-res
|
uv run --with docling --with "unstructured[pdf]" --with unstructured-paddleocr --with "paddlepaddle==2.6.2" --with ml-dtypes --with markdownify --with "markitdown[pdf]" --with pypdf scripts/parser.py /path/to/file.pdf --high-res
|
||||||
```
|
```
|
||||||
|
|
||||||
> **注意**:以上为最小推荐依赖,更多解析器依赖和完整安装命令请查阅 `scripts/README.md` 的安装部分。
|
> **说明**:以上为全依赖运行命令,通过 `uv run --with` 为本次执行临时提供所有解析器依赖(不会安装到全局环境),以获得最佳兼容性。详细的解析器优先级和对比请查阅 `scripts/README.md`。
|
||||||
|
|
||||||
## 各格式输出特点
|
## 各格式输出特点
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ Bun 特性:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 生成临时文件路径
|
# 生成临时文件路径
|
||||||
TEMP_FILE=$(bun skills/lyxy-runner-js/scripts/get_temp_path.js js)
|
TEMP_FILE=$(bun scripts/get_temp_path.js js)
|
||||||
|
|
||||||
# 写入脚本内容
|
# 写入脚本内容
|
||||||
cat <<EOF > "$TEMP_FILE"
|
cat <<EOF > "$TEMP_FILE"
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ process.exit(0) // 成功
|
|||||||
**CLI 使用方式:**
|
**CLI 使用方式:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
bun skills/lyxy-runner-js/scripts/get_temp_path.js <extension>
|
bun scripts/get_temp_path.js <extension>
|
||||||
```
|
```
|
||||||
|
|
||||||
**参数:**
|
**参数:**
|
||||||
@@ -75,9 +75,9 @@ bun skills/lyxy-runner-js/scripts/get_temp_path.js <extension>
|
|||||||
**示例:**
|
**示例:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ bun skills/lyxy-runner-js/scripts/get_temp_path.js js
|
$ bun scripts/get_temp_path.js js
|
||||||
/var/folders/8m/0hm18pdd7ts2bwp0530drz500000gn/T/lyxy-runner-js-1770257905333-na6ujx.js
|
/var/folders/8m/0hm18pdd7ts2bwp0530drz500000gn/T/lyxy-runner-js-1770257905333-na6ujx.js
|
||||||
|
|
||||||
$ bun skills/lyxy-runner-js/scripts/get_temp_path.js ts
|
$ bun scripts/get_temp_path.js ts
|
||||||
/var/folders/8m/0hm18pdd7ts2bwp0530drz500000gn/T/lyxy-runner-js-1770257905333-v8yzt.ts
|
/var/folders/8m/0hm18pdd7ts2bwp0530drz500000gn/T/lyxy-runner-js-1770257905333-v8yzt.ts
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ bun ./scripts/new-script.js
|
|||||||
bun --version
|
bun --version
|
||||||
|
|
||||||
# 步骤 2: 生成临时文件路径
|
# 步骤 2: 生成临时文件路径
|
||||||
TEMP_FILE=$(bun skills/lyxy-runner-js/scripts/get_temp_path.js js)
|
TEMP_FILE=$(bun scripts/get_temp_path.js js)
|
||||||
|
|
||||||
# 步骤 3: 将脚本内容写入临时文件
|
# 步骤 3: 将脚本内容写入临时文件
|
||||||
cat <<EOF > "$TEMP_FILE"
|
cat <<EOF > "$TEMP_FILE"
|
||||||
@@ -67,7 +67,7 @@ bun "$TEMP_FILE"
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# 生成 TypeScript 临时文件
|
# 生成 TypeScript 临时文件
|
||||||
TEMP_TS=$(bun skills/lyxy-runner-js/scripts/get_temp_path.js ts)
|
TEMP_TS=$(bun scripts/get_temp_path.js ts)
|
||||||
|
|
||||||
# 写入 TypeScript 脚本
|
# 写入 TypeScript 脚本
|
||||||
cat <<EOF > "$TEMP_TS"
|
cat <<EOF > "$TEMP_TS"
|
||||||
|
|||||||
Reference in New Issue
Block a user