修复bug
This commit is contained in:
@@ -6,10 +6,10 @@ import os
 import sys
 
 import common
-import docx
-import pdf
-import pptx
-import xlsx
+import docx_parser
+import pdf_parser
+import pptx_parser
+import xlsx_parser
 
 
 def main() -> None:
@@ -64,27 +64,27 @@ def main() -> None:
 
     if file_type == "docx":
         parsers = [
-            ("MarkItDown", docx.parse_docx_with_markitdown),
-            ("python-docx", docx.parse_docx_with_python_docx),
-            ("XML 原生解析", docx.parse_docx_with_xml),
+            ("MarkItDown", docx_parser.parse_docx_with_markitdown),
+            ("python-docx", docx_parser.parse_docx_with_python_docx),
+            ("XML 原生解析", docx_parser.parse_docx_with_xml),
         ]
     elif file_type == "pptx":
         parsers = [
-            ("MarkItDown", pptx.parse_pptx_with_markitdown),
-            ("python-pptx", pptx.parse_pptx_with_python_pptx),
-            ("XML 原生解析", pptx.parse_pptx_with_xml),
+            ("MarkItDown", pptx_parser.parse_pptx_with_markitdown),
+            ("python-pptx", pptx_parser.parse_pptx_with_python_pptx),
+            ("XML 原生解析", pptx_parser.parse_pptx_with_xml),
         ]
     elif file_type == "xlsx":
         parsers = [
-            ("MarkItDown", xlsx.parse_xlsx_with_markitdown),
-            ("pandas", xlsx.parse_xlsx_with_pandas),
-            ("XML 原生解析", xlsx.parse_xlsx_with_xml),
+            ("MarkItDown", xlsx_parser.parse_xlsx_with_markitdown),
+            ("pandas", xlsx_parser.parse_xlsx_with_pandas),
+            ("XML 原生解析", xlsx_parser.parse_xlsx_with_xml),
         ]
     else:
         parsers = [
-            ("MarkItDown", pdf.parse_pdf_with_markitdown),
-            ("unstructured", pdf.parse_pdf_with_unstructured),
-            ("pypdf", pdf.parse_pdf_with_pypdf),
+            ("MarkItDown", pdf_parser.parse_pdf_with_markitdown),
+            ("unstructured", pdf_parser.parse_pdf_with_unstructured),
+            ("pypdf", pdf_parser.parse_pdf_with_pypdf),
         ]
 
     failures = []
Reference in New Issue
Block a user