增加docling作为解析器
This commit is contained in:
@@ -64,6 +64,7 @@ def main() -> None:
|
||||
|
||||
if file_type == "docx":
|
||||
parsers = [
|
||||
("docling", docx_parser.parse_docx_with_docling),
|
||||
("pypandoc-binary", docx_parser.parse_docx_with_pypandoc),
|
||||
("MarkItDown", docx_parser.parse_docx_with_markitdown),
|
||||
("python-docx", docx_parser.parse_docx_with_python_docx),
|
||||
@@ -71,18 +72,21 @@ def main() -> None:
|
||||
]
|
||||
elif file_type == "pptx":
|
||||
parsers = [
|
||||
("docling", pptx_parser.parse_pptx_with_docling),
|
||||
("MarkItDown", pptx_parser.parse_pptx_with_markitdown),
|
||||
("python-pptx", pptx_parser.parse_pptx_with_python_pptx),
|
||||
("XML 原生解析", pptx_parser.parse_pptx_with_xml),
|
||||
]
|
||||
elif file_type == "xlsx":
|
||||
parsers = [
|
||||
("docling", xlsx_parser.parse_xlsx_with_docling),
|
||||
("MarkItDown", xlsx_parser.parse_xlsx_with_markitdown),
|
||||
("pandas", xlsx_parser.parse_xlsx_with_pandas),
|
||||
("XML 原生解析", xlsx_parser.parse_xlsx_with_xml),
|
||||
]
|
||||
else:
|
||||
parsers = [
|
||||
("docling", pdf_parser.parse_pdf_with_docling),
|
||||
("MarkItDown", pdf_parser.parse_pdf_with_markitdown),
|
||||
("unstructured", pdf_parser.parse_pdf_with_unstructured),
|
||||
("pypdf", pdf_parser.parse_pdf_with_pypdf),
|
||||
|
||||
Reference in New Issue
Block a user