增加pdf文件的读取
This commit is contained in:
@@ -114,6 +114,16 @@ def is_valid_xlsx(file_path: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def is_valid_pdf(file_path: str) -> bool:
    """Check whether a file looks like a PDF by inspecting its leading bytes.

    A file is accepted when it starts with the ``%PDF`` signature, or when
    the ``%PDF-`` header marker occurs anywhere within the first 1024 bytes.
    The second rule tolerates files that carry a BOM, blank lines or other
    stray bytes in front of the header, which common PDF readers still open.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if a PDF signature is found; False if it is not found or the
        file cannot be opened or read.
    """
    # Size of the leading window searched for the header marker.
    header_window = 1024

    try:
        with open(file_path, "rb") as f:
            head = f.read(header_window)
    except OSError:
        # IOError is an alias of OSError on Python 3, so one name suffices.
        return False

    # Keep accepting a bare "%PDF" prefix so nothing that was previously
    # considered valid is rejected; additionally accept an offset header.
    return head.startswith(b"%PDF") or b"%PDF-" in head
|
||||
|
||||
|
||||
def remove_markdown_images(markdown_text: str) -> str:
    """Strip image markup from Markdown text.

    Every match of the module-level ``IMAGE_PATTERN`` is replaced with an
    empty string; the rest of the text is returned unchanged.

    Args:
        markdown_text: Markdown source to clean.

    Returns:
        The text with all ``IMAGE_PATTERN`` matches removed.
    """
    without_images = IMAGE_PATTERN.sub("", markdown_text)
    return without_images
|
||||
@@ -286,7 +296,7 @@ def search_markdown(
|
||||
|
||||
|
||||
def detect_file_type(file_path: str) -> Optional[str]:
|
||||
"""检测文件类型,返回 'docx'、'pptx' 或 'xlsx'"""
|
||||
"""检测文件类型,返回 'docx'、'pptx'、'xlsx' 或 'pdf'"""
|
||||
_, ext = os.path.splitext(file_path)
|
||||
ext = ext.lower()
|
||||
|
||||
@@ -299,5 +309,8 @@ def detect_file_type(file_path: str) -> Optional[str]:
|
||||
elif ext == ".xlsx":
|
||||
if is_valid_xlsx(file_path):
|
||||
return "xlsx"
|
||||
elif ext == ".pdf":
|
||||
if is_valid_pdf(file_path):
|
||||
return "pdf"
|
||||
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user