1
0

修复bug

This commit is contained in:
2026-02-15 19:53:31 +08:00
parent b022ac736b
commit f30ea08805
6 changed files with 139 additions and 97 deletions

View File

@@ -23,7 +23,7 @@ def build_markdown_table(rows_data: List[List[str]]) -> str:
row_text = [cell if cell else "" for cell in row_data]
md_lines.append("| " + " | ".join(row_text) + " |")
if i == 0:
md_lines.append("|" + " | ".join(["---"] * len(row_text)) + " |")
md_lines.append("| " + " | ".join(["---"] * len(row_text)) + " |")
return "\n".join(md_lines) + "\n\n"
@@ -39,16 +39,12 @@ def safe_open_zip(zip_file: zipfile.ZipFile, name: str) -> Optional[zipfile.ZipE
"""安全地从 ZipFile 中打开文件,防止路径遍历攻击"""
if not name:
return None
if name.startswith("/") or name.startswith("\\"):
return None
if name.startswith(".."):
if name.startswith("/") or name.startswith(".."):
return None
if "/../" in name or name.endswith("/.."):
return None
if "\\" in name:
return None
if "/" not in name:
return None
return zip_file.open(name)
@@ -75,11 +71,9 @@ def is_valid_docx(file_path: str) -> bool:
"""验证文件是否为有效的 DOCX 格式"""
try:
with zipfile.ZipFile(file_path, "r") as zip_file:
names = set(zip_file.namelist())
required_files = ["[Content_Types].xml", "_rels/.rels", "word/document.xml"]
for required in required_files:
if required not in zip_file.namelist():
return False
return True
return all(r in names for r in required_files)
except (zipfile.BadZipFile, zipfile.LargeZipFile):
return False
@@ -88,15 +82,13 @@ def is_valid_pptx(file_path: str) -> bool:
"""验证文件是否为有效的 PPTX 格式"""
try:
with zipfile.ZipFile(file_path, "r") as zip_file:
names = set(zip_file.namelist())
required_files = [
"[Content_Types].xml",
"_rels/.rels",
"ppt/presentation.xml",
]
for required in required_files:
if required not in zip_file.namelist():
return False
return True
return all(r in names for r in required_files)
except (zipfile.BadZipFile, zipfile.LargeZipFile):
return False
@@ -105,11 +97,9 @@ def is_valid_xlsx(file_path: str) -> bool:
"""验证文件是否为有效的 XLSX 格式"""
try:
with zipfile.ZipFile(file_path, "r") as zip_file:
names = set(zip_file.namelist())
required_files = ["[Content_Types].xml", "_rels/.rels", "xl/workbook.xml"]
for required in required_files:
if required not in zip_file.namelist():
return False
return True
return all(r in names for r in required_files)
except (zipfile.BadZipFile, zipfile.LargeZipFile):
return False
@@ -177,7 +167,13 @@ def get_heading_level(line: str) -> int:
level += 1
else:
break
return level if 1 <= level <= 6 else 0
if not (1 <= level <= 6):
return 0
if len(stripped) == level:
return level
if stripped[level] != " ":
return 0
return level
def extract_titles(markdown_text: str) -> List[str]:
@@ -206,7 +202,10 @@ def extract_title_content(markdown_text: str, title_name: str) -> Optional[str]:
return None
result_lines = []
for idx in match_indices:
for match_num, idx in enumerate(match_indices):
if match_num > 0:
result_lines.append("\n---\n")
target_level = get_heading_level(lines[idx])
parent_titles = []
@@ -288,7 +287,6 @@ def search_markdown(
line
for i, line in enumerate(lines)
if start_line_idx <= i <= end_line_idx
and (line.strip() or i in selected_indices)
]
results.append("\n".join(result_lines))