""" PPTX 文件验证工具类 提供 Level 2 验证深度的 PPTX 文件检查功能: - 文件级别:存在、可打开 - 幻灯片级别:数量、尺寸 - 元素级别:类型、数量、内容、位置(容差 0.1 英寸) """ from pathlib import Path from typing import List, Dict, Any, Optional from pptx import Presentation from pptx.util import Inches from pptx.enum.shapes import MSO_SHAPE class PptxValidationError: """验证错误信息""" def __init__(self, level: str, message: str): self.level = level # 'ERROR' or 'WARNING' self.message = message def __repr__(self): return f"[{self.level}] {self.message}" class PptxFileValidator: """PPTX 文件验证器(Level 2 验证深度)""" # 位置容忍度(英寸) TOLERANCE = 0.1 # 幻灯片尺寸常量 SIZE_16_9 = (10.0, 5.625) SIZE_4_3 = (10.0, 7.5) def __init__(self): self.errors: List[PptxValidationError] = [] self.warnings: List[PptxValidationError] = [] def validate_file(self, pptx_path: Path) -> bool: """ 验证 PPTX 文件 Args: pptx_path: PPTX 文件路径 Returns: 验证是否通过 """ self.errors.clear() self.warnings.clear() # 1. 文件级别验证 if not self._validate_file_exists(pptx_path): return False # 2. 打开文件 try: prs = Presentation(str(pptx_path)) except Exception as e: self.errors.append(PptxValidationError("ERROR", f"无法打开 PPTX 文件: {e}")) return False return True def validate_slides_count(self, prs: Presentation, expected_count: int) -> bool: """验证幻灯片数量""" actual = len(prs.slides) if actual != expected_count: self.errors.append( PptxValidationError( "ERROR", f"幻灯片数量不匹配: 期望 {expected_count}, 实际 {actual}" ) ) return False return True def validate_slide_size( self, prs: Presentation, expected_size: str = "16:9" ) -> bool: """验证幻灯片尺寸""" expected = self.SIZE_16_9 if expected_size == "16:9" else self.SIZE_4_3 actual_width = prs.slide_width.inches actual_height = prs.slide_height.inches if abs(actual_width - expected[0]) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"幻灯片宽度不匹配: 期望 {expected[0]}, 实际 {actual_width}", ) ) return False if abs(actual_height - expected[1]) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"幻灯片高度不匹配: 期望 {expected[1]}, 实际 {actual_height}", ) ) return False return True def count_elements_by_type(self, slide) -> Dict[str, int]: """统计幻灯片中各类型元素的数量""" counts = { "text_box": 0, "picture": 0, "shape": 0, "table": 0, "group": 0, "other": 0, } for shape in slide.shapes: if hasattr(shape, "image"): counts["picture"] += 1 elif shape.shape_type in [ MSO_SHAPE.RECTANGLE, MSO_SHAPE.OVAL, MSO_SHAPE.ROUNDED_RECTANGLE, ]: counts["shape"] += 1 elif shape.has_table: counts["table"] += 1 elif shape.shape_type == MSO_SHAPE.GROUP: counts["group"] += 1 elif hasattr(shape, "has_text_frame") and shape.has_text_frame: counts["text_box"] += 1 else: counts["other"] += 1 return counts def validate_text_element( self, slide, index: int = 0, expected_content: Optional[str] = None, expected_font_size: Optional[int] = None, expected_color: Optional[tuple] = None, ) -> bool: """ 验证文本元素 Args: slide: 幻灯片对象 index: 文本框索引 expected_content: 期望的文本内容 expected_font_size: 期望的字体大小 expected_color: 期望的颜色 (R, G, B) Returns: 验证是否通过 """ # 通过检查是否有text_frame属性来判断是否是文本框 text_boxes = [s for s in slide.shapes if hasattr(s, "text_frame")] if index >= len(text_boxes): self.errors.append( PptxValidationError("ERROR", f"找不到索引 {index} 的文本框") ) return False textbox = text_boxes[index] text_frame = textbox.text_frame # 验证内容 if expected_content is not None: actual_content = text_frame.text if actual_content != expected_content: self.errors.append( PptxValidationError( "ERROR", f"文本内容不匹配: 期望 '{expected_content}', 实际 '{actual_content}'", ) ) return False # 验证字体大小 if expected_font_size is not None: actual_size = text_frame.paragraphs[0].font.size.pt if abs(actual_size - expected_font_size) > 1: self.errors.append( PptxValidationError( "ERROR", f"字体大小不匹配: 期望 {expected_font_size}pt, 实际 {actual_size}pt", ) ) return False # 验证颜色 if expected_color is not None: try: actual_rgb = text_frame.paragraphs[0].font.color.rgb actual_color = (actual_rgb[0], actual_rgb[1], actual_rgb[2]) if actual_color != expected_color: self.errors.append( PptxValidationError( "ERROR", f"字体颜色不匹配: 期望 RGB{expected_color}, 实际 RGB{actual_color}", ) ) return False except Exception: self.errors.append(PptxValidationError("WARNING", "无法获取字体颜色")) return True def validate_position( self, shape, expected_left: float, expected_top: float, expected_width: Optional[float] = None, expected_height: Optional[float] = None, ) -> bool: """ 验证元素位置和尺寸 Args: shape: 形状对象 expected_left: 期望的左边距(英寸) expected_top: 期望的上边距(英寸) expected_width: 期望的宽度(英寸) expected_height: 期望的高度(英寸) Returns: 验证是否通过 """ actual_left = shape.left.inches actual_top = shape.top.inches if abs(actual_left - expected_left) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"左边距不匹配: 期望 {expected_left}, 实际 {actual_left}" ) ) return False if abs(actual_top - expected_top) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"上边距不匹配: 期望 {expected_top}, 实际 {actual_top}" ) ) return False if expected_width is not None: actual_width = shape.width.inches if abs(actual_width - expected_width) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"宽度不匹配: 期望 {expected_width}, 实际 {actual_width}", ) ) return False if expected_height is not None: actual_height = shape.height.inches if abs(actual_height - expected_height) > self.TOLERANCE: self.errors.append( PptxValidationError( "ERROR", f"高度不匹配: 期望 {expected_height}, 实际 {actual_height}", ) ) return False return True def validate_background_color(self, slide, expected_rgb: tuple) -> bool: """ 验证幻灯片背景颜色 Args: slide: 幻灯片对象 expected_rgb: 期望的 RGB 颜色 (R, G, B) Returns: 验证是否通过 """ try: fill = slide.background.fill if fill.type == 1: # Solid fill actual_rgb = ( fill.fore_color.rgb[0], fill.fore_color.rgb[1], fill.fore_color.rgb[2], ) if actual_rgb != expected_rgb: self.errors.append( PptxValidationError( "ERROR", f"背景颜色不匹配: 期望 RGB{expected_rgb}, 实际 RGB{actual_rgb}", ) ) return False except Exception as e: self.errors.append(PptxValidationError("WARNING", f"无法获取背景颜色: {e}")) return True def _validate_file_exists(self, pptx_path: Path) -> bool: """验证文件存在且大小大于 0""" if not pptx_path.exists(): self.errors.append(PptxValidationError("ERROR", f"文件不存在: {pptx_path}")) return False if pptx_path.stat().st_size == 0: self.errors.append( PptxValidationError("ERROR", f"文件大小为 0: {pptx_path}") ) return False return True def has_errors(self) -> bool: """是否有错误""" return len(self.errors) > 0 def has_warnings(self) -> bool: """是否有警告""" return len(self.warnings) > 0 def get_errors(self) -> List[str]: """获取所有错误信息""" return [e.message for e in self.errors] def get_warnings(self) -> List[str]: """获取所有警告信息""" return [w.message for w in self.warnings] def clear(self): """清除所有错误和警告""" self.errors.clear() self.warnings.clear()