import { DOMParser } from "@xmldom/xmldom";
import * as xpath from "xpath";
import type { ConfigValidationIssue } from "../../config-contract/issues";
import type { JsonValue } from "../../types";
import { BodyRuleTypeKeys, OperatorKeys } from "../../config-contract/fragments";
import { issue, joinPath } from "../../config-contract/issues";

/** Operator names accepted inside an operator object, as a set for O(1) lookups by arbitrary string key. */
const OPERATOR_KEY_SET = new Set<string>(OperatorKeys);

/** Matches a JSONPath segment of the form `name[index]`; group 1 is the name, group 2 the numeric index. */
const BRACKET_SEGMENT_PATTERN = /^(.+?)\[(\d+)\]$/;

/** Non-operator fields allowed on a `css` rule. */
const CSS_RULE_FIELDS: ReadonlySet<string> = new Set(["attr", "selector"]);

/** Non-operator fields allowed on a `json` rule. */
const JSON_RULE_FIELDS: ReadonlySet<string> = new Set(["path"]);

/** Non-operator fields allowed on an `xpath` rule. */
const XPATH_RULE_FIELDS: ReadonlySet<string> = new Set(["path"]);

/**
 * Minimal well-formed XML used as the evaluation context when probing an XPath
 * expression. It must be a real document: an empty source gives the parser
 * nothing to build, and the subsequent evaluation then fails for every expression.
 */
const XPATH_PROBE_DOCUMENT = "<root/>";

/**
 * Type guard for JSON-compatible values.
 *
 * Accepts `null`, strings, booleans, finite numbers, arrays whose elements are
 * all JSON values, and objects whose own enumerable property values are all
 * JSON values. Everything else (`undefined`, functions, symbols, bigints,
 * `NaN`, `Infinity`) is rejected.
 *
 * @param value - The value to inspect.
 * @returns `true` when `value` is built only from the JSON-compatible pieces above.
 */
export function isJsonValue(value: unknown): value is JsonValue {
  if (value === null) return true;
  if (typeof value === "string" || typeof value === "boolean") return true;
  if (typeof value === "number") return Number.isFinite(value);
  if (Array.isArray(value)) return value.every(isJsonValue);
  if (typeof value === "object") {
    return Object.values(value as Record<string, unknown>).every(isJsonValue);
  }
  return false;
}

/**
 * Validates a list of body rules.
 *
 * @param body - The candidate value; must be an array of body rule objects.
 * @param path - Config path of `body`, used to locate reported issues.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @returns All issues found; empty when every rule is valid.
 */
export function validateBodyRules(body: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!Array.isArray(body)) return [issue("invalid-type", path, "必须为数组", targetName)];
  return body.flatMap((rule, index) => validateSingleBodyRule(rule, `${path}[${index}]`, targetName));
}

/**
 * Performs a shallow syntactic check of a JSONPath expression.
 *
 * The expression must start with `$.` and have content after it. The remainder
 * is split on `.`; each empty segment is reported, as is each `name[index]`
 * segment whose name part is blank.
 *
 * @param path - The JSONPath expression to check.
 * @param rulePath - Config path of the rule that owns the expression; issues are reported at its `path` field.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @returns All issues found; empty when the expression passes these checks.
 */
export function validateJsonPath(path: string, rulePath: string, targetName?: string): ConfigValidationIssue[] {
  const issuePath = joinPath(rulePath, "path");
  if (!path.startsWith("$.") || path.length <= 2) {
    return [issue("invalid-jsonpath", issuePath, '必须为以 "$." 开头的有效 JSONPath', targetName)];
  }

  const issues: ConfigValidationIssue[] = [];
  for (const segment of path.slice(2).split(".")) {
    if (segment === "") {
      issues.push(issue("invalid-jsonpath", issuePath, "包含空段", targetName));
    }
    // NOTE(review): the pattern requires at least one character before `[`, so a
    // bare `[0]` segment does not match and is not reported here; only a
    // whitespace-only name is. Confirm whether bare index segments should be flagged.
    const bracketMatch = BRACKET_SEGMENT_PATTERN.exec(segment);
    if (bracketMatch?.[1]?.trim() === "") {
      issues.push(issue("invalid-jsonpath", issuePath, "数组访问缺少属性名", targetName));
    }
  }
  return issues;
}

/**
 * Validates an object whose keys are operator names.
 *
 * Unknown keys are reported as `unknown-operator`. Known keys whose value is
 * `undefined` are ignored. Every other known key has its value type-checked.
 *
 * @param operators - The candidate operator object.
 * @param path - Config path of `operators`, used to locate reported issues.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @param options - `requireAtLeastOne` (default `true`) reports `empty-operator`
 *   when no known operator with a defined value is present.
 * @returns All issues found; empty when the operator object is valid.
 */
export function validateOperatorObject(
  operators: unknown,
  path: string,
  targetName?: string,
  options: { requireAtLeastOne: boolean } = { requireAtLeastOne: true },
): ConfigValidationIssue[] {
  if (!isPlainRecord(operators)) return [issue("invalid-type", path, "必须为操作符对象", targetName)];

  const issues: ConfigValidationIssue[] = [];
  let found = 0;
  for (const [key, value] of Object.entries(operators)) {
    if (!OPERATOR_KEY_SET.has(key)) {
      issues.push(issue("unknown-operator", joinPath(path, key), "是未知 operator", targetName));
      continue;
    }
    // An explicitly undefined operator is treated as absent: not counted, not validated.
    if (value === undefined) continue;
    found++;
    issues.push(...validateOperatorValue(key, value, joinPath(path, key), targetName));
  }

  if (options.requireAtLeastOne && found === 0) {
    issues.push(issue("empty-operator", path, "必须包含至少一个合法 operator", targetName));
  }
  return issues;
}

/**
 * Validates a list of text rules, each of which must be an operator object
 * containing at least one operator.
 *
 * @param rules - The candidate value; must be an array.
 * @param path - Config path of `rules`, used to locate reported issues.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @returns All issues found; empty when every rule is valid.
 */
export function validateTextRules(rules: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!Array.isArray(rules)) return [issue("invalid-type", path, "必须为数组", targetName)];
  return rules.flatMap((rule, index) => validateOperatorObject(rule, `${path}[${index}]`, targetName));
}

/**
 * Splits a rule object into its operator entries and reports unexpected fields.
 *
 * Keys in `allowedKeys` are skipped (the caller validates them). Remaining keys
 * that are known operators are copied into `operators`; every other key is
 * reported as `unknown-field`.
 *
 * @param object - The rule object to split.
 * @param allowedKeys - Rule-specific field names that are not operators.
 * @param path - Config path of `object`, used to locate reported issues.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @returns The unknown-field issues and the extracted operator entries.
 */
function collectOperatorObject(
  object: Record<string, unknown>,
  allowedKeys: ReadonlySet<string>,
  path: string,
  targetName?: string,
): { issues: ConfigValidationIssue[]; operators: Record<string, unknown> } {
  const issues: ConfigValidationIssue[] = [];
  const operators: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(object)) {
    if (allowedKeys.has(key)) continue;
    if (OPERATOR_KEY_SET.has(key)) {
      operators[key] = value;
    } else {
      issues.push(issue("unknown-field", joinPath(path, key), "是未知字段", targetName));
    }
  }
  return { issues, operators };
}

/** Returns `true` for non-null, non-array objects. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Validates the operator part of a structured rule (css / json / xpath).
 *
 * Unknown-field issues come first, followed by issues for the operator values.
 * Operators are optional on these rules, so an empty operator set is not an error.
 */
function validateRuleOperators(
  rule: Record<string, unknown>,
  reservedKeys: ReadonlySet<string>,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  const { issues, operators } = collectOperatorObject(rule, reservedKeys, path, targetName);
  return [...issues, ...validateOperatorObject(operators, path, targetName, { requireAtLeastOne: false })];
}

/** Reports `invalid-regex` at `path` when `source` cannot be compiled by `RegExp`. */
function validateRegexSource(source: string, path: string, targetName?: string): ConfigValidationIssue[] {
  try {
    new RegExp(source);
    return [];
  } catch {
    return [issue("invalid-regex", path, "正则不合法", targetName)];
  }
}

/**
 * Validates a `css` rule: a non-blank string `selector`, an optional string
 * `attr`, and any number of operators.
 */
function validateCssRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(rule)) return [issue("invalid-type", path, "必须为对象", targetName)];

  const issues: ConfigValidationIssue[] = [];
  const selector = rule["selector"];
  if (typeof selector !== "string" || selector.trim() === "") {
    issues.push(issue("invalid-type", joinPath(path, "selector"), "必须为非空字符串", targetName));
  }
  if ("attr" in rule && typeof rule["attr"] !== "string") {
    issues.push(issue("invalid-type", joinPath(path, "attr"), "必须为字符串", targetName));
  }
  issues.push(...validateRuleOperators(rule, CSS_RULE_FIELDS, path, targetName));
  return issues;
}

/**
 * Validates a `json` rule: a string `path` that passes {@link validateJsonPath},
 * and any number of operators.
 */
function validateJsonRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(rule)) return [issue("invalid-type", path, "必须为对象", targetName)];

  const issues: ConfigValidationIssue[] = [];
  const jsonPath = rule["path"];
  if (typeof jsonPath !== "string") {
    issues.push(issue("invalid-type", joinPath(path, "path"), "必须为字符串", targetName));
  } else {
    issues.push(...validateJsonPath(jsonPath, path, targetName));
  }
  issues.push(...validateRuleOperators(rule, JSON_RULE_FIELDS, path, targetName));
  return issues;
}

/**
 * Type-checks the value of a single operator.
 *
 * - `contains`: string
 * - `empty`, `exists`: boolean
 * - `equals`: any JSON value
 * - `gt`, `gte`, `lt`, `lte`: finite number
 * - `match`: string that compiles as a regular expression
 *
 * @param key - The operator name.
 * @param value - The operator's configured value.
 * @param path - Config path of the operator, used to locate reported issues.
 * @param targetName - Optional target name forwarded to every reported issue.
 * @returns The issues found; empty when the value is acceptable.
 */
function validateOperatorValue(
  key: string,
  value: unknown,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  switch (key) {
    case "contains":
      return typeof value === "string" ? [] : [issue("invalid-type", path, "必须为字符串", targetName)];
    case "empty":
    case "exists":
      return typeof value === "boolean" ? [] : [issue("invalid-type", path, "必须为布尔值", targetName)];
    case "equals":
      return isJsonValue(value) ? [] : [issue("invalid-type", path, "必须为 JSON value", targetName)];
    case "gt":
    case "gte":
    case "lt":
    case "lte":
      return typeof value === "number" && Number.isFinite(value)
        ? []
        : [issue("invalid-type", path, "必须为有限数字", targetName)];
    case "match":
      if (typeof value !== "string") return [issue("invalid-type", path, "必须为字符串", targetName)];
      return validateRegexSource(value, path, targetName);
    default:
      return [issue("unknown-operator", path, "是未知 operator", targetName)];
  }
}

/** Validates a `regex` rule: a string that compiles as a regular expression. */
function validateRegexRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (typeof rule !== "string") return [issue("invalid-type", path, "必须为字符串", targetName)];
  return validateRegexSource(rule, path, targetName);
}

/**
 * Validates one body rule object.
 *
 * The object must contain exactly one of the keys in `BodyRuleTypeKeys` and no
 * other keys. The value under that key is then validated by the matching
 * rule-specific validator. Unknown-field issues short-circuit that step.
 */
function validateSingleBodyRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(rule)) return [issue("invalid-type", path, "必须为对象", targetName)];

  const found = BodyRuleTypeKeys.filter((type) => type in rule);
  if (found.length === 0) return [issue("missing-body-rule", path, "缺少支持的规则类型", targetName)];
  if (found.length > 1) return [issue("multiple-body-rules", path, "只能配置一种规则类型", targetName)];

  // `found` has exactly one element at this point.
  const ruleType = found[0]!;
  const issues: ConfigValidationIssue[] = [];
  for (const key of Object.keys(rule)) {
    if (key !== ruleType) issues.push(issue("unknown-field", joinPath(path, key), "是未知字段", targetName));
  }
  if (issues.length > 0) return issues;

  switch (ruleType) {
    case "contains":
      return typeof rule["contains"] === "string"
        ? []
        : [issue("invalid-type", joinPath(path, "contains"), "必须为字符串", targetName)];
    case "css":
      return validateCssRule(rule["css"], joinPath(path, "css"), targetName);
    case "json":
      return validateJsonRule(rule["json"], joinPath(path, "json"), targetName);
    case "regex":
      return validateRegexRule(rule["regex"], joinPath(path, "regex"), targetName);
    case "xpath":
      return validateXpathRule(rule["xpath"], joinPath(path, "xpath"), targetName);
  }
}

/**
 * Validates an `xpath` rule: a non-blank string `path` that the `xpath`
 * library can evaluate, and any number of operators.
 *
 * The expression is probed by evaluating it against a minimal well-formed
 * document; any exception is reported as `invalid-xpath`.
 */
function validateXpathRule(rule: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(rule)) return [issue("invalid-type", path, "必须为对象", targetName)];

  const issues: ConfigValidationIssue[] = [];
  const expression = rule["path"];
  if (typeof expression !== "string" || expression.trim() === "") {
    issues.push(issue("invalid-type", joinPath(path, "path"), "必须为非空字符串", targetName));
  } else {
    try {
      // Evaluate against a real (if trivial) document so that only a bad
      // expression can make this throw, not a missing context node.
      const doc = new DOMParser().parseFromString(XPATH_PROBE_DOCUMENT, "text/xml");
      xpath.select(expression, doc as unknown as Node);
    } catch {
      issues.push(issue("invalid-xpath", joinPath(path, "path"), "xpath 不合法", targetName));
    }
  }
  issues.push(...validateRuleOperators(rule, XPATH_RULE_FIELDS, path, targetName));
  return issues;
}