1
0
Files
DiAL/src/server/checker/expect/validate.ts
lanyuanxiaoyao cf847ccd7a feat: 重构配置生命周期为 Authoring/Normalized/Resolved 三层
将变量替换和 expect 简写展开统一放入 Normalized 阶段,
运行时 AJV 使用 Normalized schema,导出 schema 面向 Authoring Config。

主要变更:
- 新增 normalizer.ts 实现 normalizeAuthoringConfig()
- 拆分 Authoring/Normalized 双 schema,checker 接口支持 authoring/normalized 片段
- config-loader 流程:normalize → Normalized AJV → semantic → resolve
- validator 兼容层自动分派 raw/normalized expect 形态
- 删除 rawExpect,store.expect 列写入 null
- Authoring schema 对 integer/boolean/enum 字段接受变量引用
- 修复 DB/HTTP validate 入口守卫和 LLM options integer 变量引用
- 优化 compact() 避免 undefined 覆盖隐患
- 移除 content.ts 恒为 true 的前置条件
- 同步 5 个主规范并归档 change
2026-05-22 14:00:47 +08:00

323 lines
13 KiB
TypeScript

import { DOMParser } from "@xmldom/xmldom";
import { isBoolean, isNumber, isPlainObject, isString } from "es-toolkit";
import * as xpath from "xpath";
import type { ConfigValidationIssue } from "../schema/issues";
import type { JsonValue } from "../types";
import { issue, joinPath } from "../schema/issues";
import { CONTENT_EXTRACTOR_KEY_SET, MATCHER_KEY_SET } from "./keys";
import { isUnsafeRegex } from "./redos";
import { isValueMatcherPrimitive } from "./value";
/**
 * Type guard reporting whether `value` can be treated as a {@link JsonValue}.
 *
 * `null`, strings and booleans are accepted directly; numbers only when they
 * are finite. Arrays and plain objects are accepted when every element /
 * property value passes the same check recursively. Anything not matched by
 * one of these checks is rejected.
 *
 * @param value - Arbitrary value to inspect.
 * @returns `true` when `value` passes the checks described above.
 */
export function isJsonValue(value: unknown): value is JsonValue {
  const allJson = (members: readonly unknown[]): boolean => members.every((member) => isJsonValue(member));

  if (value === null || isString(value) || isBoolean(value)) return true;
  // Numbers qualify only when finite, so NaN and the infinities are refused.
  if (isNumber(value)) return Number.isFinite(value);
  if (Array.isArray(value)) return allJson(value);
  return isPlainObject(value) ? allJson(Object.values(value)) : false;
}
/**
 * Type guard that narrows `value` to a string-keyed record.
 *
 * The decision is delegated entirely to `isPlainObject`.
 *
 * @param value - Arbitrary value to inspect.
 * @returns Whatever `isPlainObject` reports for `value`.
 */
export function isPlainRecord(value: unknown): value is Record<string, unknown> {
  const plain = isPlainObject(value);
  return plain;
}
/**
 * Checks that `path` has the restricted JSONPath shape this module accepts.
 *
 * The path must start with `"$."` and have at least one character after it;
 * otherwise a single issue is returned and no further checks run. The rest is
 * split on `"."` and each segment is inspected: an empty segment is reported,
 * and a segment of the form `name[digits]` is reported when `name` is only
 * whitespace.
 *
 * NOTE(review): the name group requires at least one character, so a bare
 * `[0]` segment does not match the pattern and is not reported as missing a
 * property name. Confirm whether that is intended.
 *
 * @param path - The JSONPath string to validate.
 * @param expectationPath - Location of the owning expectation; issues are
 *   reported at `joinPath(expectationPath, "path")`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns All issues found; empty when none were detected.
 */
export function validateJsonPath(path: string, expectationPath: string, targetName?: string): ConfigValidationIssue[] {
  const report = (message: string): ConfigValidationIssue =>
    issue("invalid-jsonpath", joinPath(expectationPath, "path"), message, targetName);

  const hasBody = path.startsWith("$.") && path.length > 2;
  if (!hasBody) {
    return [report('必须为以 "$." 开头的有效 JSONPath')];
  }

  // Matches "<name>[<index>]"; group 1 is the property name before the bracket.
  const indexedSegment = /^(.+?)\[(\d+)\]$/;

  return path
    .slice(2)
    .split(".")
    .flatMap((segment) => {
      const found: ConfigValidationIssue[] = [];
      if (segment === "") {
        found.push(report("包含空段"));
      }
      const propertyName = indexedSegment.exec(segment)?.[1];
      if (propertyName !== undefined && propertyName.trim() === "") {
        found.push(report("数组访问缺少属性名"));
      }
      return found;
    });
}
/**
 * Validates a list of content expectations.
 *
 * A non-array input yields a single `invalid-type` issue at `path`. Otherwise
 * each entry is validated with `validateRawContentExpectation`, using
 * `path[index]` as its location, and the results are concatenated in order.
 *
 * @param expectations - Candidate expectation list.
 * @param path - Location of the list, used as prefix for per-entry paths.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns All issues found across the list.
 */
export function validateRawContentExpectations(
  expectations: unknown,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  if (!Array.isArray(expectations)) {
    return [issue("invalid-type", path, "必须为数组", targetName)];
  }

  const collected: ConfigValidationIssue[] = [];
  expectations.forEach((entry, position) => {
    collected.push(...validateRawContentExpectation(entry, `${path}[${position}]`, targetName));
  });
  return collected;
}
/**
 * Validates keyed expectations given either as an array or as an object map.
 *
 * - An array is handed to `validateNormalizedKeyedExpectations` unchanged.
 * - Anything that is neither an array nor a plain record yields one
 *   `invalid-type` issue.
 * - For an object map, each property value is validated with
 *   `validateRawValueExpectation` at `joinPath(path, key)`. When
 *   `options.caseInsensitive` is set, keys that collide after lower-casing are
 *   reported first as `duplicate-key`, naming the first spelling seen.
 *
 * @param value - Candidate keyed expectations.
 * @param path - Location of the container.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `caseInsensitive` enables the duplicate-key check.
 * @returns Duplicate-key issues followed by per-entry issues.
 */
export function validateRawKeyedExpectations(
  value: unknown,
  path: string,
  targetName?: string,
  options?: { caseInsensitive?: boolean },
): ConfigValidationIssue[] {
  if (Array.isArray(value)) {
    return validateNormalizedKeyedExpectations(value, path, targetName, options);
  }
  if (!isPlainRecord(value)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const duplicateIssues: ConfigValidationIssue[] = [];
  if (options?.caseInsensitive) {
    // Maps each lower-cased key to the first original spelling encountered.
    const firstSpelling = new Map<string, string>();
    Object.keys(value).forEach((key) => {
      const folded = key.toLowerCase();
      const earlier = firstSpelling.get(folded);
      if (earlier === undefined) {
        firstSpelling.set(folded, key);
        return;
      }
      duplicateIssues.push(
        issue("duplicate-key", joinPath(path, key), `与 "${earlier}" 大小写归一化后重复`, targetName),
      );
    });
  }

  const entryIssues = Object.entries(value).flatMap(([key, item]) =>
    validateRawValueExpectation(item, joinPath(path, key), targetName),
  );
  return [...duplicateIssues, ...entryIssues];
}
/**
 * Validates a single value expectation, written either as a primitive
 * shorthand or as a matcher object.
 *
 * - Values accepted by `isValueMatcherPrimitive` are valid as-is.
 * - Arrays and other non-record values yield one `invalid-type` issue.
 * - For a matcher object, keys outside `MATCHER_KEY_SET` are reported as
 *   `unknown-matcher`; known keys with a value other than `undefined` are
 *   counted and checked by `validateMatcherValue`.
 * - If no known matcher was counted and `requireAtLeastOne` is in effect, an
 *   `empty-matcher` issue is added.
 * - `exists: false` together with any other counted matcher is reported.
 *
 * @param matcher - Candidate expectation.
 * @param path - Location of the expectation.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `requireAtLeastOne` (default `true`) controls the
 *   `empty-matcher` check.
 * @returns All issues found; empty when the expectation is acceptable.
 */
export function validateRawValueExpectation(
  matcher: unknown,
  path: string,
  targetName?: string,
  options: { requireAtLeastOne?: boolean } = {},
): ConfigValidationIssue[] {
  const mustHaveMatcher = options.requireAtLeastOne ?? true;

  if (isValueMatcherPrimitive(matcher)) {
    return [];
  }
  if (Array.isArray(matcher)) {
    const arrayHint = "必须为 primitive 原始值或 matcher 对象;如需数组 equals 匹配应写成 {equals: [...]}";
    return [issue("invalid-type", path, arrayHint, targetName)];
  }
  if (!isPlainRecord(matcher)) {
    return [issue("invalid-type", path, "必须为 primitive 原始值或 matcher 对象", targetName)];
  }

  const problems: ConfigValidationIssue[] = [];
  // Number of known matcher keys that carry a value other than undefined.
  let recognized = 0;
  for (const [name, operand] of Object.entries(matcher)) {
    if (MATCHER_KEY_SET.has(name)) {
      if (operand !== undefined) {
        recognized += 1;
        problems.push(...validateMatcherValue(name, operand, joinPath(path, name), targetName));
      }
    } else {
      problems.push(issue("unknown-matcher", joinPath(path, name), "是未知 matcher", targetName));
    }
  }

  if (mustHaveMatcher && recognized === 0) {
    problems.push(issue("empty-matcher", path, "必须包含至少一个合法 matcher", targetName));
  }
  if (recognized > 1 && matcher["exists"] === false) {
    problems.push(
      issue("invalid-value", joinPath(path, "exists"), "exists:false 不能与其他 matcher 组合", targetName),
    );
  }
  return problems;
}
/**
 * Validates the object given under a `css` extractor.
 *
 * Requires `selector` to be a string that is non-empty after trimming, and
 * `attr`, when present, to be a string. Every other property is treated as a
 * matcher and checked through `validateExtractorMatcher`.
 *
 * @param expectation - Candidate extractor object.
 * @param path - Location of the extractor object.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Field issues followed by matcher issues.
 */
function validateCssExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const fieldIssues: ConfigValidationIssue[] = [];

  const selector = expectation["selector"];
  const selectorUsable = isString(selector) && selector.trim() !== "";
  if (!selectorUsable) {
    fieldIssues.push(issue("invalid-type", joinPath(path, "selector"), "必须为非空字符串", targetName));
  }

  if ("attr" in expectation && !isString(expectation["attr"])) {
    fieldIssues.push(issue("invalid-type", joinPath(path, "attr"), "必须为字符串", targetName));
  }

  const matcherIssues = validateExtractorMatcher(expectation, new Set(["attr", "selector"]), path, targetName);
  return [...fieldIssues, ...matcherIssues];
}
/**
 * Validates the matcher part of an extractor object.
 *
 * Every property of `expectation` whose key is not in `allowedFields` is
 * collected into a matcher object, which is then validated with
 * `validateRawValueExpectation` with `requireAtLeastOne` set to `false`, so
 * an extractor without any matcher is accepted here.
 *
 * @param expectation - Extractor object (already known to be a plain record).
 * @param allowedFields - Keys that belong to the extractor itself and must be
 *   excluded from matcher validation.
 * @param path - Location of the extractor object.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Issues reported for the matcher part.
 */
function validateExtractorMatcher(
  expectation: Record<string, unknown>,
  allowedFields: Set<string>,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  // Build the matcher with Object.fromEntries rather than `matcher[key] = value`.
  // Plain assignment of a "__proto__" key (which JSON.parse creates as an own
  // property) goes through the prototype setter: primitive values are silently
  // dropped and object values replace the prototype, so the key would never be
  // validated. fromEntries defines it as an ordinary own property instead.
  const matcher: Record<string, unknown> = Object.fromEntries(
    Object.entries(expectation).filter(([key]) => !allowedFields.has(key)),
  );
  return validateRawValueExpectation(matcher, path, targetName, { requireAtLeastOne: false });
}
/**
 * Validates the object given under a `json` extractor.
 *
 * `path` must be a string; when it is, its shape is checked with
 * `validateJsonPath`. Every other property is treated as a matcher and
 * checked through `validateExtractorMatcher`.
 *
 * @param expectation - Candidate extractor object.
 * @param path - Location of the extractor object.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Path issues followed by matcher issues.
 */
function validateJsonExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const jsonPath = expectation["path"];
  const pathIssues = isString(jsonPath)
    ? validateJsonPath(jsonPath, path, targetName)
    : [issue("invalid-type", joinPath(path, "path"), "必须为字符串", targetName)];

  const matcherIssues = validateExtractorMatcher(expectation, new Set(["path"]), path, targetName);
  return [...pathIssues, ...matcherIssues];
}
/**
 * Checks that the operand of one matcher key has the type that key requires.
 *
 * - `contains`: string.
 * - `empty`, `exists`: boolean.
 * - `equals`: anything accepted by `isJsonValue`.
 * - `gt`, `gte`, `lt`, `lte`: finite number.
 * - `regex`: a string that compiles with `new RegExp` and is not flagged by
 *   `isUnsafeRegex`.
 * - any other key: reported as `unknown-matcher`.
 *
 * @param key - Matcher name.
 * @param value - Operand supplied for the matcher.
 * @param path - Location of the matcher entry.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns At most one issue; empty when the operand is acceptable.
 */
function validateMatcherValue(key: string, value: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  const wrongType = (message: string): ConfigValidationIssue[] => [issue("invalid-type", path, message, targetName)];

  if (key === "contains") {
    return isString(value) ? [] : wrongType("必须为字符串");
  }
  if (key === "empty" || key === "exists") {
    return isBoolean(value) ? [] : wrongType("必须为布尔值");
  }
  if (key === "equals") {
    return isJsonValue(value) ? [] : wrongType("必须为 JSON value");
  }
  if (key === "gt" || key === "gte" || key === "lt" || key === "lte") {
    return isNumber(value) && Number.isFinite(value) ? [] : wrongType("必须为有限数字");
  }
  if (key !== "regex") {
    return [issue("unknown-matcher", path, "是未知 matcher", targetName)];
  }

  if (!isString(value)) {
    return wrongType("必须为字符串");
  }
  try {
    // Constructed only to find out whether the pattern compiles.
    new RegExp(value);
  } catch {
    return [issue("invalid-regex", path, "正则不合法", targetName)];
  }
  if (isUnsafeRegex(value)) {
    return [issue("unsafe-regex", path, "正则存在 ReDoS 风险", targetName)];
  }
  return [];
}
/**
 * Validates a content expectation that carries an explicit `kind`.
 *
 * The `matcher` property is always validated first with
 * `validateRawValueExpectation`. Kind-specific checks follow:
 * - `css`: `selector` must be a non-blank string; `attr`, if present, a string.
 * - `json`: `path` must be a string accepted by `validateJsonPath`.
 * - `value`: no additional fields are checked.
 * - `xpath`: `path` must be a string and is checked by `validateXpathExpectation`.
 * - anything else: the `kind` itself is reported as invalid.
 *
 * @param expectation - Expectation record to validate.
 * @param path - Location of the expectation.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Matcher issues followed by kind-specific issues.
 */
function validateNormalizedContentExpectation(
  expectation: Record<string, unknown>,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  const kind = expectation["kind"];
  const matcherIssues = validateRawValueExpectation(expectation["matcher"], joinPath(path, "matcher"), targetName);

  const kindIssues: ConfigValidationIssue[] = [];
  if (kind === "css") {
    const selector = expectation["selector"];
    if (!(isString(selector) && selector.trim() !== "")) {
      kindIssues.push(issue("invalid-type", joinPath(path, "selector"), "必须为非空字符串", targetName));
    }
    if ("attr" in expectation && !isString(expectation["attr"])) {
      kindIssues.push(issue("invalid-type", joinPath(path, "attr"), "必须为字符串", targetName));
    }
  } else if (kind === "json") {
    const jsonPath = expectation["path"];
    if (isString(jsonPath)) {
      kindIssues.push(...validateJsonPath(jsonPath, path, targetName));
    } else {
      kindIssues.push(issue("invalid-type", joinPath(path, "path"), "必须为字符串", targetName));
    }
  } else if (kind === "xpath") {
    const xpathText = expectation["path"];
    if (isString(xpathText)) {
      // Only the path is forwarded; the matcher has already been validated above.
      kindIssues.push(...validateXpathExpectation({ path: xpathText }, path, targetName));
    } else {
      kindIssues.push(issue("invalid-type", joinPath(path, "path"), "必须为非空字符串", targetName));
    }
  } else if (kind !== "value") {
    kindIssues.push(issue("invalid-type", joinPath(path, "kind"), "必须为 value、json、css 或 xpath", targetName));
  }

  return [...matcherIssues, ...kindIssues];
}
/**
 * Validates keyed expectations given as an array of `{ key, matcher }` items.
 *
 * For each item, in order:
 * - a non-record item yields `invalid-type` and nothing else is checked for it;
 * - `key` must be a string; with `options.caseInsensitive`, a key that
 *   lower-cases to one already seen is reported as `duplicate-key`, naming
 *   the first spelling;
 * - `matcher` is validated with `validateRawValueExpectation`.
 *
 * @param value - Array of candidate items.
 * @param path - Location of the array, used as prefix for per-item paths.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `caseInsensitive` enables the duplicate-key check.
 * @returns All issues found, in item order.
 */
function validateNormalizedKeyedExpectations(
  value: unknown[],
  path: string,
  targetName?: string,
  options?: { caseInsensitive?: boolean },
): ConfigValidationIssue[] {
  const collected: ConfigValidationIssue[] = [];
  // Maps each lower-cased key to the first original spelling encountered.
  const firstSpelling = new Map<string, string>();

  for (const [index, item] of value.entries()) {
    const itemPath = `${path}[${index}]`;
    if (!isPlainRecord(item)) {
      collected.push(issue("invalid-type", itemPath, "必须为对象", targetName));
      continue;
    }

    const key = item["key"];
    if (isString(key)) {
      if (options?.caseInsensitive) {
        const folded = key.toLowerCase();
        const earlier = firstSpelling.get(folded);
        if (earlier === undefined) {
          firstSpelling.set(folded, key);
        } else {
          collected.push(
            issue("duplicate-key", joinPath(itemPath, "key"), `与 "${earlier}" 大小写归一化后重复`, targetName),
          );
        }
      }
    } else {
      collected.push(issue("invalid-type", joinPath(itemPath, "key"), "必须为字符串", targetName));
    }

    collected.push(...validateRawValueExpectation(item["matcher"], joinPath(itemPath, "matcher"), targetName));
  }

  return collected;
}
/**
 * Validates one content expectation in either of its accepted forms.
 *
 * - A non-record value yields `invalid-type`.
 * - A record with a string `kind` is validated by
 *   `validateNormalizedContentExpectation`.
 * - Otherwise the keys are classified against `CONTENT_EXTRACTOR_KEY_SET` and
 *   `MATCHER_KEY_SET`. Unknown keys, more than one extractor, or an extractor
 *   mixed with direct matchers are reported, and validation stops there if
 *   any such structural issue exists.
 * - With no extractor, the record is validated as a value expectation.
 * - With exactly one extractor, the object under that key is validated by the
 *   matching `css` / `json` / `xpath` validator; any other extractor key
 *   produces no issues.
 *
 * @param expectation - Candidate expectation.
 * @param path - Location of the expectation.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns All issues found for this expectation.
 */
function validateRawContentExpectation(
  expectation: unknown,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }
  if (isString(expectation["kind"])) {
    return validateNormalizedContentExpectation(expectation, path, targetName);
  }

  // Classify every key in a single pass.
  const extractorKeys: string[] = [];
  let directMatcherCount = 0;
  const structural: ConfigValidationIssue[] = [];
  for (const key of Object.keys(expectation)) {
    const isExtractor = CONTENT_EXTRACTOR_KEY_SET.has(key);
    const isMatcher = MATCHER_KEY_SET.has(key);
    if (isExtractor) extractorKeys.push(key);
    if (isMatcher) directMatcherCount += 1;
    if (!isMatcher && !isExtractor) {
      structural.push(issue("unknown-field", joinPath(path, key), "是未知字段", targetName));
    }
  }

  if (extractorKeys.length > 1) {
    structural.push(
      issue("multiple-content-expectations", path, "一条 expectation 不能同时包含多个 extractor", targetName),
    );
  }
  if (extractorKeys.length === 1 && directMatcherCount > 0) {
    structural.push(issue("invalid-content-expectation", path, "直接 matcher 不能与 extractor 混用", targetName));
  }
  if (structural.length > 0) {
    return structural;
  }

  if (extractorKeys.length === 0) {
    return validateRawValueExpectation(expectation, path, targetName);
  }

  // Exactly one extractor key remains at this point.
  const [extractor] = extractorKeys;
  if (extractor === "css") {
    return validateCssExpectation(expectation["css"], joinPath(path, "css"), targetName);
  }
  if (extractor === "json") {
    return validateJsonExpectation(expectation["json"], joinPath(path, "json"), targetName);
  }
  if (extractor === "xpath") {
    return validateXpathExpectation(expectation["xpath"], joinPath(path, "xpath"), targetName);
  }
  return [];
}
/**
 * Validates the object given under an `xpath` extractor.
 *
 * `path` must be a string that is non-empty after trimming. When it is, the
 * expression is evaluated once against a minimal `<x/>` document; if that
 * evaluation throws, the expression is reported as `invalid-xpath`. Every
 * other property is treated as a matcher and checked through
 * `validateExtractorMatcher`.
 *
 * @param expectation - Candidate extractor object.
 * @param path - Location of the extractor object.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Path issues followed by matcher issues.
 */
function validateXpathExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const pathIssues: ConfigValidationIssue[] = [];
  const expression = expectation["path"];
  if (isString(expression) && expression.trim() !== "") {
    try {
      // The result is discarded; the call only probes whether evaluation throws.
      const probe = new DOMParser().parseFromString("<x/>", "text/xml");
      xpath.select(expression, probe as unknown as Node);
    } catch {
      pathIssues.push(issue("invalid-xpath", joinPath(path, "path"), "xpath 不合法", targetName));
    }
  } else {
    pathIssues.push(issue("invalid-type", joinPath(path, "path"), "必须为非空字符串", targetName));
  }

  const matcherIssues = validateExtractorMatcher(expectation, new Set(["path"]), path, targetName);
  return [...pathIssues, ...matcherIssues];
}