将变量替换和 expect 简写展开统一放入 Normalized 阶段, 运行时 AJV 使用 Normalized schema,导出 schema 面向 Authoring Config。 主要变更: - 新增 normalizer.ts 实现 normalizeAuthoringConfig() - 拆分 Authoring/Normalized 双 schema,checker 接口支持 authoring/normalized 片段 - config-loader 流程:normalize → Normalized AJV → semantic → resolve - validator 兼容层自动分派 raw/normalized expect 形态 - 删除 rawExpect,store.expect 列写入 null - Authoring schema 对 integer/boolean/enum 字段接受变量引用 - 修复 DB/HTTP validate 入口守卫和 LLM options integer 变量引用 - 优化 compact() 避免 undefined 覆盖隐患 - 移除 content.ts 恒为 true 的前置条件 - 同步 5 个主规范并归档 change
323 lines
13 KiB
TypeScript
323 lines
13 KiB
TypeScript
import { DOMParser } from "@xmldom/xmldom";
|
|
import { isBoolean, isNumber, isPlainObject, isString } from "es-toolkit";
|
|
import * as xpath from "xpath";
|
|
|
|
import type { ConfigValidationIssue } from "../schema/issues";
|
|
import type { JsonValue } from "../types";
|
|
|
|
import { issue, joinPath } from "../schema/issues";
|
|
import { CONTENT_EXTRACTOR_KEY_SET, MATCHER_KEY_SET } from "./keys";
|
|
import { isUnsafeRegex } from "./redos";
|
|
import { isValueMatcherPrimitive } from "./value";
|
|
|
|
/**
 * Type guard for values that can be represented as JSON.
 *
 * Accepts `null`, strings, booleans and finite numbers, plus arrays and plain
 * objects whose members all satisfy the same rule (checked recursively).
 * Everything else — `undefined`, `NaN`, infinities, functions, non-plain
 * objects — is rejected.
 *
 * @param value - Arbitrary input to inspect.
 * @returns `true` when `value` is a JSON value.
 */
export function isJsonValue(value: unknown): value is JsonValue {
  if (value === null || isString(value) || isBoolean(value)) {
    return true;
  }
  if (isNumber(value)) {
    // NaN and +/-Infinity have no JSON representation.
    return Number.isFinite(value);
  }

  // Containers are valid only when every member is itself a JSON value.
  const members: unknown[] | undefined = Array.isArray(value)
    ? value
    : isPlainObject(value)
      ? Object.values(value)
      : undefined;
  return members !== undefined && members.every((member) => isJsonValue(member));
}
|
|
|
|
/**
 * Type guard narrowing an unknown value to a string-keyed record.
 *
 * Delegates the decision entirely to `isPlainObject`.
 *
 * @param value - Arbitrary input to inspect.
 * @returns `true` when `isPlainObject` accepts `value`.
 */
export function isPlainRecord(value: unknown): value is Record<string, unknown> {
  const plain = isPlainObject(value);
  return plain;
}
|
|
|
|
/**
 * Validates the restricted JSONPath syntax used by expectations.
 *
 * A path must start with `$.` and contain at least one character after it.
 * The remainder is split on `.` and every segment is checked:
 * - an empty segment (e.g. `$.a..b`) is reported as "包含空段";
 * - a segment consisting only of index accessors with no (or a blank)
 *   property name in front (e.g. `[0]`, ` [0]`, `[0][1]`) is reported as
 *   "数组访问缺少属性名".
 *
 * @param path - The JSONPath string to validate.
 * @param expectationPath - Config path of the owning expectation; issues are
 *   reported at `joinPath(expectationPath, "path")`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns The list of issues found; empty when the path is acceptable.
 */
export function validateJsonPath(path: string, expectationPath: string, targetName?: string): ConfigValidationIssue[] {
  if (!path.startsWith("$.") || path.length <= 2) {
    return [
      issue("invalid-jsonpath", joinPath(expectationPath, "path"), '必须为以 "$." 开头的有效 JSONPath', targetName),
    ];
  }

  // Group 1 is whatever precedes the trailing run of `[n]` accessors. It is
  // allowed to be empty so that a bare `[0]` segment is recognised as an
  // array access that is missing its property name.
  const indexedSegment = /^(.*?)(?:\[\d+\])+$/;

  const issues: ConfigValidationIssue[] = [];
  for (const seg of path.slice(2).split(".")) {
    if (seg === "") {
      issues.push(issue("invalid-jsonpath", joinPath(expectationPath, "path"), "包含空段", targetName));
      // An empty segment cannot also be an array access.
      continue;
    }
    const propertyName = indexedSegment.exec(seg)?.[1];
    if (propertyName !== undefined && propertyName.trim() === "") {
      issues.push(issue("invalid-jsonpath", joinPath(expectationPath, "path"), "数组访问缺少属性名", targetName));
    }
  }
  return issues;
}
|
|
|
|
/**
 * Validates a list of content expectations.
 *
 * @param expectations - Expected to be an array; anything else yields a single
 *   `invalid-type` issue at `path`.
 * @param path - Config path of the list; entry `i` is validated at
 *   `${path}[i]`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns All issues from all entries, in entry order.
 */
export function validateRawContentExpectations(
  expectations: unknown,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  if (!Array.isArray(expectations)) {
    return [issue("invalid-type", path, "必须为数组", targetName)];
  }

  const collected: ConfigValidationIssue[] = [];
  expectations.forEach((entry, index) => {
    const entryIssues = validateRawContentExpectation(entry, `${path}[${index}]`, targetName);
    collected.push(...entryIssues);
  });
  return collected;
}
|
|
|
|
/**
 * Validates keyed expectations given either as an object map or as an array.
 *
 * - An array is handed to `validateNormalizedKeyedExpectations` unchanged.
 * - Any other non-record value yields one `invalid-type` issue.
 * - For a record, duplicate keys (after lower-casing) are reported first when
 *   `options.caseInsensitive` is set, then each value is validated as a value
 *   expectation at `joinPath(path, key)`.
 *
 * @param value - The keyed expectations to validate.
 * @param path - Config path of `value`.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `caseInsensitive` enables duplicate detection across keys
 *   that differ only by letter case.
 * @returns Duplicate-key issues followed by per-entry matcher issues.
 */
export function validateRawKeyedExpectations(
  value: unknown,
  path: string,
  targetName?: string,
  options?: { caseInsensitive?: boolean },
): ConfigValidationIssue[] {
  if (Array.isArray(value)) {
    return validateNormalizedKeyedExpectations(value, path, targetName, options);
  }
  if (!isPlainRecord(value)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const entries = Object.entries(value);

  // Pass 1: keys that collide once letter case is ignored.
  const duplicateIssues: ConfigValidationIssue[] = [];
  if (options?.caseInsensitive) {
    const firstSpelling = new Map<string, string>();
    for (const [key] of entries) {
      const folded = key.toLowerCase();
      const earlier = firstSpelling.get(folded);
      if (earlier === undefined) {
        firstSpelling.set(folded, key);
        continue;
      }
      duplicateIssues.push(issue("duplicate-key", joinPath(path, key), `与 "${earlier}" 大小写归一化后重复`, targetName));
    }
  }

  // Pass 2: every value must be a valid value expectation.
  const matcherIssues = entries.flatMap(([key, item]) =>
    validateRawValueExpectation(item, joinPath(path, key), targetName),
  );

  return [...duplicateIssues, ...matcherIssues];
}
|
|
|
|
/**
 * Validates a single value expectation: either a primitive shorthand or a
 * matcher object.
 *
 * Rules applied to a matcher object:
 * - keys outside `MATCHER_KEY_SET` produce `unknown-matcher`;
 * - known keys whose value is `undefined` are ignored;
 * - every other known key is counted and checked by `validateMatcherValue`;
 * - with `requireAtLeastOne` (default `true`) an object without any counted
 *   matcher produces `empty-matcher`;
 * - `exists: false` together with any other counted matcher produces
 *   `invalid-value`.
 *
 * @param matcher - The value expectation to validate.
 * @param path - Config path of `matcher`.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `requireAtLeastOne` toggles the empty-matcher check.
 * @returns The list of issues found; empty when the expectation is valid.
 */
export function validateRawValueExpectation(
  matcher: unknown,
  path: string,
  targetName?: string,
  options: { requireAtLeastOne?: boolean } = {},
): ConfigValidationIssue[] {
  const requireAtLeastOne = options.requireAtLeastOne ?? true;

  // Primitive shorthand needs no further checks.
  if (isValueMatcherPrimitive(matcher)) {
    return [];
  }
  if (Array.isArray(matcher)) {
    const message = "必须为 primitive 原始值或 matcher 对象;如需数组 equals 匹配应写成 {equals: [...]}";
    return [issue("invalid-type", path, message, targetName)];
  }
  if (!isPlainRecord(matcher)) {
    return [issue("invalid-type", path, "必须为 primitive 原始值或 matcher 对象", targetName)];
  }

  const issues: ConfigValidationIssue[] = [];
  let recognized = 0;
  for (const [name, operand] of Object.entries(matcher)) {
    const known = MATCHER_KEY_SET.has(name);
    if (known && operand === undefined) {
      // Explicitly-undefined matchers are treated as absent.
      continue;
    }
    const keyPath = joinPath(path, name);
    if (known) {
      recognized += 1;
      issues.push(...validateMatcherValue(name, operand, keyPath, targetName));
    } else {
      issues.push(issue("unknown-matcher", keyPath, "是未知 matcher", targetName));
    }
  }

  if (recognized === 0 && requireAtLeastOne) {
    issues.push(issue("empty-matcher", path, "必须包含至少一个合法 matcher", targetName));
  }
  if (recognized > 1 && matcher["exists"] === false) {
    issues.push(issue("invalid-value", joinPath(path, "exists"), "exists:false 不能与其他 matcher 组合", targetName));
  }

  return issues;
}
|
|
|
|
/**
 * Validates the body of a raw `css` extractor expectation.
 *
 * `selector` must be a non-blank string, `attr` (when present) must be a
 * string, and every remaining field is validated as matcher input via
 * `validateExtractorMatcher`.
 *
 * @param expectation - The value found under the `css` key.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Selector, attr and matcher issues, in that order.
 */
function validateCssExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const selector = expectation["selector"];
  const selectorOk = isString(selector) && selector.trim() !== "";
  const attrOk = !("attr" in expectation) || isString(expectation["attr"]);

  return [
    ...(selectorOk ? [] : [issue("invalid-type", joinPath(path, "selector"), "必须为非空字符串", targetName)]),
    ...(attrOk ? [] : [issue("invalid-type", joinPath(path, "attr"), "必须为字符串", targetName)]),
    ...validateExtractorMatcher(expectation, new Set(["attr", "selector"]), path, targetName),
  ];
}
|
|
|
|
/**
 * Validates the matcher part of an extractor expectation.
 *
 * Copies every field that is not an extractor-specific field into a fresh
 * object and validates that object as a value expectation. An extractor with
 * no matcher fields at all is accepted (`requireAtLeastOne: false`).
 *
 * @param expectation - The extractor expectation record.
 * @param allowedFields - Extractor-specific field names to leave out of the
 *   matcher object.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Issues reported for the remaining matcher fields.
 */
function validateExtractorMatcher(
  expectation: Record<string, unknown>,
  allowedFields: Set<string>,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  const matcher: Record<string, unknown> = {};
  Object.keys(expectation).forEach((field) => {
    if (!allowedFields.has(field)) {
      matcher[field] = expectation[field];
    }
  });

  return [...validateRawValueExpectation(matcher, path, targetName, { requireAtLeastOne: false })];
}
|
|
|
|
/**
 * Validates the body of a raw `json` extractor expectation.
 *
 * `path` must be a string accepted by `validateJsonPath`; every other field
 * is validated as matcher input via `validateExtractorMatcher`.
 *
 * @param expectation - The value found under the `json` key.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Path issues followed by matcher issues.
 */
function validateJsonExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const jsonPath = expectation["path"];
  const pathIssues: ConfigValidationIssue[] = isString(jsonPath)
    ? validateJsonPath(jsonPath, path, targetName)
    : [issue("invalid-type", joinPath(path, "path"), "必须为字符串", targetName)];

  const matcherIssues = validateExtractorMatcher(expectation, new Set(["path"]), path, targetName);
  return [...pathIssues, ...matcherIssues];
}
|
|
|
|
/**
 * Validates the operand of a single matcher.
 *
 * - `contains`: string
 * - `empty`, `exists`: boolean
 * - `equals`: any JSON value
 * - `gt`, `gte`, `lt`, `lte`: finite number
 * - `regex`: string that compiles with `new RegExp` and is not flagged by
 *   `isUnsafeRegex`
 *
 * Any other key is reported as `unknown-matcher`.
 *
 * @param key - Matcher name.
 * @param value - Matcher operand.
 * @param path - Config path of the matcher.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Zero or one issue describing why the operand is unacceptable.
 */
function validateMatcherValue(key: string, value: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  const typeError = (message: string): ConfigValidationIssue[] => [issue("invalid-type", path, message, targetName)];

  if (key === "contains") {
    return isString(value) ? [] : typeError("必须为字符串");
  }
  if (key === "empty" || key === "exists") {
    return isBoolean(value) ? [] : typeError("必须为布尔值");
  }
  if (key === "equals") {
    return isJsonValue(value) ? [] : typeError("必须为 JSON value");
  }
  if (key === "gt" || key === "gte" || key === "lt" || key === "lte") {
    const finite = isNumber(value) && Number.isFinite(value);
    return finite ? [] : typeError("必须为有限数字");
  }
  if (key === "regex") {
    if (!isString(value)) {
      return typeError("必须为字符串");
    }
    // Compile once purely to detect syntax errors.
    let compiles = true;
    try {
      new RegExp(value);
    } catch {
      compiles = false;
    }
    if (!compiles) {
      return [issue("invalid-regex", path, "正则不合法", targetName)];
    }
    return isUnsafeRegex(value) ? [issue("unsafe-regex", path, "正则存在 ReDoS 风险", targetName)] : [];
  }

  return [issue("unknown-matcher", path, "是未知 matcher", targetName)];
}
|
|
|
|
/**
 * Validates a content expectation in normalized form (`kind` + `matcher`).
 *
 * The `matcher` field is always validated first as a value expectation at
 * `joinPath(path, "matcher")`. Kind-specific checks follow:
 * - `css`: `selector` must be a non-blank string, `attr` (if present) a string;
 * - `json`: `path` must be a string accepted by `validateJsonPath`;
 * - `xpath`: `path` must be a string accepted by `validateXpathExpectation`;
 * - `value`: no extra checks;
 * - anything else: `invalid-type` on `kind`.
 *
 * @param expectation - The normalized expectation record.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Matcher issues followed by kind-specific issues.
 */
function validateNormalizedContentExpectation(
  expectation: Record<string, unknown>,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  const kind = expectation["kind"];
  const matcherIssues = validateRawValueExpectation(expectation["matcher"], joinPath(path, "matcher"), targetName);
  const kindIssues: ConfigValidationIssue[] = [];

  if (kind === "css") {
    const selector = expectation["selector"];
    if (!(isString(selector) && selector.trim() !== "")) {
      kindIssues.push(issue("invalid-type", joinPath(path, "selector"), "必须为非空字符串", targetName));
    }
    if ("attr" in expectation && !isString(expectation["attr"])) {
      kindIssues.push(issue("invalid-type", joinPath(path, "attr"), "必须为字符串", targetName));
    }
  } else if (kind === "json") {
    const jsonPath = expectation["path"];
    if (isString(jsonPath)) {
      kindIssues.push(...validateJsonPath(jsonPath, path, targetName));
    } else {
      kindIssues.push(issue("invalid-type", joinPath(path, "path"), "必须为字符串", targetName));
    }
  } else if (kind === "xpath") {
    const xpathExpr = expectation["path"];
    if (isString(xpathExpr)) {
      // Only the path is forwarded; the matcher was already validated above.
      kindIssues.push(...validateXpathExpectation({ path: xpathExpr }, path, targetName));
    } else {
      kindIssues.push(issue("invalid-type", joinPath(path, "path"), "必须为非空字符串", targetName));
    }
  } else if (kind !== "value") {
    kindIssues.push(issue("invalid-type", joinPath(path, "kind"), "必须为 value、json、css 或 xpath", targetName));
  }

  return [...matcherIssues, ...kindIssues];
}
|
|
|
|
/**
 * Validates keyed expectations in normalized (array) form, where each entry
 * is expected to look like `{ key, matcher }`.
 *
 * Per entry, in order:
 * - a non-record entry yields `invalid-type` and is skipped;
 * - a non-string `key` yields `invalid-type`;
 * - with `options.caseInsensitive`, a `key` equal (after lower-casing) to an
 *   earlier one yields `duplicate-key`;
 * - `matcher` is validated as a value expectation.
 *
 * @param value - The array of normalized keyed expectations.
 * @param path - Config path of the array; entry `i` lives at `${path}[i]`.
 * @param targetName - Optional target name forwarded to every issue.
 * @param options - `caseInsensitive` enables duplicate-key detection.
 * @returns All issues in entry order.
 */
function validateNormalizedKeyedExpectations(
  value: unknown[],
  path: string,
  targetName?: string,
  options?: { caseInsensitive?: boolean },
): ConfigValidationIssue[] {
  const collected: ConfigValidationIssue[] = [];
  // Lower-cased key -> spelling of its first occurrence.
  const firstSpelling = new Map<string, string>();

  for (const [index, entry] of value.entries()) {
    const entryPath = `${path}[${index}]`;
    if (!isPlainRecord(entry)) {
      collected.push(issue("invalid-type", entryPath, "必须为对象", targetName));
      continue;
    }

    const key = entry["key"];
    if (isString(key)) {
      if (options?.caseInsensitive) {
        const folded = key.toLowerCase();
        const earlier = firstSpelling.get(folded);
        if (earlier === undefined) {
          firstSpelling.set(folded, key);
        } else {
          collected.push(
            issue("duplicate-key", joinPath(entryPath, "key"), `与 "${earlier}" 大小写归一化后重复`, targetName),
          );
        }
      }
    } else {
      collected.push(issue("invalid-type", joinPath(entryPath, "key"), "必须为字符串", targetName));
    }

    collected.push(...validateRawValueExpectation(entry["matcher"], joinPath(entryPath, "matcher"), targetName));
  }

  return collected;
}
|
|
|
|
/**
 * Validates one content expectation in either raw or normalized form.
 *
 * - A record with a string `kind` is treated as normalized and delegated to
 *   `validateNormalizedContentExpectation`.
 * - Otherwise every key must be a matcher key or an extractor key; unknown
 *   keys, more than one extractor, or an extractor mixed with direct matchers
 *   are reported and stop further validation.
 * - With no extractor, the record itself is validated as a value expectation.
 * - With exactly one extractor (`css`, `json` or `xpath`), its body is
 *   validated by the matching helper.
 *
 * @param expectation - The expectation entry to validate.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns The list of issues found; empty when the expectation is valid.
 */
function validateRawContentExpectation(
  expectation: unknown,
  path: string,
  targetName?: string,
): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }
  if (isString(expectation["kind"])) {
    return validateNormalizedContentExpectation(expectation, path, targetName);
  }

  // Classify every key in a single pass.
  const structuralIssues: ConfigValidationIssue[] = [];
  const extractors: string[] = [];
  let directMatcherCount = 0;
  for (const key of Object.keys(expectation)) {
    const isExtractor = CONTENT_EXTRACTOR_KEY_SET.has(key);
    const isMatcher = MATCHER_KEY_SET.has(key);
    if (isExtractor) extractors.push(key);
    if (isMatcher) directMatcherCount += 1;
    if (!isExtractor && !isMatcher) {
      structuralIssues.push(issue("unknown-field", joinPath(path, key), "是未知字段", targetName));
    }
  }

  if (extractors.length > 1) {
    structuralIssues.push(
      issue("multiple-content-expectations", path, "一条 expectation 不能同时包含多个 extractor", targetName),
    );
  }
  if (extractors.length === 1 && directMatcherCount > 0) {
    structuralIssues.push(issue("invalid-content-expectation", path, "直接 matcher 不能与 extractor 混用", targetName));
  }
  // Structural problems make the deeper checks meaningless.
  if (structuralIssues.length > 0) {
    return structuralIssues;
  }

  const [extractor] = extractors;
  if (extractor === undefined) {
    return validateRawValueExpectation(expectation, path, targetName);
  }

  const bodyPath = joinPath(path, extractor);
  if (extractor === "css") {
    return validateCssExpectation(expectation["css"], bodyPath, targetName);
  }
  if (extractor === "json") {
    return validateJsonExpectation(expectation["json"], bodyPath, targetName);
  }
  if (extractor === "xpath") {
    return validateXpathExpectation(expectation["xpath"], bodyPath, targetName);
  }
  return [];
}
|
|
|
|
/**
 * Validates the body of an `xpath` extractor expectation.
 *
 * `path` must be a non-blank string; it is then evaluated with `xpath.select`
 * against a throwaway `<x/>` document, and any thrown error is reported as
 * `invalid-xpath`. Every other field is validated as matcher input via
 * `validateExtractorMatcher`.
 *
 * @param expectation - The value found under the `xpath` key.
 * @param path - Config path of `expectation`.
 * @param targetName - Optional target name forwarded to every issue.
 * @returns Path issues followed by matcher issues.
 */
function validateXpathExpectation(expectation: unknown, path: string, targetName?: string): ConfigValidationIssue[] {
  if (!isPlainRecord(expectation)) {
    return [issue("invalid-type", path, "必须为对象", targetName)];
  }

  const issues: ConfigValidationIssue[] = [];
  const expression = expectation["path"];

  if (isString(expression) && expression.trim() !== "") {
    try {
      // Evaluate against a minimal document; only whether it throws matters.
      const probe = new DOMParser().parseFromString("<x/>", "text/xml");
      xpath.select(expression, probe as unknown as Node);
    } catch {
      issues.push(issue("invalid-xpath", joinPath(path, "path"), "xpath 不合法", targetName));
    }
  } else {
    issues.push(issue("invalid-type", joinPath(path, "path"), "必须为非空字符串", targetName));
  }

  issues.push(...validateExtractorMatcher(expectation, new Set(["path"]), path, targetName));
  return issues;
}
|