feat: 第三期 — Tier 3 LLM-as-Judge 集成
This commit is contained in:
56
tests/agent/e2e-llm-judge.test.ts
Normal file
56
tests/agent/e2e-llm-judge.test.ts
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
|
||||||
|
import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
|
||||||
|
import {
|
||||||
|
setupTempDir,
|
||||||
|
cleanupTempDir,
|
||||||
|
getTempDir,
|
||||||
|
createFreshProject,
|
||||||
|
writeDoc,
|
||||||
|
} from "./fixtures.ts";
|
||||||
|
|
||||||
|
// Tier 3 cases talk to a real LLM endpoint, so availability is probed once at
// module load and reused by the suite below to choose between `it` and `it.skip`.
const tier3Available = isLLMAvailable();

// Leave a line in the test log so a skipped suite is not mistaken for a green one.
if (tier3Available === false) {
  console.log("RUNE_E2E_LLM_ 环境变量未配置,Tier 3 测试已跳过");
}
|
||||||
|
|
||||||
|
// End-to-end Tier 3 suite: drives LLMJudgeRunner against a fresh temp project
// and checks that the model returns a structurally valid action plan.
describe("e2e: Tier 3", () => {
  // Per-test timeout in milliseconds, passed as the third argument to each test.
  const LLM_TIMEOUT_MS = 120_000;

  const runner = new LLMJudgeRunner();

  // Register the cases as real tests only when LLM credentials are present;
  // otherwise they are registered as skipped.
  const testFn = tier3Available ? it : it.skip;

  beforeEach(async () => {
    await setupTempDir();
  });

  afterEach(async () => {
    await cleanupTempDir();
  });

  // The plan step must return a raw plan carrying a non-empty `actions` array.
  async function planYieldsActions(): Promise<void> {
    const config = await createFreshProject();
    const result = await runner.runPlan(getTempDir(), "user-auth", "design", config);

    expect(result.rawPlan).toBeDefined();
    expect(result.rawPlan).toHaveProperty("actions");

    const { actions } = result.rawPlan as { actions: unknown[] };
    expect(actions.length).toBeGreaterThan(0);
  }

  // The build step, given a single unchecked task, must return a raw plan with `actions`.
  async function buildYieldsPlan(): Promise<void> {
    const config = await createFreshProject();
    await writeDoc("auth", "task", "- [ ] 实现登录 API\n");

    const result = await runner.runBuild(getTempDir(), "auth", config);

    expect(result.rawPlan).toBeDefined();
    expect(result.rawPlan).toHaveProperty("actions");
  }

  testFn("plan: 单文档输出有效行动计划", planYieldsActions, LLM_TIMEOUT_MS);

  testFn("build: 单任务输出有效行动计划", buildYieldsPlan, LLM_TIMEOUT_MS);
});
|
||||||
268
tests/agent/tier3-llm-judge.ts
Normal file
268
tests/agent/tier3-llm-judge.ts
Normal file
@@ -0,0 +1,268 @@
|
|||||||
|
import type { RuneConfig } from "../../src/types.ts";
|
||||||
|
import type { AgentRunner, AgentResult } from "./runner.ts";
|
||||||
|
import {
|
||||||
|
assemblePlanPrompt,
|
||||||
|
assembleBuildPrompt,
|
||||||
|
assembleArchivePrompt,
|
||||||
|
} from "../../src/core/assembler.ts";
|
||||||
|
|
||||||
|
/**
 * One step of the action plan returned by the LLM.
 *
 * Which optional fields are meaningful depends on `type`:
 * - `write_file` needs `path` and `content`;
 * - `check_task` needs `taskIndex`;
 * - `done` carries no payload.
 */
export interface LLMAction {
  /** Operation the step asks for. */
  type: "write_file" | "check_task" | "done";

  /** Target file path for `write_file` (the prompt asks for a relative path). */
  path?: string;

  /** Full file contents for `write_file`. */
  content?: string;

  /** Zero-based position in the task list for `check_task`. */
  taskIndex?: number;
}
|
||||||
|
|
||||||
|
/** The JSON document the LLM is asked to produce: an ordered list of actions. */
export interface LLMPlan {
  /** Steps in the order the model emitted them. */
  actions: Array<LLMAction>;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether Tier 3 (real LLM) runs are possible.
 *
 * @returns `true` when `RUNE_E2E_LLM_API_KEY` is set to a non-empty string.
 */
export function isLLMAvailable(): boolean {
  const apiKey = process.env.RUNE_E2E_LLM_API_KEY;
  return Boolean(apiKey);
}
|
||||||
|
|
||||||
|
/** Fallback base URL used when `RUNE_E2E_LLM_BASE_URL` is unset and the provider is unknown. */
const OPENAI_BASE_URL = "https://api.openai.com/v1";

/**
 * Default API base URL per provider.
 *
 * A Map (rather than an object literal) is used so that a provider name such as
 * `constructor` cannot resolve to an inherited, non-string value.
 */
const DEFAULT_BASE_URLS: ReadonlyMap<string, string> = new Map([
  ["openai", OPENAI_BASE_URL],
  ["openrouter", "https://openrouter.ai/api/v1"],
  ["anthropic", "https://api.anthropic.com/v1"],
]);

/**
 * Reads the Tier 3 LLM settings from `RUNE_E2E_LLM_*` environment variables.
 *
 * Unset or empty variables fall back to defaults: provider `openai`, model
 * `gpt-4o-mini`, an empty API key, and a base URL chosen from the provider.
 * Previously the base URL always defaulted to OpenAI's endpoint, so selecting
 * `openrouter` without an explicit base URL sent requests (and the key) to the
 * wrong host. An explicit `RUNE_E2E_LLM_BASE_URL` always takes precedence.
 *
 * @returns The resolved `provider`, `model`, `apiKey` and `baseUrl`.
 */
export function getLLMEnv() {
  // `||` (not `??`) is deliberate: an empty env var should behave like an unset one.
  const provider = process.env.RUNE_E2E_LLM_PROVIDER || "openai";

  return {
    provider,
    model: process.env.RUNE_E2E_LLM_MODEL || "gpt-4o-mini",
    apiKey: process.env.RUNE_E2E_LLM_API_KEY || "",
    baseUrl:
      process.env.RUNE_E2E_LLM_BASE_URL || (DEFAULT_BASE_URLS.get(provider) ?? OPENAI_BASE_URL),
  };
}
|
||||||
|
|
||||||
|
/**
 * System prompt sent to every provider. Kept in one place so the OpenAI-style
 * and Anthropic requests cannot drift apart.
 */
const SYSTEM_PROMPT = `你是一个自动化构建工具,负责根据提示词生成精确的文件操作计划。

请严格按以下 JSON 格式输出行动计划(不要包含其他内容):
{
  "actions": [
    { "type": "write_file", "path": "相对路径", "content": "文件内容" },
    { "type": "check_task", "taskIndex": 0 }
  ]
}

可用的 action 类型:
- write_file: 写入文件,path 和 content 必填
- check_task: 标记任务为已完成,taskIndex 是任务列表中从 0 开始的索引
- done: 表示所有操作已完成

根据提示词要求,生成完整的操作计划。不要跳过任何步骤。`;

/**
 * POSTs a JSON payload and returns the decoded JSON response body.
 *
 * @throws Error with the HTTP status and response text when the status is not 2xx.
 */
async function postJson(
  url: string,
  headers: Record<string, string>,
  payload: unknown,
): Promise<unknown> {
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...headers },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const body = await response.text();
    throw new Error(`LLM API error ${response.status}: ${body}`);
  }

  return response.json();
}

/**
 * Sends the assembled prompt to the configured LLM and parses the reply into a plan.
 *
 * Providers `openai` and `openrouter` use the chat-completions endpoint under the
 * configured base URL; `anthropic` uses the fixed Anthropic messages endpoint.
 * No sampling temperature is sent, so the provider default applies.
 *
 * @param prompt User-role message content.
 * @returns The plan extracted from the model's text reply.
 * @throws Error on a non-2xx HTTP response, on an unsupported provider, or when
 *   the reply cannot be parsed into a plan.
 */
async function callLLM(prompt: string): Promise<LLMPlan> {
  const { provider, model, apiKey, baseUrl } = getLLMEnv();

  if (provider === "openai" || provider === "openrouter") {
    // Tolerate a trailing slash in the configured base URL.
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
    const data = (await postJson(
      endpoint,
      { Authorization: `Bearer ${apiKey}` },
      {
        model,
        messages: [
          { role: "system", content: SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
      },
    )) as { choices?: Array<{ message?: { content?: string } }> } | null;

    // An empty string makes parseLLMResponse fail with a "no JSON found" error.
    return parseLLMResponse(data?.choices?.[0]?.message?.content || "");
  }

  if (provider === "anthropic") {
    const data = (await postJson(
      "https://api.anthropic.com/v1/messages",
      { "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
      {
        model,
        max_tokens: 4096,
        system: SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
      },
    )) as { content?: Array<{ type?: string; text?: string } | null> } | null;

    // Prefer the first block explicitly tagged as text; fall back to the first
    // block so replies without a `type` tag are still read as before.
    const blocks = Array.isArray(data?.content) ? data.content : [];
    const textBlock = blocks.find((block) => block?.type === "text") ?? blocks[0];
    return parseLLMResponse(textBlock?.text || "");
  }

  throw new Error(`不支持的 LLM provider: ${provider}`);
}
|
||||||
|
|
||||||
|
/**
 * Extracts the action plan from the model's raw text reply.
 *
 * The reply may wrap the JSON in prose or code fences, so the span from the
 * first `{` to the last `}` is taken and parsed.
 *
 * @param text Raw text returned by the LLM.
 * @returns The parsed plan; only the presence of an `actions` array is checked,
 *   individual actions are not validated here.
 * @throws Error when no JSON object is found, when the JSON is malformed
 *   (original error attached as `cause`), or when `actions` is not an array.
 */
function parseLLMResponse(text: string): LLMPlan {
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
  }

  // Only the JSON.parse call is guarded, so a shape error below is not
  // re-reported as a parse failure.
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (e) {
    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
  }

  const actions = (parsed as { actions?: unknown } | null)?.actions;
  if (!Array.isArray(actions)) {
    throw new Error("LLM 输出缺少 actions 数组");
  }

  return parsed as LLMPlan;
}
|
||||||
|
|
||||||
|
import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
import { dirname, isAbsolute, join, relative, resolve, sep } from "node:path";
import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
import { parseTasks } from "../../src/core/task-parser.ts";
|
||||||
|
|
||||||
|
/**
 * Applies the file-writing actions of an LLM plan under `projectDir`.
 *
 * `write_file` actions create parent directories as needed and write the file.
 * `check_task` actions are only validated here (nothing is written for them),
 * and `done` is a no-op. Action types outside these three are ignored.
 *
 * Paths come from model output and are treated as untrusted: a path that
 * resolves to the project root itself or to anywhere outside it is rejected
 * before any file is touched for that action.
 *
 * @param projectDir Directory that all written paths are joined onto.
 * @param plan Plan whose actions are executed in order.
 * @returns The `path` of every `write_file` action, as given in the plan.
 * @throws Error when a `write_file` action lacks a usable `path`/`content`,
 *   when its path escapes `projectDir`, or when `check_task` lacks `taskIndex`.
 */
async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
  const root = resolve(projectDir);
  const files: string[] = [];

  for (const action of plan.actions) {
    switch (action.type) {
      case "write_file": {
        // typeof checks also catch JSON `null`, which would otherwise fail inside writeFile.
        if (!action.path || typeof action.path !== "string" || typeof action.content !== "string") {
          throw new Error("write_file action 缺少 path 或 content");
        }

        // `join` keeps a leading-slash path nested under the root; only `..`
        // segments can climb out, which the relative-path check below detects.
        const fullPath = join(root, action.path);
        const rel = relative(root, fullPath);
        const escapesRoot =
          rel === "" || rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel);
        if (escapesRoot) {
          throw new Error(`write_file action 的 path 超出项目目录: ${action.path}`);
        }

        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, action.content);
        files.push(action.path);
        break;
      }
      case "check_task": {
        if (action.taskIndex === undefined) {
          throw new Error("check_task action 缺少 taskIndex");
        }
        break;
      }
      case "done":
        break;
    }
  }

  return files;
}
|
||||||
|
|
||||||
|
/**
 * Collects the task indices named by the plan's `check_task` actions.
 *
 * @param plan Plan to scan.
 * @returns The `taskIndex` of each `check_task` action that has one, in plan order.
 */
function extractTaskCheckActions(plan: LLMPlan): number[] {
  const indices: number[] = [];

  for (const action of plan.actions) {
    if (action.type !== "check_task") continue;
    if (action.taskIndex === undefined) continue;
    indices.push(action.taskIndex);
  }

  return indices;
}
|
||||||
|
|
||||||
|
/**
 * Guard shared by every runner entry point.
 *
 * @throws Error when `isLLMAvailable()` reports that no LLM is configured.
 */
function assertLLMConfigured(): void {
  if (!isLLMAvailable()) {
    throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
  }
}

/**
 * Tier 3 agent runner: assembles the real prompt for each phase, sends it to a
 * live LLM, and returns the model's plan in `rawPlan`.
 */
export class LLMJudgeRunner implements AgentRunner {
  readonly tier = 3;

  /**
   * Asks the LLM for a plan for one document of a change. Nothing is written
   * to disk; the returned `files` list is always empty.
   *
   * @throws Error when no LLM is configured or the LLM call/parse fails.
   */
  async runPlan(
    projectDir: string,
    changeName: string,
    docName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    assertLLMConfigured();

    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
    const plan = await callLLM(prompt);

    return {
      files: [],
      rawPlan: plan,
    };
  }

  /**
   * Asks the LLM for a build plan, executes its `write_file` actions, and ticks
   * the tasks named by its `check_task` actions in the change's `task.md`.
   *
   * @returns De-duplicated list of written paths (plus `"task.md"` when any
   *   `check_task` action was present) and the raw plan.
   * @throws Error when no LLM is configured, the LLM call/parse fails, or an
   *   action is invalid.
   */
  async runBuild(projectDir: string, changeName: string, config: RuneConfig): Promise<AgentResult> {
    assertLLMConfigured();

    const changeDir = getChangeDir(projectDir, changeName);
    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
    const plan = await callLLM(prompt);

    const files = await executeActions(projectDir, plan);

    const taskIndices = extractTaskCheckActions(plan);
    if (taskIndices.length > 0) {
      const taskPath = join(changeDir, "task.md");
      let taskContent = await readFile(taskPath, "utf-8");
      const tasks = parseTasks(taskContent);

      for (const index of taskIndices) {
        // Indices come from model output: skip negatives, fractions and
        // out-of-range values instead of crashing on `tasks[index].text`.
        if (!Number.isInteger(index) || index < 0 || index >= tasks.length) {
          continue;
        }
        const task = tasks[index];
        if (!task) {
          continue;
        }

        const oldLine = `- [ ] ${task.text}`;
        const newLine = `- [x] ${task.text}`;
        // A replacer function keeps `$&`, `$1`, ... in task text literal;
        // a string replacement would expand them.
        taskContent = taskContent.replace(oldLine, () => newLine);
      }

      await writeFile(taskPath, taskContent);
      files.push("task.md");
    }

    return {
      files: [...new Set(files)],
      rawPlan: plan,
    };
  }

  /**
   * Asks the LLM for an archive plan, then moves the change directory into the
   * archive directory as `<YYYY-MM-DD>-<changeName>`. The date is taken from
   * `Date#toISOString`, i.e. it is the UTC calendar date. The plan's actions
   * are not executed; the plan is only returned.
   *
   * @throws Error when no LLM is configured, the LLM call/parse fails, or the
   *   directory cannot be created or renamed.
   */
  async runArchive(
    projectDir: string,
    changeName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    assertLLMConfigured();

    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
    const changeDir = getChangeDir(projectDir, changeName);
    const plan = await callLLM(prompt);

    const today = new Date().toISOString().slice(0, 10);
    const archiveDir = getArchiveDir(projectDir);
    await mkdir(archiveDir, { recursive: true });
    const dest = join(archiveDir, `${today}-${changeName}`);
    await rename(changeDir, dest);

    return {
      files: [],
      rawPlan: plan,
    };
  }
}
|
||||||
Reference in New Issue
Block a user