feat: 第三期 — Tier 3 LLM-as-Judge 集成
This commit is contained in:
56
tests/agent/e2e-llm-judge.test.ts
Normal file
56
tests/agent/e2e-llm-judge.test.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
|
||||
import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
|
||||
import {
|
||||
setupTempDir,
|
||||
cleanupTempDir,
|
||||
getTempDir,
|
||||
createFreshProject,
|
||||
writeDoc,
|
||||
} from "./fixtures.ts";
|
||||
|
||||
// Probe once, at module load, whether Tier 3 (live LLM) credentials are configured.
// The suite below reads this flag to choose between running and skipping its cases.
const tier3Available = isLLMAvailable();

// Leave a visible note in the test output when the Tier 3 cases are going to be skipped.
if (tier3Available === false) {
  console.log("RUNE_E2E_LLM_ 环境变量未配置,Tier 3 测试已跳过");
}
|
||||
|
||||
// End-to-end Tier 3 suite: each case sends a real prompt through LLMJudgeRunner and
// checks only the shape of the returned plan. Cases are skipped when no LLM is configured.
describe("e2e: Tier 3", () => {
  // Per-case timeout in milliseconds (third argument of each test registration).
  const LLM_TIMEOUT_MS = 120_000;

  const runner = new LLMJudgeRunner();
  const runOrSkip = tier3Available ? it : it.skip;

  // Shared shape check: the runner must hand back a plan object carrying `actions`.
  const expectActionPlan = (rawPlan: unknown): void => {
    expect(rawPlan).toBeDefined();
    expect(rawPlan).toHaveProperty("actions");
  };

  // Every case gets a fresh temp directory and removes it afterwards.
  beforeEach(async () => {
    await setupTempDir();
  });

  afterEach(async () => {
    await cleanupTempDir();
  });

  runOrSkip(
    "plan: 单文档输出有效行动计划",
    async () => {
      const config = await createFreshProject();
      const { rawPlan } = await runner.runPlan(getTempDir(), "user-auth", "design", config);

      expectActionPlan(rawPlan);
      // The plan phase must propose at least one action.
      const { actions } = rawPlan as { actions: unknown[] };
      expect(actions.length).toBeGreaterThan(0);
    },
    LLM_TIMEOUT_MS,
  );

  runOrSkip(
    "build: 单任务输出有效行动计划",
    async () => {
      const config = await createFreshProject();
      // Seed a single unchecked task for the build phase to work on.
      await writeDoc("auth", "task", "- [ ] 实现登录 API\n");

      const { rawPlan } = await runner.runBuild(getTempDir(), "auth", config);

      expectActionPlan(rawPlan);
    },
    LLM_TIMEOUT_MS,
  );
});
|
||||
268
tests/agent/tier3-llm-judge.ts
Normal file
268
tests/agent/tier3-llm-judge.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
import type { RuneConfig } from "../../src/types.ts";
|
||||
import type { AgentRunner, AgentResult } from "./runner.ts";
|
||||
import {
|
||||
assemblePlanPrompt,
|
||||
assembleBuildPrompt,
|
||||
assembleArchivePrompt,
|
||||
} from "../../src/core/assembler.ts";
|
||||
|
||||
/**
 * One step of an action plan returned by the LLM.
 *
 * Only `type` is required at the type level. The remaining fields are optional here
 * because which of them is meaningful depends on `type`; the code that executes a
 * plan checks for their presence at run time.
 */
export interface LLMAction {
  /** Kind of operation; mirrors the action types listed in the system prompt. */
  type: "write_file" | "check_task" | "done";
  /** Target path for `write_file` (the system prompt asks for a relative path). */
  path?: string;
  /** File content for `write_file`. */
  content?: string;
  /** Zero-based index into the task list for `check_task`. */
  taskIndex?: number;
}
|
||||
|
||||
/** Action plan parsed from the LLM's JSON reply. */
export interface LLMPlan {
  /** Actions in the order the LLM listed them. */
  actions: LLMAction[];
}
|
||||
|
||||
/**
 * Reports whether the Tier 3 (live LLM) tests can run.
 *
 * @returns `true` when `RUNE_E2E_LLM_API_KEY` is set to a non-empty string.
 */
export function isLLMAvailable(): boolean {
  const apiKey = process.env.RUNE_E2E_LLM_API_KEY;
  return Boolean(apiKey);
}
|
||||
|
||||
/**
 * Reads the Tier 3 LLM settings from `RUNE_E2E_LLM_*` environment variables.
 *
 * Unset and empty variables both fall back to the defaults below (`||` is deliberate:
 * an empty string counts as "not configured").
 *
 * @returns The provider id, model name, API key (empty string when missing) and base URL.
 */
export function getLLMEnv() {
  const env = process.env;

  const provider = env.RUNE_E2E_LLM_PROVIDER || "openai";
  const model = env.RUNE_E2E_LLM_MODEL || "gpt-4o-mini";
  const apiKey = env.RUNE_E2E_LLM_API_KEY || "";
  const baseUrl = env.RUNE_E2E_LLM_BASE_URL || "https://api.openai.com/v1";

  return { provider, model, apiKey, baseUrl };
}
|
||||
|
||||
/**
 * System prompt sent to every provider. It pins the reply to the JSON action-plan
 * format that `parseLLMResponse` expects. Kept in one place so the provider branches
 * cannot drift apart.
 */
const LLM_SYSTEM_PROMPT = `你是一个自动化构建工具,负责根据提示词生成精确的文件操作计划。

请严格按以下 JSON 格式输出行动计划(不要包含其他内容):
{
"actions": [
{ "type": "write_file", "path": "相对路径", "content": "文件内容" },
{ "type": "check_task", "taskIndex": 0 }
]
}

可用的 action 类型:
- write_file: 写入文件,path 和 content 必填
- check_task: 标记任务为已完成,taskIndex 是任务列表中从 0 开始的索引
- done: 表示所有操作已完成

根据提示词要求,生成完整的操作计划。不要跳过任何步骤。`;

/** The part of an OpenAI-style chat completion response that is read here. */
interface OpenAIChatResponse {
  choices?: Array<{ message?: { content?: string | null } }>;
}

/** The part of an Anthropic messages response that is read here. */
interface AnthropicMessagesResponse {
  content?: Array<{ text?: string }>;
}

/**
 * POSTs a JSON payload and returns the decoded JSON body.
 *
 * @param url Endpoint to call.
 * @param authHeaders Provider-specific headers, added after `Content-Type`.
 * @param payload Request body; serialized with `JSON.stringify`.
 * @throws Error with the HTTP status and raw body when the response is not OK.
 */
async function postLLMJson(
  url: string,
  authHeaders: Record<string, string>,
  payload: unknown,
): Promise<unknown> {
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...authHeaders },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const body = await response.text();
    throw new Error(`LLM API error ${response.status}: ${body}`);
  }

  return response.json();
}

/**
 * Sends `prompt` to the configured LLM provider and parses the reply into a plan.
 *
 * Provider, model, API key and base URL come from `getLLMEnv()`. No `temperature`
 * is sent (the previous `temperature: undefined` was dropped by `JSON.stringify`
 * anyway), so the provider's default applies.
 *
 * @param prompt User-role prompt text.
 * @returns The action plan extracted from the model's text reply.
 * @throws Error on a non-OK HTTP response, on an unsupported provider, or when the
 *   reply cannot be parsed into a plan.
 */
async function callLLM(prompt: string): Promise<LLMPlan> {
  const { provider, model, apiKey, baseUrl } = getLLMEnv();

  if (provider === "openai" || provider === "openrouter") {
    const data = (await postLLMJson(
      `${baseUrl}/chat/completions`,
      { Authorization: `Bearer ${apiKey}` },
      {
        model,
        messages: [
          { role: "system", content: LLM_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
      },
    )) as OpenAIChatResponse;

    // A missing or empty reply becomes "" so the parser reports "no JSON found".
    return parseLLMResponse(data.choices?.[0]?.message?.content || "");
  }

  if (provider === "anthropic") {
    // NOTE(review): this branch ignores `baseUrl` (RUNE_E2E_LLM_BASE_URL) and always
    // calls the public endpoint. Left as-is because the default base URL is OpenAI's;
    // honoring an override here needs a decision about that default.
    const data = (await postLLMJson(
      `https://api.anthropic.com/v1/messages`,
      { "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
      {
        model,
        max_tokens: 4096,
        system: LLM_SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
      },
    )) as AnthropicMessagesResponse;

    return parseLLMResponse(data.content?.[0]?.text || "");
  }

  throw new Error(`不支持的 LLM provider: ${provider}`);
}
|
||||
|
||||
/**
 * Extracts the JSON action plan from the model's raw text reply.
 *
 * The reply may wrap the JSON in prose or a code fence, so the span from the first
 * `{` to the last `}` is taken and parsed.
 *
 * @param text Raw text returned by the model.
 * @returns The parsed plan; `actions` is guaranteed to be an array (its elements are
 *   not validated here).
 * @throws Error when no `{...}` span exists, when that span is not valid JSON (the
 *   original parse error is attached as `cause`), or when the parsed value has no
 *   `actions` array.
 */
function parseLLMResponse(text: string): LLMPlan {
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
  }

  // Only the parse itself sits inside the try. Shape validation happens afterwards so
  // a missing `actions` array is reported as such and not as a JSON parse failure.
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (e) {
    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
  }

  const hasActions =
    typeof parsed === "object" &&
    parsed !== null &&
    Array.isArray((parsed as { actions?: unknown }).actions);
  if (!hasActions) {
    throw new Error("LLM 输出缺少 actions 数组");
  }

  return parsed as LLMPlan;
}
|
||||
|
||||
import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
import { dirname, isAbsolute, join, relative, sep } from "node:path";
import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
import { parseTasks } from "../../src/core/task-parser.ts";
|
||||
|
||||
/**
 * Applies the file-writing actions of a plan inside `projectDir`.
 *
 * - `write_file`: creates parent directories as needed and writes `content` to
 *   `path` under `projectDir`.
 * - `check_task`: only validated here (needs `taskIndex`); nothing is written.
 * - `done` and any unrecognized type: ignored.
 *
 * Action paths come from LLM output, so each target is checked to resolve strictly
 * inside `projectDir` before anything is written.
 *
 * @param projectDir Root directory that `write_file` paths are joined onto.
 * @param plan Plan whose actions are applied in order.
 * @returns The `path` of every `write_file` action, in execution order.
 * @throws Error when a `write_file` action lacks `path`/`content` or targets a
 *   location outside `projectDir`, or when a `check_task` action lacks `taskIndex`.
 *   Actions executed before the failing one stay applied.
 */
async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
  const files: string[] = [];

  for (const action of plan.actions) {
    switch (action.type) {
      case "write_file": {
        if (!action.path || action.content === undefined) {
          throw new Error("write_file action 缺少 path 或 content");
        }

        const fullPath = join(projectDir, action.path);

        // `join` normalizes `..` segments, so a path like "../x" lands outside the
        // project. Reject anything that is not strictly below projectDir.
        const rel = relative(projectDir, fullPath);
        const escapes =
          rel === "" || rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel);
        if (escapes) {
          throw new Error(`write_file action 的 path 超出项目目录: ${action.path}`);
        }

        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, action.content);
        files.push(action.path);
        break;
      }
      case "check_task": {
        if (action.taskIndex === undefined) {
          throw new Error("check_task action 缺少 taskIndex");
        }
        break;
      }
      case "done":
        break;
    }
  }

  return files;
}
|
||||
|
||||
/**
 * Collects the task indices named by the plan's `check_task` actions.
 *
 * @param plan Plan to scan.
 * @returns The `taskIndex` of each `check_task` action that has one, in plan order
 *   (duplicates are kept).
 */
function extractTaskCheckActions(plan: LLMPlan): number[] {
  const indices: number[] = [];

  for (const action of plan.actions) {
    if (action.type !== "check_task") continue;
    if (action.taskIndex === undefined) continue;
    indices.push(action.taskIndex);
  }

  return indices;
}
|
||||
|
||||
/**
 * Flips one unchecked checkbox for `taskText` from `- [ ]` to `- [x]`.
 *
 * A line that is exactly this task (ignoring surrounding whitespace) is preferred, so
 * a task whose text is a prefix of an earlier task's text does not tick the wrong
 * line. If no such line exists, the first substring occurrence is replaced. Content
 * is returned unchanged when the unchecked form does not occur at all.
 */
function markTaskChecked(content: string, taskText: string): string {
  const unchecked = `- [ ] ${taskText}`;
  const checked = `- [x] ${taskText}`;

  // Function replacers keep `$&`, `$1`, ... in the task text literal; a string
  // replacement would interpret them as substitution patterns.
  let replaced = false;
  const lines = content.split("\n").map((line) => {
    if (replaced || line.trim() !== unchecked) return line;
    replaced = true;
    return line.replace(unchecked, () => checked);
  });
  if (replaced) return lines.join("\n");

  return content.replace(unchecked, () => checked);
}

/**
 * Tier 3 agent runner: assembles the prompt for a phase, sends it to a live LLM and
 * returns the parsed action plan as `rawPlan`.
 *
 * Every method throws when `isLLMAvailable()` is false, and propagates errors from
 * the LLM call and from file-system operations.
 */
export class LLMJudgeRunner implements AgentRunner {
  readonly tier = 3;

  /** Throws when the LLM environment is not configured. */
  private ensureAvailable(): void {
    if (!isLLMAvailable()) {
      throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
    }
  }

  /**
   * Plan phase: asks the LLM for a plan for one document. The plan is returned but
   * not executed, so `files` is always empty.
   *
   * @param projectDir Project root passed to the prompt assembler.
   * @param changeName Name of the change being planned.
   * @param docName Name of the document to plan.
   * @param config Project configuration passed to the prompt assembler.
   */
  async runPlan(
    projectDir: string,
    changeName: string,
    docName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureAvailable();

    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
    const plan = await callLLM(prompt);

    return {
      files: [],
      rawPlan: plan,
    };
  }

  /**
   * Build phase: asks the LLM for a plan, executes its `write_file` actions under
   * `projectDir`, then ticks the tasks named by `check_task` actions in the change's
   * `task.md`.
   *
   * @param projectDir Project root; `write_file` paths are relative to it.
   * @param changeName Name of the change being built.
   * @param config Project configuration passed to the prompt assembler.
   * @returns De-duplicated written paths (plus `"task.md"` when any `check_task`
   *   action was present) and the raw plan.
   */
  async runBuild(projectDir: string, changeName: string, config: RuneConfig): Promise<AgentResult> {
    this.ensureAvailable();

    const changeDir = getChangeDir(projectDir, changeName);
    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
    const plan = await callLLM(prompt);

    const files = await executeActions(projectDir, plan);

    const taskIndices = extractTaskCheckActions(plan);
    if (taskIndices.length > 0) {
      const taskPath = join(changeDir, "task.md");
      let taskContent = await readFile(taskPath, "utf-8");
      const tasks = parseTasks(taskContent);

      for (const index of taskIndices) {
        // Indices come from LLM output: skip anything that is not a valid position
        // (negative, fractional, out of range) instead of crashing on `undefined`.
        if (!Number.isInteger(index) || index < 0 || index >= tasks.length) continue;
        const task = tasks[index];
        if (!task) continue;
        taskContent = markTaskChecked(taskContent, task.text);
      }

      await writeFile(taskPath, taskContent);
      files.push("task.md");
    }

    return {
      files: [...new Set(files)],
      rawPlan: plan,
    };
  }

  /**
   * Archive phase: asks the LLM for a plan, then moves the change directory into the
   * archive directory as `<YYYY-MM-DD>-<changeName>`. The date is the UTC date taken
   * from `toISOString()`. The plan's actions are not executed; `files` is always empty.
   *
   * @param projectDir Project root.
   * @param changeName Name of the change to archive.
   * @param config Project configuration passed to the prompt assembler.
   */
  async runArchive(
    projectDir: string,
    changeName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureAvailable();

    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
    const changeDir = getChangeDir(projectDir, changeName);
    const plan = await callLLM(prompt);

    const today = new Date().toISOString().slice(0, 10);
    const archiveDir = getArchiveDir(projectDir);
    await mkdir(archiveDir, { recursive: true });
    const dest = join(archiveDir, `${today}-${changeName}`);
    await rename(changeDir, dest);

    return {
      files: [],
      rawPlan: plan,
    };
  }
}
|
||||
Reference in New Issue
Block a user