feat: 第三期 — Tier 3 LLM-as-Judge 集成

This commit is contained in:
2026-06-09 16:11:48 +08:00
parent bb7d5e740c
commit 073b9c1e47
2 changed files with 324 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
import { describe, it, expect, beforeEach, afterEach } from "bun:test";
import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
import {
setupTempDir,
cleanupTempDir,
getTempDir,
createFreshProject,
writeDoc,
} from "./fixtures.ts";
// Probe the environment once, when the test module is loaded, so the whole
// suite can decide between running and skipping the Tier 3 cases.
const tier3Available = isLLMAvailable();
if (tier3Available === false) {
  // Leave a trace in the test output explaining why the cases below are skipped.
  console.log("RUNE_E2E_LLM_ 环境变量未配置Tier 3 测试已跳过");
}
/**
 * Tier 3 end-to-end suite: drives `LLMJudgeRunner` against a freshly created
 * project in a temp directory and checks that the returned raw plan carries an
 * `actions` property. Every case is registered through `it.skip` when
 * `tier3Available` is false.
 */
describe("e2e: Tier 3", () => {
  // Per-test timeout (milliseconds) passed as the third argument to each case.
  const LLM_TEST_TIMEOUT_MS = 120_000;

  const runner = new LLMJudgeRunner();
  const runOrSkip = tier3Available ? it : it.skip;

  // Each case gets its own temp directory, removed again afterwards.
  beforeEach(async () => {
    await setupTempDir();
  });

  afterEach(async () => {
    await cleanupTempDir();
  });

  runOrSkip(
    "plan: 单文档输出有效行动计划",
    async () => {
      const projectConfig = await createFreshProject();
      const { rawPlan } = await runner.runPlan(getTempDir(), "user-auth", "design", projectConfig);

      expect(rawPlan).toBeDefined();
      expect(rawPlan).toHaveProperty("actions");

      // The plan must contain at least one action to count as valid.
      const { actions } = rawPlan as { actions: unknown[] };
      expect(actions.length).toBeGreaterThan(0);
    },
    LLM_TEST_TIMEOUT_MS,
  );

  runOrSkip(
    "build: 单任务输出有效行动计划",
    async () => {
      const projectConfig = await createFreshProject();
      // Seed a task document with a single unchecked task before building.
      await writeDoc("auth", "task", "- [ ] 实现登录 API\n");

      const { rawPlan } = await runner.runBuild(getTempDir(), "auth", projectConfig);

      expect(rawPlan).toBeDefined();
      expect(rawPlan).toHaveProperty("actions");
    },
    LLM_TEST_TIMEOUT_MS,
  );
});

View File

@@ -0,0 +1,268 @@
import type { RuneConfig } from "../../src/types.ts";
import type { AgentRunner, AgentResult } from "./runner.ts";
import {
assemblePlanPrompt,
assembleBuildPrompt,
assembleArchivePrompt,
} from "../../src/core/assembler.ts";
/**
 * One step of the action plan the LLM is asked to return.
 *
 * Which optional fields are meaningful depends on `type`:
 * - `write_file` uses `path` and `content`; `executeActions` rejects the action
 *   when either is missing.
 * - `check_task` uses `taskIndex`; `executeActions` rejects the action when it
 *   is missing.
 * - `done` carries no payload and is a no-op in `executeActions`.
 */
export interface LLMAction {
// Discriminator selecting the kind of action.
type: "write_file" | "check_task" | "done";
// Target file path for `write_file`; joined onto the project directory by `executeActions`.
path?: string;
// File content written by `write_file`.
content?: string;
// Index of the task to mark as completed for `check_task`; the system prompt describes it as 0-based.
taskIndex?: number;
}
/**
 * Parsed LLM response: the list of actions requested by the model.
 * `parseLLMResponse` only accepts responses whose `actions` field is an array.
 */
export interface LLMPlan {
// Actions in the order the model emitted them; `executeActions` iterates them in this order.
actions: LLMAction[];
}
/**
 * Reports whether an LLM backend can be used.
 *
 * @returns `true` when the `RUNE_E2E_LLM_API_KEY` environment variable is set
 *   to a non-empty value, `false` otherwise.
 */
export function isLLMAvailable(): boolean {
  const apiKey = process.env.RUNE_E2E_LLM_API_KEY;
  return Boolean(apiKey);
}
/**
 * Reads the LLM connection settings from the `RUNE_E2E_LLM_*` environment
 * variables.
 *
 * An unset or empty variable falls back to its default: provider `"openai"`,
 * model `"gpt-4o-mini"`, an empty API key, and the OpenAI v1 base URL.
 *
 * @returns The resolved provider, model, API key and base URL.
 */
export function getLLMEnv() {
  const {
    RUNE_E2E_LLM_PROVIDER: provider,
    RUNE_E2E_LLM_MODEL: model,
    RUNE_E2E_LLM_API_KEY: apiKey,
    RUNE_E2E_LLM_BASE_URL: baseUrl,
  } = process.env;

  // `||` (not `??`) on purpose: an empty string counts as "not configured".
  return {
    provider: provider || "openai",
    model: model || "gpt-4o-mini",
    apiKey: apiKey || "",
    baseUrl: baseUrl || "https://api.openai.com/v1",
  };
}
/**
 * System prompt sent to every provider. It instructs the model to answer with
 * a single JSON object of the shape `{ "actions": [...] }`.
 * Kept in one place so the provider branches cannot drift apart.
 */
const LLM_SYSTEM_PROMPT = `你是一个自动化构建工具,负责根据提示词生成精确的文件操作计划。
请严格按以下 JSON 格式输出行动计划(不要包含其他内容):
{
"actions": [
{ "type": "write_file", "path": "相对路径", "content": "文件内容" },
{ "type": "check_task", "taskIndex": 0 }
]
}
可用的 action 类型:
- write_file: 写入文件path 和 content 必填
- check_task: 标记任务为已完成taskIndex 是任务列表中从 0 开始的索引
- done: 表示所有操作已完成
根据提示词要求,生成完整的操作计划。不要跳过任何步骤。`;

/**
 * POSTs `payload` as JSON to `url` and returns the decoded JSON response body.
 *
 * @param url - Full endpoint URL.
 * @param headers - Provider-specific headers, added after `Content-Type`.
 * @param payload - Request body; serialized with `JSON.stringify`.
 * @returns The response body parsed as JSON.
 * @throws Error with the HTTP status and raw body when the response is not OK.
 */
async function postLLMJson(
  url: string,
  headers: Record<string, string>,
  payload: unknown,
): Promise<unknown> {
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...headers },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const body = await response.text();
    throw new Error(`LLM API error ${response.status}: ${body}`);
  }
  return response.json();
}

/**
 * Sends `prompt` to the LLM configured through `getLLMEnv()` and parses the
 * reply into an action plan.
 *
 * Providers `"openai"` and `"openrouter"` use the chat-completions endpoint
 * under the configured base URL; `"anthropic"` uses the Anthropic messages
 * endpoint. No sampling temperature is sent.
 *
 * @param prompt - User message content for the model.
 * @returns The plan extracted from the model's text reply by `parseLLMResponse`.
 * @throws Error when the HTTP call fails, the reply cannot be parsed, or the
 *   provider is not one of the supported values.
 */
async function callLLM(prompt: string): Promise<LLMPlan> {
  const { provider, model, apiKey, baseUrl } = getLLMEnv();

  if (provider === "openai" || provider === "openrouter") {
    // Tolerate a trailing slash on the base URL so the path never contains `//`.
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
    const data = (await postLLMJson(
      endpoint,
      { Authorization: `Bearer ${apiKey}` },
      {
        model,
        messages: [
          { role: "system", content: LLM_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
      },
    )) as { choices?: Array<{ message?: { content?: string | null } }> };
    // A missing or empty reply becomes "" and is rejected by parseLLMResponse.
    return parseLLMResponse(data.choices?.[0]?.message?.content || "");
  }

  if (provider === "anthropic") {
    // NOTE(review): the endpoint is fixed; the configured base URL is not
    // consulted for this provider — confirm whether a proxy override is wanted.
    const data = (await postLLMJson(
      `https://api.anthropic.com/v1/messages`,
      { "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
      {
        model,
        max_tokens: 4096,
        system: LLM_SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
      },
    )) as { content?: Array<{ text?: string }> };
    // Only the first content block's text is used.
    return parseLLMResponse(data.content?.[0]?.text || "");
  }

  throw new Error(`不支持的 LLM provider: ${provider}`);
}
/**
 * Extracts the action plan from the model's raw text reply.
 *
 * The text between the first `{` and the last `}` is treated as the JSON
 * payload, so surrounding prose or Markdown code fences are tolerated.
 *
 * @param text - Raw text returned by the model.
 * @returns The parsed plan; its `actions` field is guaranteed to be an array.
 * @throws Error when no `{...}` span is found, when that span is not valid
 *   JSON (the original parse error is attached as `cause`), or when the parsed
 *   object has no `actions` array.
 */
function parseLLMResponse(text: string): LLMPlan {
  // Greedy on purpose: grab from the first "{" to the last "}".
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (e) {
    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
  }

  // Validate outside the try block so a structurally wrong plan is not
  // misreported as a JSON syntax failure.
  const hasActions =
    typeof parsed === "object" &&
    parsed !== null &&
    Array.isArray((parsed as { actions?: unknown }).actions);
  if (!hasActions) {
    throw new Error("LLM 输出缺少 actions 数组");
  }
  return parsed as LLMPlan;
}
import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
import { dirname, isAbsolute, join, relative, sep } from "node:path";
import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
import { parseTasks } from "../../src/core/task-parser.ts";
/**
 * Applies the file-writing actions of an LLM plan inside `projectDir`.
 *
 * `write_file` actions create missing parent directories and write the file;
 * `check_task` actions are only validated here (no file is touched); `done`
 * is a no-op. Action types outside these three are ignored.
 *
 * The plan comes from model output, so every `write_file` path is checked to
 * stay inside `projectDir` before anything is written.
 *
 * @param projectDir - Directory all action paths are joined onto.
 * @param plan - Parsed LLM plan whose actions are processed in order.
 * @returns The `path` of every `write_file` action, in execution order.
 * @throws Error when a `write_file` action lacks a string `path`/`content`,
 *   when its path resolves outside `projectDir`, or when a `check_task` action
 *   lacks a numeric `taskIndex`. Actions before the failing one have already
 *   been applied.
 */
async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
  const files: string[] = [];

  for (const action of plan.actions) {
    switch (action.type) {
      case "write_file": {
        // typeof checks also reject JSON `null`, which `=== undefined` lets through.
        if (
          typeof action.path !== "string" ||
          action.path === "" ||
          typeof action.content !== "string"
        ) {
          throw new Error("write_file action 缺少 path 或 content");
        }

        const fullPath = join(projectDir, action.path);
        // Refuse paths such as "../x" that normalize to a location outside the
        // project, and paths that resolve to the project directory itself.
        const rel = relative(projectDir, fullPath);
        const escapesProject =
          rel === "" || rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel);
        if (escapesProject) {
          throw new Error(`write_file action 路径超出项目目录: ${action.path}`);
        }

        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, action.content);
        files.push(action.path);
        break;
      }
      case "check_task": {
        if (typeof action.taskIndex !== "number") {
          throw new Error("check_task action 缺少 taskIndex");
        }
        break;
      }
      case "done":
        break;
    }
  }

  return files;
}
/**
 * Collects the task indices named by the plan's `check_task` actions.
 *
 * @param plan - Parsed LLM plan.
 * @returns The `taskIndex` of every `check_task` action that defines one, in
 *   plan order. Other action types are skipped.
 */
function extractTaskCheckActions(plan: LLMPlan): number[] {
  const indices: number[] = [];
  for (const action of plan.actions) {
    if (action.type !== "check_task" || action.taskIndex === undefined) {
      continue;
    }
    indices.push(action.taskIndex);
  }
  return indices;
}
/**
 * Tier 3 agent runner: assembles the prompt for each phase, sends it to the
 * configured LLM via `callLLM`, and returns the model's plan as `rawPlan`.
 *
 * - `runPlan` only returns the plan; no action is executed.
 * - `runBuild` executes the plan's file writes and ticks the referenced tasks
 *   in the change's `task.md`.
 * - `runArchive` moves the change directory into the archive directory; the
 *   plan itself is returned but not executed.
 *
 * Every method throws when `isLLMAvailable()` reports that no API key is set.
 */
export class LLMJudgeRunner implements AgentRunner {
  readonly tier = 3;

  /**
   * Guard shared by all entry points.
   *
   * @throws Error when `RUNE_E2E_LLM_API_KEY` is not set. The provider has a
   *   default in `getLLMEnv()`, so only the key is named in the message.
   */
  private ensureLLMAvailable(): void {
    if (!isLLMAvailable()) {
      throw new Error("RUNE_E2E_LLM_API_KEY 未设置");
    }
  }

  /**
   * Asks the LLM for a plan for one document of a change.
   *
   * @param projectDir - Project root passed to the prompt assembler.
   * @param changeName - Name of the change being planned.
   * @param docName - Document to plan for.
   * @param config - Project configuration passed to the prompt assembler.
   * @returns A result with an empty `files` list and the LLM plan as `rawPlan`.
   */
  async runPlan(
    projectDir: string,
    changeName: string,
    docName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureLLMAvailable();
    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
    const plan = await callLLM(prompt);
    return {
      files: [],
      rawPlan: plan,
    };
  }

  /**
   * Asks the LLM for a build plan, executes its file writes, and marks the
   * tasks named by `check_task` actions as done in `task.md`.
   *
   * @param projectDir - Project root; file actions are applied beneath it.
   * @param changeName - Name of the change being built.
   * @param config - Project configuration passed to the prompt assembler.
   * @returns The de-duplicated list of written paths (plus `"task.md"` when
   *   the plan contained task checks) and the LLM plan as `rawPlan`.
   */
  async runBuild(projectDir: string, changeName: string, config: RuneConfig): Promise<AgentResult> {
    this.ensureLLMAvailable();
    const changeDir = getChangeDir(projectDir, changeName);
    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
    const plan = await callLLM(prompt);
    const files = await executeActions(projectDir, plan);

    const taskIndices = extractTaskCheckActions(plan);
    if (taskIndices.length > 0) {
      const taskPath = join(changeDir, "task.md");
      let taskContent = await readFile(taskPath, "utf-8");
      const tasks = parseTasks(taskContent);

      for (const index of taskIndices) {
        // The index comes from model output: skip anything that does not name
        // an existing task (negative, out of range, non-integer).
        const task = tasks[index];
        if (task === undefined) {
          continue;
        }
        // Function replacer: the task text is inserted literally, so "$&" or
        // "$1" inside it is not treated as a replacement pattern.
        taskContent = taskContent.replace(`- [ ] ${task.text}`, () => `- [x] ${task.text}`);
      }

      await writeFile(taskPath, taskContent);
      files.push("task.md");
    }

    return {
      files: [...new Set(files)],
      rawPlan: plan,
    };
  }

  /**
   * Asks the LLM for an archive plan, then moves the change directory to
   * `<archiveDir>/<YYYY-MM-DD>-<changeName>`.
   *
   * @param projectDir - Project root used to locate the change and archive directories.
   * @param changeName - Name of the change being archived.
   * @param config - Project configuration passed to the prompt assembler.
   * @returns A result with an empty `files` list and the LLM plan as `rawPlan`.
   */
  async runArchive(
    projectDir: string,
    changeName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureLLMAvailable();
    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
    const changeDir = getChangeDir(projectDir, changeName);
    const plan = await callLLM(prompt);

    // Date prefix is taken from toISOString(), i.e. the UTC calendar date.
    const today = new Date().toISOString().slice(0, 10);
    const archiveDir = getArchiveDir(projectDir);
    await mkdir(archiveDir, { recursive: true });
    const dest = join(archiveDir, `${today}-${changeName}`);
    await rename(changeDir, dest);

    return {
      files: [],
      rawPlan: plan,
    };
  }
}