diff --git a/tests/agent/e2e-llm-judge.test.ts b/tests/agent/e2e-llm-judge.test.ts
new file mode 100644
index 0000000..67cb252
--- /dev/null
+++ b/tests/agent/e2e-llm-judge.test.ts
@@ -0,0 +1,62 @@
+/**
+ * Tier 3 end-to-end tests: send the assembled prompts to a real LLM and check
+ * that the reply parses into an action plan. The tests are skipped unless
+ * isLLMAvailable() reports that an API key is configured.
+ */
+import { describe, it, expect, beforeEach, afterEach } from "bun:test";
+import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
+import {
+  setupTempDir,
+  cleanupTempDir,
+  getTempDir,
+  createFreshProject,
+  writeDoc,
+} from "./fixtures.ts";
+
+const tier3Available = isLLMAvailable();
+
+if (!tier3Available) {
+  console.log("RUNE_E2E_LLM_ 环境变量未配置,Tier 3 测试已跳过");
+}
+
+describe("e2e: Tier 3", () => {
+  const runner = new LLMJudgeRunner();
+  // Register the cases as skipped (rather than failing) when no LLM is configured.
+  const testFn = tier3Available ? it : it.skip;
+
+  beforeEach(async () => {
+    await setupTempDir();
+  });
+
+  afterEach(async () => {
+    await cleanupTempDir();
+  });
+
+  testFn(
+    "plan: 单文档输出有效行动计划",
+    async () => {
+      const config = await createFreshProject();
+      const result = await runner.runPlan(getTempDir(), "user-auth", "design", config);
+
+      expect(result.rawPlan).toBeDefined();
+      expect(result.rawPlan).toHaveProperty("actions");
+      const plan = result.rawPlan as { actions: unknown[] };
+      expect(plan.actions.length).toBeGreaterThan(0);
+    },
+    120_000, // per-test timeout in ms; the call goes to a remote API
+  );
+
+  testFn(
+    "build: 单任务输出有效行动计划",
+    async () => {
+      const config = await createFreshProject();
+      await writeDoc("auth", "task", "- [ ] 实现登录 API\n");
+
+      const result = await runner.runBuild(getTempDir(), "auth", config);
+
+      expect(result.rawPlan).toBeDefined();
+      expect(result.rawPlan).toHaveProperty("actions");
+    },
+    120_000,
+  );
+});
diff --git a/tests/agent/tier3-llm-judge.ts b/tests/agent/tier3-llm-judge.ts
new file mode 100644
index 0000000..71d3b1c
--- /dev/null
+++ b/tests/agent/tier3-llm-judge.ts
@@ -0,0 +1,328 @@
+import type { RuneConfig } from "../../src/types.ts";
+import type { AgentRunner, AgentResult } from "./runner.ts";
+import {
+  assemblePlanPrompt,
+  assembleBuildPrompt,
+  assembleArchivePrompt,
+} from "../../src/core/assembler.ts";
+import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
+import { dirname, join, relative, resolve, sep } from "node:path";
+import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
+import { parseTasks } from "../../src/core/task-parser.ts";
+
+/** One step of the plan the LLM is asked to produce. */
+export interface LLMAction {
+  type: "write_file" | "check_task" | "done";
+  /** Target of `write_file`; joined onto the project directory. */
+  path?: string;
+  /** File content for `write_file`. */
+  content?: string;
+  /** Zero-based index into the parsed task list, for `check_task`. */
+  taskIndex?: number;
+}
+
+/** The JSON document expected back from the LLM. */
+export interface LLMPlan {
+  actions: LLMAction[];
+}
+
+/** Reports whether RUNE_E2E_LLM_API_KEY is set to a non-empty value. */
+export function isLLMAvailable(): boolean {
+  return !!process.env.RUNE_E2E_LLM_API_KEY;
+}
+
+/**
+ * Reads the LLM connection settings from the RUNE_E2E_LLM_* environment
+ * variables. Because `||` is used, an empty variable also falls back to the
+ * default.
+ */
+export function getLLMEnv() {
+  return {
+    provider: process.env.RUNE_E2E_LLM_PROVIDER || "openai",
+    model: process.env.RUNE_E2E_LLM_MODEL || "gpt-4o-mini",
+    apiKey: process.env.RUNE_E2E_LLM_API_KEY || "",
+    baseUrl: process.env.RUNE_E2E_LLM_BASE_URL || "https://api.openai.com/v1",
+  };
+}
+
+/** System prompt sent to every provider; it pins the JSON reply format. */
+const SYSTEM_PROMPT = `你是一个自动化构建工具,负责根据提示词生成精确的文件操作计划。
+
+请严格按以下 JSON 格式输出行动计划(不要包含其他内容):
+{
+  "actions": [
+    { "type": "write_file", "path": "相对路径", "content": "文件内容" },
+    { "type": "check_task", "taskIndex": 0 }
+  ]
+}
+
+可用的 action 类型:
+- write_file: 写入文件,path 和 content 必填
+- check_task: 标记任务为已完成,taskIndex 是任务列表中从 0 开始的索引
+- done: 表示所有操作已完成
+
+根据提示词要求,生成完整的操作计划。不要跳过任何步骤。`;
+
+/** Returns `value` when it is a string, otherwise the empty string. */
+function asText(value: unknown): string {
+  return typeof value === "string" ? value : "";
+}
+
+/**
+ * Throws unless `response.ok` is true; the error message carries the status
+ * code and the response body.
+ */
+async function ensureOk(response: Response): Promise<void> {
+  if (response.ok) return;
+  const body = await response.text();
+  throw new Error(`LLM API error ${response.status}: ${body}`);
+}
+
+/**
+ * Sends `prompt` to the configured provider and parses the reply into a plan.
+ *
+ * "openai" and "openrouter" post to `/chat/completions` under the configured
+ * base URL. "anthropic" always posts to api.anthropic.com; the base URL
+ * setting is not consulted for it.
+ *
+ * @throws Error on a failed HTTP response, an unparseable reply, or an
+ *   unsupported provider.
+ */
+async function callLLM(prompt: string): Promise<LLMPlan> {
+  const { provider, model, apiKey, baseUrl } = getLLMEnv();
+
+  if (provider === "openai" || provider === "openrouter") {
+    const response = await fetch(`${baseUrl}/chat/completions`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: JSON.stringify({
+        model,
+        messages: [
+          { role: "system", content: SYSTEM_PROMPT },
+          { role: "user", content: prompt },
+        ],
+      }),
+    });
+    await ensureOk(response);
+
+    const data = await response.json();
+    return parseLLMResponse(asText(data.choices?.[0]?.message?.content));
+  }
+
+  if (provider === "anthropic") {
+    const response = await fetch("https://api.anthropic.com/v1/messages", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "x-api-key": apiKey,
+        "anthropic-version": "2023-06-01",
+      },
+      body: JSON.stringify({
+        model,
+        max_tokens: 4096,
+        system: SYSTEM_PROMPT,
+        messages: [{ role: "user", content: prompt }],
+      }),
+    });
+    await ensureOk(response);
+
+    const data = await response.json();
+    return parseLLMResponse(asText(data.content?.[0]?.text));
+  }
+
+  throw new Error(`不支持的 LLM provider: ${provider}`);
+}
+
+/** Narrows a parsed JSON value to a plan by checking for an `actions` array. */
+function isLLMPlan(value: unknown): value is LLMPlan {
+  return (
+    typeof value === "object" &&
+    value !== null &&
+    Array.isArray((value as { actions?: unknown }).actions)
+  );
+}
+
+/**
+ * Extracts the span from the first `{` to the last `}` in `text` and parses
+ * it as a plan, so prose or code fences around the JSON are tolerated.
+ *
+ * @throws Error when no braces are found, when the span is not valid JSON
+ *   (the SyntaxError is attached as `cause`), or when `actions` is not an
+ *   array.
+ */
+function parseLLMResponse(text: string): LLMPlan {
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) {
+    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(jsonMatch[0]);
+  } catch (e) {
+    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
+  }
+
+  // Checked outside the try block so a shape problem is not reported as a
+  // JSON syntax failure.
+  if (!isLLMPlan(parsed)) {
+    throw new Error("LLM 输出缺少 actions 数组");
+  }
+  return parsed;
+}
+
+/**
+ * Joins an LLM-supplied path onto `projectDir` and rejects the result when
+ * `..` segments would place it outside that directory.
+ */
+function resolveInsideProject(projectDir: string, actionPath: string): string {
+  const root = resolve(projectDir);
+  const target = join(root, actionPath);
+  const rel = relative(root, target);
+  if (rel === ".." || rel.startsWith(`..${sep}`)) {
+    throw new Error(`write_file action 的 path 超出项目目录: ${actionPath}`);
+  }
+  return target;
+}
+
+/**
+ * Applies the `write_file` actions of `plan` under `projectDir`.
+ *
+ * `check_task` actions are only validated here (runBuild applies them), and
+ * `done` or unrecognised action types are ignored.
+ *
+ * @returns the `path` of every file written, in plan order.
+ * @throws Error when an action lacks a required field or its path leaves
+ *   the project directory.
+ */
+async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
+  const files: string[] = [];
+
+  for (const action of plan.actions) {
+    switch (action.type) {
+      case "write_file": {
+        if (!action.path || action.content === undefined) {
+          throw new Error("write_file action 缺少 path 或 content");
+        }
+        const fullPath = resolveInsideProject(projectDir, action.path);
+        await mkdir(dirname(fullPath), { recursive: true });
+        await writeFile(fullPath, action.content);
+        files.push(action.path);
+        break;
+      }
+      case "check_task": {
+        if (action.taskIndex === undefined) {
+          throw new Error("check_task action 缺少 taskIndex");
+        }
+        break;
+      }
+      case "done":
+        break;
+    }
+  }
+
+  return files;
+}
+
+/** Collects the `taskIndex` of every `check_task` action, in plan order. */
+function extractTaskCheckActions(plan: LLMPlan): number[] {
+  return plan.actions.flatMap((action) =>
+    action.type === "check_task" && action.taskIndex !== undefined ? [action.taskIndex] : [],
+  );
+}
+
+/** Throws when isLLMAvailable() reports that no API key is configured. */
+function assertLLMAvailable(): void {
+  if (!isLLMAvailable()) {
+    throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
+  }
+}
+
+/**
+ * Tier 3 runner: assembles the real prompts and sends them to an LLM. See
+ * each method for what is done with the returned plan.
+ */
+export class LLMJudgeRunner implements AgentRunner {
+  readonly tier = 3;
+
+  /** Asks the LLM for a plan for one document; nothing is written to disk. */
+  async runPlan(
+    projectDir: string,
+    changeName: string,
+    docName: string,
+    config: RuneConfig,
+  ): Promise<AgentResult> {
+    assertLLMAvailable();
+
+    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
+    const plan = await callLLM(prompt);
+
+    return { files: [], rawPlan: plan };
+  }
+
+  /**
+   * Asks the LLM for a build plan, writes its files and ticks the tasks it
+   * marked as done in the change's task.md.
+   */
+  async runBuild(
+    projectDir: string,
+    changeName: string,
+    config: RuneConfig,
+  ): Promise<AgentResult> {
+    assertLLMAvailable();
+
+    const changeDir = getChangeDir(projectDir, changeName);
+    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
+    const plan = await callLLM(prompt);
+
+    const files = await executeActions(projectDir, plan);
+
+    const taskIndices = extractTaskCheckActions(plan);
+    if (taskIndices.length > 0) {
+      const taskPath = join(changeDir, "task.md");
+      let taskContent = await readFile(taskPath, "utf-8");
+      const tasks = parseTasks(taskContent);
+
+      for (const index of taskIndices) {
+        // Indices come from the LLM; skip any that do not name a parsed task.
+        const task = tasks[index];
+        if (!task) continue;
+        // A replacer function keeps `$` sequences in the task text literal.
+        taskContent = taskContent.replace(`- [ ] ${task.text}`, () => `- [x] ${task.text}`);
+      }
+
+      await writeFile(taskPath, taskContent);
+      files.push("task.md");
+    }
+
+    return { files: [...new Set(files)], rawPlan: plan };
+  }
+
+  /**
+   * Asks the LLM for an archive plan, then moves the change directory to
+   * `<archiveDir>/<YYYY-MM-DD>-<changeName>` (UTC date). The plan itself is
+   * only returned, not executed.
+   */
+  async runArchive(
+    projectDir: string,
+    changeName: string,
+    config: RuneConfig,
+  ): Promise<AgentResult> {
+    assertLLMAvailable();
+
+    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
+    const changeDir = getChangeDir(projectDir, changeName);
+    const plan = await callLLM(prompt);
+
+    const today = new Date().toISOString().slice(0, 10);
+    const archiveDir = getArchiveDir(projectDir);
+    await mkdir(archiveDir, { recursive: true });
+    const dest = join(archiveDir, `${today}-${changeName}`);
+    await rename(changeDir, dest);
+
+    return { files: [], rawPlan: plan };
+  }
+}