feat: 第三期 — Tier 3 LLM-as-Judge 集成

2026-06-09 16:11:48 +08:00
parent bb7d5e740c
commit 073b9c1e47
2 changed files with 324 additions and 0 deletions
--- a/tests/agent/e2e-llm-judge.test.ts
+++ b/tests/agent/e2e-llm-judge.test.ts
@@ -0,0 +1,56 @@
+import { describe, it, expect, beforeEach, afterEach } from "bun:test";
+import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
+import {
+  setupTempDir,
+  cleanupTempDir,
+  getTempDir,
+  createFreshProject,
+  writeDoc,
+} from "./fixtures.ts";
+
+// Evaluated once at module load; the suite below uses it to decide whether tests run or are skipped.
+const tier3Available: boolean = isLLMAvailable();

+// Leave a visible hint in the test output when the LLM is not configured.
+if (tier3Available === false) {
+  console.log("RUNE_E2E_LLM_ 环境变量未配置，Tier 3 测试已跳过");
+}
+
+// Tier 3 end-to-end suite: sends real prompts to the configured LLM and checks
+// that the reply parses into an action plan.
+describe("e2e: Tier 3", () => {
+  // Per-test timeout in milliseconds, passed as the third argument to each test.
+  const LLM_TIMEOUT_MS = 120_000;
+  // Register real tests only when an LLM is configured; otherwise register them as skipped.
+  const testFn = tier3Available ? it : it.skip;
+  const runner = new LLMJudgeRunner();

+  // Every test gets a fresh temp directory that is removed afterwards.
+  beforeEach(async () => {
+    await setupTempDir();
+  });
+  afterEach(async () => {
+    await cleanupTempDir();
+  });

+  testFn(
+    "plan: 单文档输出有效行动计划",
+    async () => {
+      const config = await createFreshProject();
+      const { rawPlan } = await runner.runPlan(getTempDir(), "user-auth", "design", config);

+      expect(rawPlan).toBeDefined();
+      expect(rawPlan).toHaveProperty("actions");
+      // The plan must contain at least one action.
+      const { actions } = rawPlan as { actions: unknown[] };
+      expect(actions.length).toBeGreaterThan(0);
+    },
+    LLM_TIMEOUT_MS,
+  );

+  testFn(
+    "build: 单任务输出有效行动计划",
+    async () => {
+      const config = await createFreshProject();
+      // Seed the change with a single unchecked task for the build prompt.
+      await writeDoc("auth", "task", "- [ ] 实现登录 API\n");

+      const { rawPlan } = await runner.runBuild(getTempDir(), "auth", config);

+      expect(rawPlan).toBeDefined();
+      expect(rawPlan).toHaveProperty("actions");
+    },
+    LLM_TIMEOUT_MS,
+  );
+});
--- a/tests/agent/tier3-llm-judge.ts
+++ b/tests/agent/tier3-llm-judge.ts
@@ -0,0 +1,268 @@
+import type { RuneConfig } from "../../src/types.ts";
+import type { AgentRunner, AgentResult } from "./runner.ts";
+import {
+  assemblePlanPrompt,
+  assembleBuildPrompt,
+  assembleArchivePrompt,
+} from "../../src/core/assembler.ts";
+
+/**
+ * One step of the action plan the LLM is asked to produce.
+ *
+ * Which optional fields are expected depends on `type`; the shape is not
+ * enforced by the type system, so consumers check the fields at runtime.
+ */
+export interface LLMAction {
+  /** Kind of step: write a file, mark a task as completed, or signal completion. */
+  type: "write_file" | "check_task" | "done";
+  /** Target path for `write_file`; the system prompt asks the model for a relative path. */
+  path?: string;
+  /** File content for `write_file`. */
+  content?: string;
+  /** Zero-based index into the task list for `check_task` (as requested in the system prompt). */
+  taskIndex?: number;
+}
+
+/** The JSON document the LLM is instructed to return: a list of actions. */
+export interface LLMPlan {
+  /** Actions in the order the model emitted them. */
+  actions: LLMAction[];
+}
+
+/**
+ * Reports whether an LLM API key is configured.
+ *
+ * @returns `true` when `RUNE_E2E_LLM_API_KEY` is set to a non-empty string.
+ */
+export function isLLMAvailable(): boolean {
+  const apiKey = process.env.RUNE_E2E_LLM_API_KEY;
+  return Boolean(apiKey);
+}
+
+/**
+ * Reads the Tier 3 LLM settings from `RUNE_E2E_LLM_*` environment variables.
+ *
+ * Unset or empty variables fall back to defaults: provider `openai`, model
+ * `gpt-4o-mini`, an empty API key, and a base URL that matches the provider.
+ *
+ * @returns The provider name, model id, API key and base URL to use.
+ */
+export function getLLMEnv() {
+  const provider = process.env.RUNE_E2E_LLM_PROVIDER || "openai";

+  // OpenRouter is OpenAI-compatible but lives on its own host. Defaulting it to
+  // the OpenAI endpoint would send the OpenRouter key to api.openai.com.
+  const defaultBaseUrl =
+    provider === "openrouter" ? "https://openrouter.ai/api/v1" : "https://api.openai.com/v1";

+  return {
+    provider,
+    model: process.env.RUNE_E2E_LLM_MODEL || "gpt-4o-mini",
+    apiKey: process.env.RUNE_E2E_LLM_API_KEY || "",
+    // An explicit base URL always wins over the provider default.
+    baseUrl: process.env.RUNE_E2E_LLM_BASE_URL || defaultBaseUrl,
+  };
+}
+
+/**
+ * Sends `prompt` to the configured LLM provider and parses the reply into an action plan.
+ *
+ * `openai` and `openrouter` are called through `${baseUrl}/chat/completions`.
+ * `anthropic` is called through the fixed Anthropic Messages endpoint; the
+ * configured base URL is not used for it.
+ *
+ * @param prompt - Content of the user message sent to the model.
+ * @returns The plan extracted from the model's text output by `parseLLMResponse`.
+ * @throws Error when the provider is not supported or the HTTP response is not OK;
+ *   errors from `parseLLMResponse` propagate unchanged.
+ */
+async function callLLM(prompt: string): Promise<LLMPlan> {
+  const env = getLLMEnv();
+  const isChatCompletions = env.provider === "openai" || env.provider === "openrouter";

+  // Reject unknown providers before any request is built.
+  if (!isChatCompletions && env.provider !== "anthropic") {
+    throw new Error(`不支持的 LLM provider: ${env.provider}`);
+  }

+  let endpoint: string;
+  let headers: Record<string, string>;
+  let payload: Record<string, unknown>;

+  if (isChatCompletions) {
+    // OpenAI-compatible chat completions: system prompt travels as the first message.
+    endpoint = `${env.baseUrl}/chat/completions`;
+    headers = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${env.apiKey}`,
+    };
+    payload = {
+      model: env.model,
+      messages: [
+        {
+          role: "system",
+          content: `你是一个自动化构建工具，负责根据提示词生成精确的文件操作计划。

+请严格按以下 JSON 格式输出行动计划（不要包含其他内容）：
+{
+  "actions": [
+    { "type": "write_file", "path": "相对路径", "content": "文件内容" },
+    { "type": "check_task", "taskIndex": 0 }
+  ]
+}

+可用的 action 类型：
+- write_file: 写入文件，path 和 content 必填
+- check_task: 标记任务为已完成，taskIndex 是任务列表中从 0 开始的索引
+- done: 表示所有操作已完成

+根据提示词要求，生成完整的操作计划。不要跳过任何步骤。`,
+        },
+        { role: "user", content: prompt },
+      ],
+      // JSON.stringify omits undefined values, so no temperature is sent.
+      temperature: undefined,
+    };
+  } else {
+    // Anthropic Messages API: system prompt is a top-level field and max_tokens is set.
+    endpoint = `https://api.anthropic.com/v1/messages`;
+    headers = {
+      "Content-Type": "application/json",
+      "x-api-key": env.apiKey,
+      "anthropic-version": "2023-06-01",
+    };
+    payload = {
+      model: env.model,
+      max_tokens: 4096,
+      system: `你是一个自动化构建工具，负责根据提示词生成精确的文件操作计划。

+请严格按以下 JSON 格式输出行动计划（不要包含其他内容）：
+{
+  "actions": [
+    { "type": "write_file", "path": "相对路径", "content": "文件内容" },
+    { "type": "check_task", "taskIndex": 0 }
+  ]
+}

+可用的 action 类型：
+- write_file: 写入文件，path 和 content 必填  
+- check_task: 标记任务为已完成，taskIndex 是任务列表中从 0 开始的索引
+- done: 表示所有操作已完成

+根据提示词要求，生成完整的操作计划。不要跳过任何步骤。`,
+      messages: [{ role: "user", content: prompt }],
+      // JSON.stringify omits undefined values, so no temperature is sent.
+      temperature: undefined,
+    };
+  }

+  const reply = await fetch(endpoint, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  });

+  // Surface the status and raw body so a failed test shows the provider's error.
+  if (!reply.ok) {
+    const detail = await reply.text();
+    throw new Error(`LLM API error ${reply.status}: ${detail}`);
+  }

+  // The two APIs nest the generated text differently; a missing value becomes "".
+  const data = await reply.json();
+  const text = isChatCompletions
+    ? data.choices?.[0]?.message?.content || ""
+    : data.content?.[0]?.text || "";
+  return parseLLMResponse(text);
+}
+
+/**
+ * Extracts the action plan from raw LLM output.
+ *
+ * The span from the first `{` to the last `}` is taken as the JSON document,
+ * so text surrounding the JSON (for example a Markdown code fence) is ignored.
+ *
+ * @param text - Raw text returned by the model.
+ * @returns The parsed plan, guaranteed to have an `actions` array.
+ * @throws Error when no JSON object is found, when the JSON is malformed
+ *   (original error attached as `cause`), or when `actions` is not an array.
+ */
+function parseLLMResponse(text: string): LLMPlan {
+  const jsonMatch = text.match(/\{[\s\S]*\}/);
+  if (!jsonMatch) {
+    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
+  }

+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(jsonMatch[0]);
+  } catch (e) {
+    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
+  }

+  // Validate outside the try block so a well-formed document without `actions`
+  // is reported as such instead of being re-labelled as a JSON parse failure.
+  const candidate = parsed as { actions?: unknown } | null;
+  if (typeof candidate !== "object" || candidate === null || !Array.isArray(candidate.actions)) {
+    throw new Error("LLM 输出缺少 actions 数组");
+  }
+  return candidate as LLMPlan;
+}
+
+import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
+import { dirname, join, relative, sep } from "node:path";
+import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
+import { parseTasks } from "../../src/core/task-parser.ts";
+
+/**
+ * Applies the `write_file` actions of a plan to disk, in plan order.
+ *
+ * `check_task` actions are only validated here and `done` is a no-op; action
+ * types outside the known set are ignored.
+ *
+ * @param projectDir - Directory that every written path is joined onto.
+ * @param plan - Plan produced by the LLM.
+ * @returns The `path` of every `write_file` action, exactly as given in the plan.
+ * @throws Error when a `write_file` action lacks `path`/`content` or its path
+ *   does not resolve to a location inside `projectDir`, or when a `check_task`
+ *   action lacks `taskIndex`. File-system errors propagate unchanged.
+ */
+async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
+  const files: string[] = [];

+  for (const action of plan.actions) {
+    switch (action.type) {
+      case "write_file": {
+        if (!action.path || action.content === undefined) {
+          throw new Error("write_file action 缺少 path 或 content");
+        }
+        const fullPath = join(projectDir, action.path);

+        // The path comes from model output, so refuse anything that escapes the
+        // project directory (e.g. "../../x") or that names the directory itself.
+        const rel = relative(projectDir, fullPath);
+        if (rel === "" || rel === ".." || rel.startsWith(`..${sep}`)) {
+          throw new Error(`write_file action 的 path 必须指向项目目录内的文件: ${action.path}`);
+        }

+        await mkdir(dirname(fullPath), { recursive: true });
+        await writeFile(fullPath, action.content);
+        files.push(action.path);
+        break;
+      }
+      case "check_task": {
+        // Only validated here; this function does not modify the task file.
+        if (action.taskIndex === undefined) {
+          throw new Error("check_task action 缺少 taskIndex");
+        }
+        break;
+      }
+      case "done":
+        break;
+    }
+  }

+  return files;
+}
+
+/**
+ * Collects the task indices referenced by the plan's `check_task` actions.
+ *
+ * @param plan - Plan produced by the LLM.
+ * @returns The `taskIndex` of each `check_task` action that has one, in plan order.
+ */
+function extractTaskCheckActions(plan: LLMPlan): number[] {
+  const indices: number[] = [];
+  for (const action of plan.actions) {
+    if (action.type !== "check_task") continue;
+    if (action.taskIndex === undefined) continue;
+    indices.push(action.taskIndex);
+  }
+  return indices;
+}
+
+/**
+ * Tier 3 agent runner: builds the real prompts, sends them to a live LLM and
+ * applies (parts of) the returned action plan to the project directory.
+ *
+ * Every method throws when no LLM API key is configured.
+ */
+export class LLMJudgeRunner implements AgentRunner {
+  readonly tier = 3;

+  /**
+   * Requests a plan for a single document of a change.
+   *
+   * The returned plan is not executed; `files` is always empty.
+   *
+   * @throws Error when the LLM is not configured, or when the request or parsing fails.
+   */
+  async runPlan(
+    projectDir: string,
+    changeName: string,
+    docName: string,
+    config: RuneConfig,
+  ): Promise<AgentResult> {
+    if (!isLLMAvailable()) {
+      throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
+    }

+    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
+    const plan = await callLLM(prompt);

+    return {
+      files: [],
+      rawPlan: plan,
+    };
+  }

+  /**
+   * Requests a build plan, writes its files, and ticks the tasks it marks as done.
+   *
+   * Tasks are ticked by rewriting `task.md` in the change directory: for each
+   * `check_task` index, the first `- [ ] <text>` occurrence of that task's text
+   * becomes `- [x] <text>`. Indices that do not refer to an existing task are skipped.
+   *
+   * @returns The de-duplicated written paths (plus `"task.md"` when any
+   *   `check_task` action was present) and the raw plan.
+   * @throws Error when the LLM is not configured, or when the request, parsing,
+   *   action execution or file access fails.
+   */
+  async runBuild(projectDir: string, changeName: string, config: RuneConfig): Promise<AgentResult> {
+    if (!isLLMAvailable()) {
+      throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
+    }

+    const changeDir = getChangeDir(projectDir, changeName);
+    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
+    const plan = await callLLM(prompt);

+    const files = await executeActions(projectDir, plan);

+    const taskIndices = extractTaskCheckActions(plan);
+    if (taskIndices.length > 0) {
+      const taskPath = join(changeDir, "task.md");
+      let taskContent = await readFile(taskPath, "utf-8");
+      const tasks = parseTasks(taskContent);

+      for (const index of taskIndices) {
+        // The index comes from model output: negative, fractional, null or
+        // out-of-range values look up nothing and are skipped instead of crashing.
+        const task = tasks[index];
+        if (!task) {
+          continue;
+        }
+        const oldLine = `- [ ] ${task.text}`;
+        const newLine = `- [x] ${task.text}`;
+        // A replacer function keeps "$" sequences in the task text literal;
+        // a replacement string would interpret "$&", "$1", "$$" and so on.
+        taskContent = taskContent.replace(oldLine, () => newLine);
+      }

+      await writeFile(taskPath, taskContent);
+      files.push("task.md");
+    }

+    return {
+      files: [...new Set(files)],
+      rawPlan: plan,
+    };
+  }

+  /**
+   * Requests an archive plan and moves the change directory into the archive.
+   *
+   * The plan is returned but not executed. The destination is
+   * `<archiveDir>/<YYYY-MM-DD>-<changeName>`, where the date is the current UTC
+   * date (taken from `toISOString`).
+   *
+   * @throws Error when the LLM is not configured, or when the request, parsing
+   *   or the directory move fails.
+   */
+  async runArchive(
+    projectDir: string,
+    changeName: string,
+    config: RuneConfig,
+  ): Promise<AgentResult> {
+    if (!isLLMAvailable()) {
+      throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
+    }

+    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
+    const changeDir = getChangeDir(projectDir, changeName);
+    const plan = await callLLM(prompt);

+    const today = new Date().toISOString().slice(0, 10);
+    const archiveDir = getArchiveDir(projectDir);
+    await mkdir(archiveDir, { recursive: true });
+    const dest = join(archiveDir, `${today}-${changeName}`);
+    await rename(changeDir, dest);

+    return {
+      files: [],
+      rawPlan: plan,
+    };
+  }
+}