feat: 第三期 — Tier 3 LLM-as-Judge 集成

2026-06-09 16:11:48 +08:00
parent bb7d5e740c
commit 073b9c1e47
2 changed files with 324 additions and 0 deletions
--- a/tests/agent/e2e-llm-judge.test.ts
+++ b/tests/agent/e2e-llm-judge.test.ts
@@ -0,0 +1,56 @@
 import { describe, it, expect, beforeEach, afterEach } from "bun:test";
 import { LLMJudgeRunner, isLLMAvailable } from "./tier3-llm-judge.ts";
 import {
  setupTempDir,
  cleanupTempDir,
  getTempDir,
  createFreshProject,
  writeDoc,
 } from "./fixtures.ts";
 // The Tier 3 cases are registered with `it.skip` unless isLLMAvailable()
 // reports that LLM credentials are configured.
 const tier3Available = isLLMAvailable();
 if (tier3Available === false) {
  console.log("RUNE_E2E_LLM_ 环境变量未配置，Tier 3 测试已跳过");
 }

 describe("e2e: Tier 3", () => {
  // Per-case timeout passed to the test registrar, in milliseconds.
  const LLM_CASE_TIMEOUT_MS = 120_000;
  const runner = new LLMJudgeRunner();
  // Real test registrar when credentials exist, skipping registrar otherwise.
  const runOrSkip = tier3Available ? it : it.skip;

  /** Shared assertion: the raw plan exists and exposes an `actions` property. */
  const expectActionPlan = (rawPlan: unknown): void => {
    expect(rawPlan).toBeDefined();
    expect(rawPlan).toHaveProperty("actions");
  };

  // Every case gets its own temp directory, removed again afterwards.
  beforeEach(async () => {
    await setupTempDir();
  });
  afterEach(async () => {
    await cleanupTempDir();
  });

  runOrSkip(
    "plan: 单文档输出有效行动计划",
    async () => {
      const config = await createFreshProject();
      const { rawPlan } = await runner.runPlan(getTempDir(), "user-auth", "design", config);
      expectActionPlan(rawPlan);
      // The plan must contain at least one action.
      const { actions } = rawPlan as { actions: unknown[] };
      expect(actions.length).toBeGreaterThan(0);
    },
    LLM_CASE_TIMEOUT_MS,
  );

  runOrSkip(
    "build: 单任务输出有效行动计划",
    async () => {
      const config = await createFreshProject();
      await writeDoc("auth", "task", "- [ ] 实现登录 API\n");
      const { rawPlan } = await runner.runBuild(getTempDir(), "auth", config);
      expectActionPlan(rawPlan);
    },
    LLM_CASE_TIMEOUT_MS,
  );
 });
--- a/tests/agent/tier3-llm-judge.ts
+++ b/tests/agent/tier3-llm-judge.ts
@@ -0,0 +1,268 @@
 import type { RuneConfig } from "../../src/types.ts";
 import type { AgentRunner, AgentResult } from "./runner.ts";
 import {
  assemblePlanPrompt,
  assembleBuildPrompt,
  assembleArchivePrompt,
 } from "../../src/core/assembler.ts";
 /**
  * A single step of the action plan returned by the LLM.
  *
  * Which optional fields are expected depends on `type`; the interface itself
  * does not enforce this, consumers check at runtime.
  */
 export interface LLMAction {
  /** Kind of step: write a file, mark a task as checked, or signal completion. */
  type: "write_file" | "check_task" | "done";
  /** Target path for `write_file`; it is joined onto the project directory when executed. */
  path?: string;
  /** File content for `write_file`. */
  content?: string;
  /** Task position for `check_task`; the system prompt asks the model for a 0-based index. */
  taskIndex?: number;
 }
 /** The action plan parsed from the JSON object in the LLM's reply. */
 export interface LLMPlan {
  /** Steps in the order the model emitted them. */
  actions: LLMAction[];
 }
 /**
  * Reports whether Tier 3 can run, i.e. whether `RUNE_E2E_LLM_API_KEY` is set
  * to a non-empty value.
  *
  * @returns `true` when an API key is present, `false` when it is missing or empty.
  */
 export function isLLMAvailable(): boolean {
  const apiKey = process.env.RUNE_E2E_LLM_API_KEY;
  return (apiKey ?? "").length > 0;
 }
 /**
  * Default API base URL for a provider when `RUNE_E2E_LLM_BASE_URL` is not set.
  * Unknown providers keep the historical OpenAI default.
  */
 function defaultLLMBaseUrl(provider: string): string {
  switch (provider) {
    case "openrouter":
      return "https://openrouter.ai/api/v1";
    case "anthropic":
      return "https://api.anthropic.com/v1";
    default:
      return "https://api.openai.com/v1";
  }
 }

 /**
  * Reads the Tier 3 LLM settings from the `RUNE_E2E_LLM_*` environment variables.
  *
  * Empty-string variables are treated as unset (hence `||`, not `??`).
  * The base URL default follows the selected provider, so `provider=openrouter`
  * without an explicit base URL no longer targets the OpenAI endpoint.
  * An explicit `RUNE_E2E_LLM_BASE_URL` always wins.
  *
  * @returns The provider, model, API key (`""` when missing) and base URL to use.
  */
 export function getLLMEnv() {
  const env = process.env;
  const provider = env.RUNE_E2E_LLM_PROVIDER || "openai";
  return {
    provider,
    model: env.RUNE_E2E_LLM_MODEL || "gpt-4o-mini",
    apiKey: env.RUNE_E2E_LLM_API_KEY || "",
    baseUrl: env.RUNE_E2E_LLM_BASE_URL || defaultLLMBaseUrl(provider),
  };
 }
 /**
  * System prompt shared by every provider. It instructs the model to answer
  * with a single JSON object of the LLMPlan shape.
  */
 const LLM_SYSTEM_PROMPT = `你是一个自动化构建工具，负责根据提示词生成精确的文件操作计划。
 请严格按以下 JSON 格式输出行动计划（不要包含其他内容）：
 {
  "actions": [
    { "type": "write_file", "path": "相对路径", "content": "文件内容" },
    { "type": "check_task", "taskIndex": 0 }
  ]
 }
 可用的 action 类型：
 - write_file: 写入文件，path 和 content 必填
 - check_task: 标记任务为已完成，taskIndex 是任务列表中从 0 开始的索引
 - done: 表示所有操作已完成
 根据提示词要求，生成完整的操作计划。不要跳过任何步骤。`;

 /**
  * POSTs a JSON payload and returns the decoded JSON response body.
  *
  * @throws Error containing the HTTP status and response body on a non-2xx reply.
  */
 async function postLLMRequest(
  url: string,
  headers: Record<string, string>,
  payload: unknown,
 ): Promise<unknown> {
  const response = await fetch(url, {
    method: "POST",
    headers: { "Content-Type": "application/json", ...headers },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const body = await response.text();
    throw new Error(`LLM API error ${response.status}: ${body}`);
  }
  return response.json();
 }

 /**
  * Sends `prompt` to the configured LLM provider and parses the reply into a plan.
  *
  * Supported providers are `openai` and `openrouter` (chat-completions wire
  * format, using the configured base URL) and `anthropic` (Messages API at its
  * fixed public endpoint). No sampling temperature is sent, so the provider
  * default applies.
  *
  * @param prompt User-message content sent alongside the shared system prompt.
  * @returns The plan parsed from the model's text output.
  * @throws Error on a non-2xx response, an unsupported provider, or when the
  *   reply cannot be parsed into a plan.
  */
 async function callLLM(prompt: string): Promise<LLMPlan> {
  const { provider, model, apiKey, baseUrl } = getLLMEnv();

  if (provider === "openai" || provider === "openrouter") {
    // Tolerate a trailing slash in the configured base URL.
    const endpoint = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
    const data = (await postLLMRequest(
      endpoint,
      { Authorization: `Bearer ${apiKey}` },
      {
        model,
        messages: [
          { role: "system", content: LLM_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
      },
    )) as { choices?: Array<{ message?: { content?: string | null } }> } | null;
    return parseLLMResponse(data?.choices?.[0]?.message?.content || "");
  }

  if (provider === "anthropic") {
    const data = (await postLLMRequest(
      `https://api.anthropic.com/v1/messages`,
      { "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
      {
        model,
        max_tokens: 4096,
        system: LLM_SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
      },
    )) as { content?: Array<{ text?: unknown } | null> } | null;
    // The reply is a list of content blocks; concatenate every textual one
    // instead of assuming the first block carries the answer.
    const blocks = Array.isArray(data?.content) ? data.content : [];
    const text = blocks
      .map((block) => block?.text)
      .filter((part): part is string => typeof part === "string")
      .join("");
    return parseLLMResponse(text);
  }

  throw new Error(`不支持的 LLM provider: ${provider}`);
 }
 /**
  * Extracts the action plan from raw LLM output.
  *
  * The span from the first `{` to the last `}` is parsed as JSON, so text or
  * code fences around the object are tolerated. Syntax errors and structural
  * errors are reported separately: a well-formed object without an `actions`
  * array is no longer mislabelled as a JSON parse failure.
  *
  * @param text Raw text returned by the model.
  * @returns The parsed plan; only the presence of an `actions` array is verified.
  * @throws Error when no JSON object is found, when the JSON is malformed
  *   (original error attached as `cause`), or when `actions` is not an array.
  */
 function parseLLMResponse(text: string): LLMPlan {
  const jsonMatch = text.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error(`LLM 输出中未找到 JSON: ${text.slice(0, 200)}`);
  }

  // Only the parse itself is guarded, so validation errors below keep their own message.
  let parsed: unknown;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (e) {
    throw new Error(`LLM 输出 JSON 解析失败: ${text.slice(0, 200)}`, { cause: e });
  }

  const actions = (parsed as { actions?: unknown } | null)?.actions;
  if (!Array.isArray(actions)) {
    throw new Error("LLM 输出缺少 actions 数组");
  }
  return parsed as LLMPlan;
 }
 import { mkdir, writeFile, readFile, rename } from "node:fs/promises";
 import { dirname, isAbsolute, join, relative, sep } from "node:path";
 import { getChangeDir, getArchiveDir } from "../../src/core/config.ts";
 import { parseTasks } from "../../src/core/task-parser.ts";
 /**
  * Applies the file-writing steps of an LLM plan under `projectDir`.
  *
  * `write_file` actions create missing parent directories and write the file.
  * `check_task` actions are only validated here, and `done` is a no-op.
  * Actions of any other type are ignored.
  *
  * Paths come from model output and are therefore untrusted: a path that
  * resolves outside `projectDir` (for example via `..`) is rejected.
  *
  * @param projectDir Root directory that every written path is joined onto.
  * @param plan Parsed plan whose actions are executed in order.
  * @returns The `path` of each `write_file` action, in execution order.
  * @throws Error when a `write_file` lacks `path` or `content`, when its path
  *   escapes `projectDir`, or when a `check_task` lacks `taskIndex`.
  */
 async function executeActions(projectDir: string, plan: LLMPlan): Promise<string[]> {
  const files: string[] = [];
  for (const action of plan.actions) {
    switch (action.type) {
      case "write_file": {
        if (!action.path || action.content === undefined) {
          throw new Error("write_file action 缺少 path 或 content");
        }
        const fullPath = join(projectDir, action.path);
        // join() has already normalised `..`; a relative path that still climbs
        // upwards (or is absolute, e.g. another drive) lies outside the project.
        const rel = relative(projectDir, fullPath);
        if (rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel)) {
          throw new Error(`write_file action 的 path 超出项目目录: ${action.path}`);
        }
        await mkdir(dirname(fullPath), { recursive: true });
        await writeFile(fullPath, action.content);
        files.push(action.path);
        break;
      }
      case "check_task": {
        if (action.taskIndex === undefined) {
          throw new Error("check_task action 缺少 taskIndex");
        }
        break;
      }
      case "done":
        break;
    }
  }
  return files;
 }
 /**
  * Collects the task indices that the plan asks to mark as checked.
  *
  * @param plan Parsed LLM plan.
  * @returns The `taskIndex` of every `check_task` action that has one, in plan order.
  */
 function extractTaskCheckActions(plan: LLMPlan): number[] {
  const indices: number[] = [];
  for (const step of plan.actions) {
    if (step.type !== "check_task") continue;
    if (step.taskIndex === undefined) continue;
    indices.push(step.taskIndex);
  }
  return indices;
 }
 /**
  * Tier 3 agent runner: assembles the prompt for a workflow step, asks the
  * configured LLM for an action plan, and applies the step's effects to the
  * project directory.
  *
  * Every method throws when `isLLMAvailable()` is false and propagates errors
  * from prompt assembly, the LLM call and the filesystem.
  */
 export class LLMJudgeRunner implements AgentRunner {
  readonly tier = 3;

  /**
   * Requests a plan for one document of a change. The plan is returned as-is:
   * its actions are not executed and no files are written.
   *
   * @returns A result with an empty `files` list and the plan in `rawPlan`.
   */
  async runPlan(
    projectDir: string,
    changeName: string,
    docName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureLLMConfigured();
    const prompt = await assemblePlanPrompt(config, projectDir, changeName, docName);
    const plan = await callLLM(prompt);
    return {
      files: [],
      rawPlan: plan,
    };
  }

  /**
   * Requests a build plan, executes its file writes, and ticks the tasks named
   * by its `check_task` actions in the change's `task.md`.
   *
   * @returns De-duplicated written paths (plus `"task.md"` when any task index
   *   was supplied) and the plan in `rawPlan`.
   */
  async runBuild(projectDir: string, changeName: string, config: RuneConfig): Promise<AgentResult> {
    this.ensureLLMConfigured();
    const changeDir = getChangeDir(projectDir, changeName);
    const prompt = await assembleBuildPrompt(config, projectDir, changeName);
    const plan = await callLLM(prompt);
    const files = await executeActions(projectDir, plan);
    const taskIndices = extractTaskCheckActions(plan);
    if (taskIndices.length > 0) {
      const taskPath = join(changeDir, "task.md");
      let taskContent = await readFile(taskPath, "utf-8");
      const tasks = parseTasks(taskContent);
      for (const index of taskIndices) {
        // Indices come from model output: skip anything that is not a valid
        // position (negative, fractional, out of range) instead of crashing.
        if (!Number.isInteger(index) || index < 0 || index >= tasks.length) {
          continue;
        }
        const task = tasks[index];
        const oldLine = `- [ ] ${task.text}`;
        const newLine = `- [x] ${task.text}`;
        // A replacer function keeps `$&`, `$1`, ... in the task text literal.
        // Only the first textual match is replaced, so tasks sharing the same
        // text (or one being a prefix of another) are not told apart.
        taskContent = taskContent.replace(oldLine, () => newLine);
      }
      await writeFile(taskPath, taskContent);
      files.push("task.md");
    }
    return {
      files: [...new Set(files)],
      rawPlan: plan,
    };
  }

  /**
   * Requests an archive plan, then moves the change directory into the archive
   * directory as `<YYYY-MM-DD>-<changeName>`. The plan's actions are not
   * executed; the plan is only returned in `rawPlan`.
   *
   * @returns A result with an empty `files` list and the plan in `rawPlan`.
   */
  async runArchive(
    projectDir: string,
    changeName: string,
    config: RuneConfig,
  ): Promise<AgentResult> {
    this.ensureLLMConfigured();
    const prompt = await assembleArchivePrompt(config, projectDir, changeName);
    const changeDir = getChangeDir(projectDir, changeName);
    const plan = await callLLM(prompt);
    // toISOString() is UTC, so the date prefix is the UTC calendar day.
    // NOTE(review): confirm this matches the archive naming used by the real CLI.
    const today = new Date().toISOString().slice(0, 10);
    const archiveDir = getArchiveDir(projectDir);
    await mkdir(archiveDir, { recursive: true });
    const dest = join(archiveDir, `${today}-${changeName}`);
    await rename(changeDir, dest);
    return {
      files: [],
      rawPlan: plan,
    };
  }

  /** Fails fast, before any prompt is assembled, when no LLM credentials are configured. */
  private ensureLLMConfigured(): void {
    if (!isLLMAvailable()) {
      throw new Error("RUNE_E2E_LLM_PROVIDER 和 RUNE_E2E_LLM_API_KEY 未设置");
    }
  }
 }