From a23c1736629537f1e1e27e8087d81d1a42e22bc6 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:05:42 +0000 Subject: [PATCH 1/8] =?UTF-8?q?=F0=9F=A4=96=20fix:=20add=20deterministic?= =?UTF-8?q?=20stream=20guardrails=20for=20verification=20and=20doom=20loop?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/utils/tools/tools.ts | 6 + src/node/services/aiService.ts | 9 ++ .../StreamEditTracker.test.ts | 64 +++++++++ .../streamGuardrails/StreamEditTracker.ts | 45 ++++++ .../StreamVerificationTracker.test.ts | 30 ++++ .../StreamVerificationTracker.ts | 27 ++++ src/node/services/tools/agent_report.test.ts | 134 +++++++++++++++++- src/node/services/tools/agent_report.ts | 14 ++ src/node/services/tools/bash.test.ts | 21 ++- src/node/services/tools/bash.ts | 23 +++ .../tools/file_edit_operation.test.ts | 83 +++++++++++ .../services/tools/file_edit_operation.ts | 23 ++- 12 files changed, 470 insertions(+), 9 deletions(-) create mode 100644 src/node/services/streamGuardrails/StreamEditTracker.test.ts create mode 100644 src/node/services/streamGuardrails/StreamEditTracker.ts create mode 100644 src/node/services/streamGuardrails/StreamVerificationTracker.test.ts create mode 100644 src/node/services/streamGuardrails/StreamVerificationTracker.ts diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index d78fff8269..0ac75fb4e6 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -42,6 +42,8 @@ import type { WorkspaceChatMessage } from "@/common/orpc/types"; import type { FileState } from "@/node/services/agentSession"; import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker"; +import type { 
StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; /** * Configuration for tools that need runtime context @@ -80,6 +82,10 @@ export interface ToolConfiguration { taskService?: TaskService; /** Enable agent_report tool (only valid for child task workspaces) */ enableAgentReport?: boolean; + /** Per-stream edit tracker for doom-loop detection (not set for IPC tool calls) */ + editTracker?: StreamEditTracker; + /** Per-stream verification tracker for completion guard (not set for IPC tool calls) */ + verificationTracker?: StreamVerificationTracker; /** Experiments inherited from parent (for subagent spawning) */ experiments?: { programmaticToolCalling?: boolean; diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 8ba84526ab..b57e20a66e 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -68,6 +68,8 @@ import { } from "./streamSimulation"; import { applyToolPolicyAndExperiments, captureMcpToolTelemetry } from "./toolAssembly"; import { getErrorMessage } from "@/common/utils/errors"; +import { StreamEditTracker } from "./streamGuardrails/StreamEditTracker"; +import { StreamVerificationTracker } from "./streamGuardrails/StreamVerificationTracker"; // --------------------------------------------------------------------------- // streamMessage options @@ -699,6 +701,10 @@ export class AIService extends EventEmitter { } } + // Guardrail trackers are per-stream and only enabled for AI tool execution. + const editTracker = new StreamEditTracker(); + const verificationTracker = new StreamVerificationTracker(); + // Get model-specific tools with workspace path (correct for local or remote) const allTools = await getToolsForModel( modelString, @@ -734,6 +740,9 @@ export class AIService extends EventEmitter { workspaceId, // Only child workspaces (tasks) can report to a parent. 
enableAgentReport: Boolean(metadata.parentWorkspaceId), + // Per-stream deterministic guardrails for completion + doom-loop detection. + editTracker, + verificationTracker, // External edit detection callback recordFileState, taskService: this.taskService, diff --git a/src/node/services/streamGuardrails/StreamEditTracker.test.ts b/src/node/services/streamGuardrails/StreamEditTracker.test.ts new file mode 100644 index 0000000000..7c0f8297a8 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamEditTracker.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from "bun:test"; + +import { DOOM_LOOP_EDIT_THRESHOLD, StreamEditTracker } from "./StreamEditTracker"; + +describe("StreamEditTracker", () => { + test("recordEdit increments edit count for the same file", () => { + const tracker = new StreamEditTracker(); + + expect(tracker.recordEdit("/tmp/file.ts")).toBe(1); + expect(tracker.recordEdit("/tmp/file.ts")).toBe(2); + expect(tracker.recordEdit("/tmp/file.ts")).toBe(3); + }); + + test("hasAnyEdits is false before edits and true after first edit", () => { + const tracker = new StreamEditTracker(); + + expect(tracker.hasAnyEdits()).toBe(false); + tracker.recordEdit("/tmp/file.ts"); + expect(tracker.hasAnyEdits()).toBe(true); + }); + + test("shouldNudge is false below threshold and true at threshold", () => { + const tracker = new StreamEditTracker(); + const filePath = "/tmp/file.ts"; + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD - 1; i += 1) { + tracker.recordEdit(filePath); + } + + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + + tracker.recordEdit(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + }); + + test("shouldNudge is once per file after markNudged", () => { + const tracker = new StreamEditTracker(); + const filePath = "/tmp/file.ts"; + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + tracker.recordEdit(filePath); + } + + expect(tracker.shouldNudge(filePath, 
DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + + tracker.markNudged(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + + tracker.recordEdit(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + }); + + test("tracks edit counts independently per file", () => { + const tracker = new StreamEditTracker(); + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + tracker.recordEdit("/tmp/a.ts"); + } + tracker.recordEdit("/tmp/b.ts"); + + expect(tracker.shouldNudge("/tmp/a.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + expect(tracker.shouldNudge("/tmp/b.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + }); +}); diff --git a/src/node/services/streamGuardrails/StreamEditTracker.ts b/src/node/services/streamGuardrails/StreamEditTracker.ts new file mode 100644 index 0000000000..d6a6acdd03 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamEditTracker.ts @@ -0,0 +1,45 @@ +import assert from "@/common/utils/assert"; + +export const DOOM_LOOP_EDIT_THRESHOLD = 7; + +/** + * Tracks file edit frequency for a single stream to detect potential doom loops. + */ +export class StreamEditTracker { + private readonly editCountsByFile = new Map(); + private readonly nudgedFiles = new Set(); + + recordEdit(filePath: string): number { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + + const nextCount = (this.editCountsByFile.get(filePath) ?? 0) + 1; + this.editCountsByFile.set(filePath, nextCount); + return nextCount; + } + + hasAnyEdits(): boolean { + return this.editCountsByFile.size > 0; + } + + shouldNudge(filePath: string, threshold: number): boolean { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + assert(Number.isFinite(threshold) && threshold > 0, "threshold must be a positive number"); + + const editCount = this.editCountsByFile.get(filePath) ?? 
0; + return editCount >= threshold && !this.nudgedFiles.has(filePath); + } + + markNudged(filePath: string): void { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + this.nudgedFiles.add(filePath); + } +} diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts new file mode 100644 index 0000000000..a4c0ce9294 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, test } from "bun:test"; + +import { StreamVerificationTracker } from "./StreamVerificationTracker"; + +describe("StreamVerificationTracker", () => { + test("hasValidationAttempt is false initially and true after markValidationAttempt", () => { + const tracker = new StreamVerificationTracker(); + + expect(tracker.hasValidationAttempt()).toBe(false); + + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + }); + + test("nudge lifecycle for completion guard", () => { + const tracker = new StreamVerificationTracker(); + + expect(tracker.hasBeenNudged()).toBe(false); + expect(tracker.shouldNudgeBeforeAllowingReport(false)).toBe(false); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true); + + tracker.markNudged(); + expect(tracker.hasBeenNudged()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + }); +}); diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.ts new file mode 100644 index 0000000000..c40224305a --- /dev/null +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.ts @@ -0,0 +1,27 @@ +/** + * Tracks whether a stream attempted 
validation commands before completion. + */ +export class StreamVerificationTracker { + private validationAttempted = false; + private nudgedBeforeReport = false; + + markValidationAttempt(): void { + this.validationAttempted = true; + } + + hasValidationAttempt(): boolean { + return this.validationAttempted; + } + + hasBeenNudged(): boolean { + return this.nudgedBeforeReport; + } + + markNudged(): void { + this.nudgedBeforeReport = true; + } + + shouldNudgeBeforeAllowingReport(hasEdits: boolean): boolean { + return hasEdits && !this.validationAttempted && !this.nudgedBeforeReport; + } +} diff --git a/src/node/services/tools/agent_report.test.ts b/src/node/services/tools/agent_report.test.ts index 7b4d98388b..1c09ae5274 100644 --- a/src/node/services/tools/agent_report.test.ts +++ b/src/node/services/tools/agent_report.test.ts @@ -4,20 +4,25 @@ import type { ToolExecutionOptions } from "ai"; import { createAgentReportTool } from "./agent_report"; import { TestTempDir, createTestToolConfig } from "./testHelpers"; import type { TaskService } from "@/node/services/taskService"; +import { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker"; +import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; const mockToolCallOptions: ToolExecutionOptions = { toolCallId: "test-call-id", messages: [], }; +function createTaskService(hasActiveDescendants: boolean): TaskService { + return { + hasActiveDescendantAgentTasksForWorkspace: mock(() => hasActiveDescendants), + } as unknown as TaskService; +} describe("agent_report tool", () => { it("throws when the task has active descendants", async () => { using tempDir = new TestTempDir("test-agent-report-tool"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); - const taskService = { - hasActiveDescendantAgentTasksForWorkspace: mock(() => true), - } as unknown as TaskService; + const taskService = createTaskService(true); 
const tool = createAgentReportTool({ ...baseConfig, taskService }); @@ -40,9 +45,7 @@ describe("agent_report tool", () => { using tempDir = new TestTempDir("test-agent-report-tool-ok"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); - const taskService = { - hasActiveDescendantAgentTasksForWorkspace: mock(() => false), - } as unknown as TaskService; + const taskService = createTaskService(false); const tool = createAgentReportTool({ ...baseConfig, taskService }); @@ -55,4 +58,123 @@ describe("agent_report tool", () => { message: "Report submitted successfully.", }); }); + + it("allows report when trackers are present but no edits occurred", async () => { + using tempDir = new TestTempDir("test-agent-report-no-edits"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + const verificationTracker = new StreamVerificationTracker(); + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + const result = (await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + )) as unknown; + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("rejects first report when edits occurred but no validation was attempted", async () => { + using tempDir = new TestTempDir("test-agent-report-missing-validation"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + let caught: unknown = null; + try { + await Promise.resolve( + tool.execute!({ 
reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + } catch (error: unknown) { + caught = error; + } + + expect(caught).toBeInstanceOf(Error); + if (caught instanceof Error) { + expect(caught.message).toMatch(/no validation commands detected/i); + } + expect(verificationTracker.hasBeenNudged()).toBe(true); + }); + + it("allows a second report without validation as an explicit escape hatch", async () => { + using tempDir = new TestTempDir("test-agent-report-escape-hatch"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + let firstError: unknown = null; + try { + await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + } catch (error: unknown) { + firstError = error; + } + + expect(firstError).toBeInstanceOf(Error); + if (firstError instanceof Error) { + expect(firstError.message).toMatch(/no validation commands detected/i); + } + + const secondResult: unknown = await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + + expect(secondResult).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("allows report after validation attempt", async () => { + using tempDir = new TestTempDir("test-agent-report-validated"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + verificationTracker.markValidationAttempt(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: 
createTaskService(false), + editTracker, + verificationTracker, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); }); diff --git a/src/node/services/tools/agent_report.ts b/src/node/services/tools/agent_report.ts index 99f949a1cb..e762c55a32 100644 --- a/src/node/services/tools/agent_report.ts +++ b/src/node/services/tools/agent_report.ts @@ -20,6 +20,20 @@ export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => ); } + // Guard: if edits were made but no validation was attempted, nudge the agent to verify. + const hasEdits = config.editTracker?.hasAnyEdits() ?? false; + const hasValidated = config.verificationTracker?.hasValidationAttempt() ?? false; + if (hasEdits && !hasValidated) { + if (!config.verificationTracker?.hasBeenNudged()) { + config.verificationTracker?.markNudged(); + throw new Error( + "agent_report rejected: no validation commands detected after file edits. " + + "Run the most relevant check (tests, typecheck, lint) and then call agent_report again. " + + "If validation is not applicable, call agent_report again to confirm." + ); + } + } + // Intentionally no side-effects. The backend orchestrator consumes the tool-call args // via persisted history/partial state once the tool call completes successfully. 
// The stream continues after this so the SDK can record usage, while StreamManager diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index bf1d13f0d6..dcf77444fc 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from "bun:test"; import { LocalRuntime } from "@/node/runtime/LocalRuntime"; -import { createBashTool } from "./bash"; +import { createBashTool, looksLikeValidationCommand } from "./bash"; import type { BashOutputEvent } from "@/common/types/stream"; import type { BashToolArgs, BashToolResult } from "@/common/types/tools"; import { BASH_MAX_TOTAL_BYTES } from "@/common/constants/toolLimits"; @@ -43,6 +43,25 @@ function createTestBashTool() { }; } +describe("looksLikeValidationCommand", () => { + it("matches common validation command patterns", () => { + expect(looksLikeValidationCommand("make test")).toBe(true); + expect(looksLikeValidationCommand("make typecheck")).toBe(true); + expect(looksLikeValidationCommand("bun test")).toBe(true); + expect(looksLikeValidationCommand("bun run lint")).toBe(true); + expect(looksLikeValidationCommand("vitest --run")).toBe(true); + expect(looksLikeValidationCommand("run_and_report typecheck make typecheck")).toBe(true); + expect(looksLikeValidationCommand("npm run test\npnpm run lint")).toBe(true); + }); + + it("does not match non-validation commands", () => { + expect(looksLikeValidationCommand("make build")).toBe(false); + expect(looksLikeValidationCommand("bun install")).toBe(false); + expect(looksLikeValidationCommand("echo test")).toBe(false); + expect(looksLikeValidationCommand("cargo build")).toBe(false); + }); +}); + describe("bash tool", () => { it("should execute a simple command successfully", async () => { using testEnv = createTestBashTool(); diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index b51b70c5ad..42c969165a 100644 --- 
a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -522,6 +522,24 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu return null; // Valid } +const VALIDATION_PATTERNS: RegExp[] = [ + /(^|\n)\s*run_and_report\b/, + /(^|\n)\s*make\s+(test|typecheck|lint|static-check|fmt-check)\b/, + /(^|\n)\s*bun\s+(test|run\s+test|run\s+typecheck|run\s+lint)\b/, + /(^|\n)\s*npm\s+(test|run\s+(test|typecheck|lint))\b/, + /(^|\n)\s*pnpm\s+(test|run\s+(test|typecheck|lint))\b/, + /(^|\n)\s*yarn\s+(test|run\s+(test|typecheck|lint))\b/, + /(^|\n)\s*tsc\b/, + /(^|\n)\s*eslint\b/, + /(^|\n)\s*vitest\b/, + /(^|\n)\s*pytest\b/, + /(^|\n)\s*cargo\s+(test|check|clippy)\b/, +]; + +export function looksLikeValidationCommand(script: string): boolean { + return VALIDATION_PATTERNS.some((pattern) => pattern.test(script)); +} + /** * Rewrite cmd.exe-style null-device redirects (e.g. `>nul`, `2>nul`) into `/dev/null`. * @@ -861,6 +879,11 @@ export const createBashTool: ToolFactory = (config: ToolConfiguration) => { const validationError = validateScript(script, config); if (validationError) return validationError; + // Mark validation attempts for the pre-completion verification guard. + if (looksLikeValidationCommand(script)) { + config.verificationTracker?.markValidationAttempt(); + } + // Warn when the model appears to be reading files via bash output (cat/rg/grep). // Reading files via bash output is fragile (may be truncated or auto-filtered); // file_read supports paging and avoids silent context loss. 
diff --git a/src/node/services/tools/file_edit_operation.test.ts b/src/node/services/tools/file_edit_operation.test.ts index d5d66b7fe0..f3da90c34e 100644 --- a/src/node/services/tools/file_edit_operation.test.ts +++ b/src/node/services/tools/file_edit_operation.test.ts @@ -4,6 +4,11 @@ import * as path from "path"; import { executeFileEditOperation } from "./file_edit_operation"; import type { Runtime } from "@/node/runtime/Runtime"; import { LocalRuntime } from "@/node/runtime/LocalRuntime"; +import { MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD } from "@/common/utils/tools/internalToolResultFields"; +import { + DOOM_LOOP_EDIT_THRESHOLD, + StreamEditTracker, +} from "@/node/services/streamGuardrails/StreamEditTracker"; import { getTestDeps, TestTempDir } from "./testHelpers"; @@ -282,3 +287,81 @@ describe("executeFileEditOperation plan mode enforcement", () => { expect(resolvePathCalls).toContain("/home/user/.mux/sessions/ws/plan.md"); }); }); + +describe("executeFileEditOperation doom-loop guard", () => { + test("attaches a model-only notification when edit threshold is reached", async () => { + using tempDir = new TestTempDir("doom-loop-guard-test"); + + const filePath = path.join(tempDir.path, "main.ts"); + await fs.writeFile(filePath, "const x = 0;\n"); + + const runtime = new LocalRuntime(tempDir.path); + const editTracker = new StreamEditTracker(); + + let thresholdResult: unknown; + for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + thresholdResult = await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }), + }); + } + + expect(thresholdResult).toBeDefined(); + const resultRecord = thresholdResult as Record; + + expect(resultRecord.success).toBe(true); + expect(Array.isArray(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD])).toBe(true); + + const notifications = 
resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD] as string[]; + expect(notifications[0]).toContain("Potential doom loop"); + expect(notifications[0]).toContain(filePath); + }); + + test("nudges at most once per file for a stream", async () => { + using tempDir = new TestTempDir("doom-loop-guard-once-test"); + + const filePath = path.join(tempDir.path, "main.ts"); + await fs.writeFile(filePath, "const x = 0;\n"); + + const runtime = new LocalRuntime(tempDir.path); + const editTracker = new StreamEditTracker(); + + for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }), + }); + } + + const postThresholdResult = await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ + success: true, + newContent: `const x = ${DOOM_LOOP_EDIT_THRESHOLD + 1};\n`, + metadata: {}, + }), + }); + + const resultRecord = postThresholdResult as unknown as Record; + expect(resultRecord.success).toBe(true); + expect(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD]).toBeUndefined(); + }); +}); diff --git a/src/node/services/tools/file_edit_operation.ts b/src/node/services/tools/file_edit_operation.ts index 50a334c029..be401a9c60 100644 --- a/src/node/services/tools/file_edit_operation.ts +++ b/src/node/services/tools/file_edit_operation.ts @@ -13,6 +13,8 @@ import { import { RuntimeError } from "@/node/runtime/Runtime"; import { readFileString, writeFileString } from "@/node/utils/runtime/helpers"; import { getErrorMessage } from "@/common/utils/errors"; +import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields"; +import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker"; type 
FileEditOperationResult = | { @@ -131,6 +133,8 @@ export async function executeFileEditOperation({ throw err; } + let doomLoopNudge: string | undefined; + // Record file state for post-compaction attachment tracking if (config.recordFileState) { try { @@ -144,9 +148,18 @@ export async function executeFileEditOperation({ } } + // Track repeated edits to detect potential doom loops in exec mode. + if (!config.planFileOnly && config.editTracker) { + const editCount = config.editTracker.recordEdit(resolvedPath); + if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { + config.editTracker.markNudged(resolvedPath); + doomLoopNudge = `Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n- Consider a fundamentally different approach (not a small variation of what you've been trying).`; + } + } + const diff = generateDiff(resolvedPath, originalContent, operationResult.newContent); - return { + const baseResult: FileEditDiffSuccessBase & TMetadata = { success: true, diff: FILE_EDIT_DIFF_OMITTED_MESSAGE, ui_only: { @@ -155,8 +168,14 @@ export async function executeFileEditOperation({ }, }, ...operationResult.metadata, - ...(pathWarning && { warning: pathWarning }), + ...(pathWarning ? 
{ warning: pathWarning } : {}), }; + + if (doomLoopNudge) { + return attachModelOnlyToolNotifications(baseResult, [doomLoopNudge]) as typeof baseResult; + } + + return baseResult; } catch (error) { if (error && typeof error === "object" && "code" in error) { const nodeError = error as { code?: string }; From 60ec04a72dd6ca4d3145f6aef554993b7762f57e Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:14:55 +0000 Subject: [PATCH 2/8] fix: tighten run_and_report validation detection to check wrapped command run_and_report is a generic wrapper, not inherently a validation command. Only match when the wrapped command itself is a validation command (e.g., run_and_report typecheck make typecheck). --- src/node/services/tools/bash.test.ts | 2 ++ src/node/services/tools/bash.ts | 21 ++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index dcf77444fc..bdcd778996 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -59,6 +59,8 @@ describe("looksLikeValidationCommand", () => { expect(looksLikeValidationCommand("bun install")).toBe(false); expect(looksLikeValidationCommand("echo test")).toBe(false); expect(looksLikeValidationCommand("cargo build")).toBe(false); + // run_and_report wrapping a non-validation command should NOT trigger + expect(looksLikeValidationCommand("run_and_report install bun install")).toBe(false); }); }); diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index 42c969165a..12774d1168 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -522,18 +522,17 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu return null; // Valid } +// Patterns for commands that are inherently validation (tests, typecheck, lint). 
+// `run_and_report` is a generic wrapper (`run_and_report <name> <command>`), so we +// match it only when the wrapped command itself is a validation command. +const VALIDATION_COMMAND_RE = + /(?:make\s+(?:test|typecheck|lint|static-check|fmt-check)|bun\s+(?:test|run\s+(?:test|typecheck|lint))|npm\s+(?:test|run\s+(?:test|typecheck|lint))|pnpm\s+(?:test|run\s+(?:test|typecheck|lint))|yarn\s+(?:test|run\s+(?:test|typecheck|lint))|tsc|eslint|vitest|pytest|cargo\s+(?:test|check|clippy))\b/; + const VALIDATION_PATTERNS: RegExp[] = [ - /(^|\n)\s*run_and_report\b/, - /(^|\n)\s*make\s+(test|typecheck|lint|static-check|fmt-check)\b/, - /(^|\n)\s*bun\s+(test|run\s+test|run\s+typecheck|run\s+lint)\b/, - /(^|\n)\s*npm\s+(test|run\s+(test|typecheck|lint))\b/, - /(^|\n)\s*pnpm\s+(test|run\s+(test|typecheck|lint))\b/, - /(^|\n)\s*yarn\s+(test|run\s+(test|typecheck|lint))\b/, - /(^|\n)\s*tsc\b/, - /(^|\n)\s*eslint\b/, - /(^|\n)\s*vitest\b/, - /(^|\n)\s*pytest\b/, - /(^|\n)\s*cargo\s+(test|check|clippy)\b/, + // run_and_report + new RegExp(`(^|\\n)\\s*run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`), + // Standalone validation commands at the start of a line + new RegExp(`(^|\\n)\\s*${VALIDATION_COMMAND_RE.source}`), ]; export function looksLikeValidationCommand(script: string): boolean { From abb5fb5da4ad15ee3c3dba2d7c92e6837cc60691 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:22:26 +0000 Subject: [PATCH 3/8] fix: track new-file creation in edit tracker for verification guard The create-file branch in file_edit_insert.ts bypassed executeFileEditOperation, so new files weren't counted by the edit tracker. This meant a stream that only created files could skip verification. 
--- src/node/services/tools/file_edit_insert.ts | 23 ++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/node/services/tools/file_edit_insert.ts b/src/node/services/tools/file_edit_insert.ts index f0a15783d9..2ec6b1a84e 100644 --- a/src/node/services/tools/file_edit_insert.ts +++ b/src/node/services/tools/file_edit_insert.ts @@ -14,6 +14,8 @@ import { fileExists } from "@/node/utils/runtime/fileExists"; import { writeFileString } from "@/node/utils/runtime/helpers"; import { RuntimeError } from "@/node/runtime/Runtime"; import { getErrorMessage } from "@/common/utils/errors"; +import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields"; +import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker"; const READ_AND_RETRY_NOTE = `${EDIT_FAILED_NOTE_PREFIX} ${NOTE_READ_FILE_RETRY}`; @@ -97,8 +99,21 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration) } } + // Track new-file creation for doom-loop detection and verification guard + let doomLoopNudge: string | undefined; + if (!config.planFileOnly && config.editTracker) { + const editCount = config.editTracker.recordEdit(resolvedPath); + if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { + config.editTracker.markNudged(resolvedPath); + doomLoopNudge = + `Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. 
` + + `Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n` + + `- Consider a fundamentally different approach (not a small variation of what you've been trying).`; + } + } + const diff = generateDiff(resolvedPath, "", content); - return { + const baseResult: FileEditInsertToolResult = { success: true, diff: FILE_EDIT_DIFF_OMITTED_MESSAGE, ui_only: { @@ -108,6 +123,12 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration) }, ...(pathWarning && { warning: pathWarning }), }; + if (doomLoopNudge) { + return attachModelOnlyToolNotifications(baseResult, [ + doomLoopNudge, + ]) as FileEditInsertToolResult; + } + return baseResult; } return executeFileEditOperation({ From 6f07a38a6d5f8e5d0fe06945d0bcfb0dc8b4ddde Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:28:32 +0000 Subject: [PATCH 4/8] fix: recognize validation commands after shell operators (&&, ;, |) Match validation commands like 'cd packages/app && make test' and 'source .env; bun test' by accepting shell operators as command prefixes in addition to line start. 
--- src/node/services/tools/bash.test.ts | 7 +++++++ src/node/services/tools/bash.ts | 12 ++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index bdcd778996..136e18d92c 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -52,6 +52,13 @@ describe("looksLikeValidationCommand", () => { expect(looksLikeValidationCommand("vitest --run")).toBe(true); expect(looksLikeValidationCommand("run_and_report typecheck make typecheck")).toBe(true); expect(looksLikeValidationCommand("npm run test\npnpm run lint")).toBe(true); + // Validation commands after shell operators (monorepo/subdirectory workflows) + expect(looksLikeValidationCommand("cd packages/app && make test")).toBe(true); + expect(looksLikeValidationCommand("cd packages/app && bun test")).toBe(true); + expect(looksLikeValidationCommand("source .env; make typecheck")).toBe(true); + expect(looksLikeValidationCommand("run_and_report unit cd packages/app && bun test")).toBe( + true + ); }); it("does not match non-validation commands", () => { diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index 12774d1168..32eabbdfa3 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -528,11 +528,15 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu const VALIDATION_COMMAND_RE = /(?:make\s+(?:test|typecheck|lint|static-check|fmt-check)|bun\s+(?:test|run\s+(?:test|typecheck|lint))|npm\s+(?:test|run\s+(?:test|typecheck|lint))|pnpm\s+(?:test|run\s+(?:test|typecheck|lint))|yarn\s+(?:test|run\s+(?:test|typecheck|lint))|tsc|eslint|vitest|pytest|cargo\s+(?:test|check|clippy))\b/; +// Match validation commands at line start, after shell operators (&&, ||, ;, |), +// or after run_and_report wrappers, to handle monorepo/subdirectory workflows +// like `cd packages/app && make test`. 
+const CMD_PREFIX = String.raw`(?:^|\n|&&|\|\||[;|])\s*`; const VALIDATION_PATTERNS: RegExp[] = [ - // run_and_report - new RegExp(`(^|\\n)\\s*run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`), - // Standalone validation commands at the start of a line - new RegExp(`(^|\\n)\\s*${VALIDATION_COMMAND_RE.source}`), + // run_and_report (may include cd/shell before the command) + new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+.*?${VALIDATION_COMMAND_RE.source}`), + // Validation commands at line start or after shell operators + new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`), ]; export function looksLikeValidationCommand(script: string): boolean { From 46f3848ba74c2e0f1ca82922c8e0525a58daccab Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:34:57 +0000 Subject: [PATCH 5/8] fix: narrow run_and_report regex to match only the actual command Remove .*? from the run_and_report pattern so that only the third word (the actual command) is checked against validation patterns. Chained commands after && are already caught by the standalone pattern. 
--- src/node/services/tools/bash.test.ts | 2 ++ src/node/services/tools/bash.ts | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index 136e18d92c..acccb44296 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -68,6 +68,8 @@ describe("looksLikeValidationCommand", () => { expect(looksLikeValidationCommand("cargo build")).toBe(false); // run_and_report wrapping a non-validation command should NOT trigger expect(looksLikeValidationCommand("run_and_report install bun install")).toBe(false); + // run_and_report with validation text appearing as arguments, not the actual command + expect(looksLikeValidationCommand("run_and_report note echo make test")).toBe(false); }); }); diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index 32eabbdfa3..917f8b02e4 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -533,8 +533,10 @@ const VALIDATION_COMMAND_RE = // like `cd packages/app && make test`. const CMD_PREFIX = String.raw`(?:^|\n|&&|\|\||[;|])\s*`; const VALIDATION_PATTERNS: RegExp[] = [ - // run_and_report (may include cd/shell before the command) - new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+.*?${VALIDATION_COMMAND_RE.source}`), + // run_and_report — only when the actual command + // (third word) is a validation command. Chained commands like + // `run_and_report unit cd app && bun test` are caught by the standalone pattern below. 
+ new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`), // Validation commands at line start or after shell operators new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`), ]; From b5757d4d7ec16b0333925dc751220ea0af130415 Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 00:41:20 +0000 Subject: [PATCH 6/8] docs: add comment explaining validation heuristic limitations Shell command parsing with regex is inherently imperfect. The escape hatch (second agent_report always passes) covers false negatives from env prefixes and shell wrappers. --- src/node/services/tools/bash.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index 917f8b02e4..24a851632c 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -536,6 +536,10 @@ const VALIDATION_PATTERNS: RegExp[] = [ // run_and_report — only when the actual command // (third word) is a validation command. Chained commands like // `run_and_report unit cd app && bun test` are caught by the standalone pattern below. + // NOTE: This is a heuristic. Environment prefixes like `env CI=1 bun test` or + // `bash -c "make test"` won't match the run_and_report rule, but may be caught by + // the standalone pattern if chained with &&/;. The agent_report escape hatch (second + // attempt always passes) covers any remaining false negatives. 
new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`), // Validation commands at line start or after shell operators new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`), From 07e9e67fcd06b9108d456e39fa1361c762ac292c Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 09:17:39 +0000 Subject: [PATCH 7/8] fix: exclude background commands from verification credit Background bash commands (run_in_background=true) haven't produced results yet when they start, so they shouldn't count as 'validation attempted' for the pre-completion verification guard. --- src/node/services/tools/bash.test.ts | 41 ++++++++++++++++++++++++++++ src/node/services/tools/bash.ts | 4 ++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index acccb44296..df5d24a1a4 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -19,6 +19,7 @@ function isForegroundSuccess( } import { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; +import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; // Mock ToolCallOptions for testing const mockToolCallOptions: ToolExecutionOptions = { @@ -1921,3 +1922,43 @@ describe("bash tool - background execution", () => { tempDir[Symbol.dispose](); }); }); + +describe("bash tool - verification tracker", () => { + it("should mark verification for foreground validation commands", async () => { + const tempDir = new TestTempDir("test-bash-verify"); + const config = createTestToolConfig(tempDir.path); + const tracker = new StreamVerificationTracker(); + config.verificationTracker = tracker; + const tool = createBashTool(config); + + await tool.execute!( + { script: "make test", timeout_secs: 5, run_in_background: false, display_name: "test" }, + mockToolCallOptions + ); + + 
expect(tracker.hasValidationAttempt()).toBe(true); + tempDir[Symbol.dispose](); + }); + + it("should not mark verification for background validation commands", async () => { + const manager = new BackgroundProcessManager("/tmp/mux-test-verify-bg"); + const tempDir = new TestTempDir("test-bash-verify-bg"); + const config = createTestToolConfig(tempDir.path); + const tracker = new StreamVerificationTracker(); + config.verificationTracker = tracker; + config.backgroundProcessManager = manager; + const tool = createBashTool(config); + + await tool.execute!( + { script: "make test", timeout_secs: 5, run_in_background: true, display_name: "test-bg" }, + mockToolCallOptions + ); + + // Background commands haven't produced results yet, so they shouldn't + // count as "validation attempted" + expect(tracker.hasValidationAttempt()).toBe(false); + + await manager.terminateAll(); + tempDir[Symbol.dispose](); + }); +}); diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index 24a851632c..f8f94b9b61 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -889,7 +889,9 @@ export const createBashTool: ToolFactory = (config: ToolConfiguration) => { if (validationError) return validationError; // Mark validation attempts for the pre-completion verification guard. - if (looksLikeValidationCommand(script)) { + // Only count foreground commands — background processes haven't produced + // results yet, so they don't count as "validation attempted". 
+ if (!run_in_background && looksLikeValidationCommand(script)) { config.verificationTracker?.markValidationAttempt(); } From f8b64a38ffa88cc415379f2ef61e6173a8bd72ab Mon Sep 17 00:00:00 2001 From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com> Date: Wed, 18 Feb 2026 09:33:37 +0000 Subject: [PATCH 8/8] fix: reset validation state on file edits so pre-edit validation doesn't count Previously, running make test before editing files would permanently satisfy the verification guard. Now file edits reset the validation tracker, ensuring only post-edit validation counts. --- .../StreamVerificationTracker.test.ts | 29 +++++++++++++++++++ .../StreamVerificationTracker.ts | 9 ++++++ src/node/services/tools/file_edit_insert.ts | 4 ++- .../services/tools/file_edit_operation.ts | 2 ++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts index a4c0ce9294..1a4cb315ed 100644 --- a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts @@ -27,4 +27,33 @@ describe("StreamVerificationTracker", () => { expect(tracker.hasValidationAttempt()).toBe(true); expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); }); + + test("resetValidation clears validation state so pre-edit validation doesn't count", () => { + const tracker = new StreamVerificationTracker(); + + // Validate first + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + + // Then an edit happens — validation should be reset + tracker.resetValidation(); + expect(tracker.hasValidationAttempt()).toBe(false); + + // Guard should now nudge because validation was pre-edit + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true); + }); + + test("post-edit validation still counts after reset", () => { + const tracker = new 
StreamVerificationTracker(); + + // Validate, then edit resets it + tracker.markValidationAttempt(); + tracker.resetValidation(); + expect(tracker.hasValidationAttempt()).toBe(false); + + // Validate again (post-edit) — should count + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + }); }); diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.ts index c40224305a..937d4fe5b2 100644 --- a/src/node/services/streamGuardrails/StreamVerificationTracker.ts +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.ts @@ -1,5 +1,8 @@ /** * Tracks whether a stream attempted validation commands before completion. + * + * Validation state is reset whenever a file edit is recorded, so only + * post-edit validation counts towards the pre-completion guard. */ export class StreamVerificationTracker { private validationAttempted = false; @@ -9,6 +12,12 @@ export class StreamVerificationTracker { this.validationAttempted = true; } + /** Reset validation state — called when new edits are recorded so that + * pre-edit validation doesn't satisfy the post-edit verification guard. */ + resetValidation(): void { + this.validationAttempted = false; + } + hasValidationAttempt(): boolean { return this.validationAttempted; } diff --git a/src/node/services/tools/file_edit_insert.ts b/src/node/services/tools/file_edit_insert.ts index 2ec6b1a84e..5bcc276aff 100644 --- a/src/node/services/tools/file_edit_insert.ts +++ b/src/node/services/tools/file_edit_insert.ts @@ -99,10 +99,12 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration) } } - // Track new-file creation for doom-loop detection and verification guard + // Track new-file creation for doom-loop detection and verification guard. + // Reset verification state so pre-edit validation doesn't count. 
let doomLoopNudge: string | undefined; if (!config.planFileOnly && config.editTracker) { const editCount = config.editTracker.recordEdit(resolvedPath); + config.verificationTracker?.resetValidation(); if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { config.editTracker.markNudged(resolvedPath); doomLoopNudge = diff --git a/src/node/services/tools/file_edit_operation.ts b/src/node/services/tools/file_edit_operation.ts index be401a9c60..4e33745235 100644 --- a/src/node/services/tools/file_edit_operation.ts +++ b/src/node/services/tools/file_edit_operation.ts @@ -149,8 +149,10 @@ export async function executeFileEditOperation({ } // Track repeated edits to detect potential doom loops in exec mode. + // Reset verification state so pre-edit validation doesn't count. if (!config.planFileOnly && config.editTracker) { const editCount = config.editTracker.recordEdit(resolvedPath); + config.verificationTracker?.resetValidation(); if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { config.editTracker.markNudged(resolvedPath); doomLoopNudge = `Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n- Consider a fundamentally different approach (not a small variation of what you've been trying).`;