From a23c1736629537f1e1e27e8087d81d1a42e22bc6 Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:05:42 +0000
Subject: [PATCH 1/8] =?UTF-8?q?=F0=9F=A4=96=20fix:=20add=20deterministic?=
 =?UTF-8?q?=20stream=20guardrails=20for=20verification=20and=20doom=20loop?=
 =?UTF-8?q?s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/common/utils/tools/tools.ts               |   6 +
 src/node/services/aiService.ts                |   9 ++
 .../StreamEditTracker.test.ts                 |  64 +++++++++
 .../streamGuardrails/StreamEditTracker.ts     |  45 ++++++
 .../StreamVerificationTracker.test.ts         |  30 ++++
 .../StreamVerificationTracker.ts              |  27 ++++
 src/node/services/tools/agent_report.test.ts  | 134 +++++++++++++++++-
 src/node/services/tools/agent_report.ts       |  14 ++
 src/node/services/tools/bash.test.ts          |  21 ++-
 src/node/services/tools/bash.ts               |  23 +++
 .../tools/file_edit_operation.test.ts         |  83 +++++++++++
 .../services/tools/file_edit_operation.ts     |  23 ++-
 12 files changed, 470 insertions(+), 9 deletions(-)
 create mode 100644 src/node/services/streamGuardrails/StreamEditTracker.test.ts
 create mode 100644 src/node/services/streamGuardrails/StreamEditTracker.ts
 create mode 100644 src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
 create mode 100644 src/node/services/streamGuardrails/StreamVerificationTracker.ts

diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
index d78fff8269..0ac75fb4e6 100644
--- a/src/common/utils/tools/tools.ts
+++ b/src/common/utils/tools/tools.ts
@@ -42,6 +42,8 @@ import type { WorkspaceChatMessage } from "@/common/orpc/types";
 import type { FileState } from "@/node/services/agentSession";
 import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition";
 import type { AgentSkillDescriptor } from "@/common/types/agentSkill";
+import type { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker";
+import type { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker";
 
 /**
  * Configuration for tools that need runtime context
@@ -80,6 +82,10 @@ export interface ToolConfiguration {
   taskService?: TaskService;
   /** Enable agent_report tool (only valid for child task workspaces) */
   enableAgentReport?: boolean;
+  /** Per-stream edit tracker for doom-loop detection (not set for IPC tool calls) */
+  editTracker?: StreamEditTracker;
+  /** Per-stream verification tracker for completion guard (not set for IPC tool calls) */
+  verificationTracker?: StreamVerificationTracker;
   /** Experiments inherited from parent (for subagent spawning) */
   experiments?: {
     programmaticToolCalling?: boolean;
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 8ba84526ab..b57e20a66e 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -68,6 +68,8 @@ import {
 } from "./streamSimulation";
 import { applyToolPolicyAndExperiments, captureMcpToolTelemetry } from "./toolAssembly";
 import { getErrorMessage } from "@/common/utils/errors";
+import { StreamEditTracker } from "./streamGuardrails/StreamEditTracker";
+import { StreamVerificationTracker } from "./streamGuardrails/StreamVerificationTracker";
 
 // ---------------------------------------------------------------------------
 // streamMessage options
@@ -699,6 +701,10 @@ export class AIService extends EventEmitter {
         }
       }
 
+      // Guardrail trackers are per-stream and only enabled for AI tool execution.
+      const editTracker = new StreamEditTracker();
+      const verificationTracker = new StreamVerificationTracker();
+
       // Get model-specific tools with workspace path (correct for local or remote)
       const allTools = await getToolsForModel(
         modelString,
@@ -734,6 +740,9 @@ export class AIService extends EventEmitter {
           workspaceId,
           // Only child workspaces (tasks) can report to a parent.
           enableAgentReport: Boolean(metadata.parentWorkspaceId),
+          // Per-stream deterministic guardrails for completion + doom-loop detection.
+          editTracker,
+          verificationTracker,
           // External edit detection callback
           recordFileState,
           taskService: this.taskService,
diff --git a/src/node/services/streamGuardrails/StreamEditTracker.test.ts b/src/node/services/streamGuardrails/StreamEditTracker.test.ts
new file mode 100644
index 0000000000..7c0f8297a8
--- /dev/null
+++ b/src/node/services/streamGuardrails/StreamEditTracker.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, test } from "bun:test";
+
+import { DOOM_LOOP_EDIT_THRESHOLD, StreamEditTracker } from "./StreamEditTracker";
+
+describe("StreamEditTracker", () => {
+  test("recordEdit increments edit count for the same file", () => {
+    const tracker = new StreamEditTracker();
+
+    expect(tracker.recordEdit("/tmp/file.ts")).toBe(1);
+    expect(tracker.recordEdit("/tmp/file.ts")).toBe(2);
+    expect(tracker.recordEdit("/tmp/file.ts")).toBe(3);
+  });
+
+  test("hasAnyEdits is false before edits and true after first edit", () => {
+    const tracker = new StreamEditTracker();
+
+    expect(tracker.hasAnyEdits()).toBe(false);
+    tracker.recordEdit("/tmp/file.ts");
+    expect(tracker.hasAnyEdits()).toBe(true);
+  });
+
+  test("shouldNudge is false below threshold and true at threshold", () => {
+    const tracker = new StreamEditTracker();
+    const filePath = "/tmp/file.ts";
+
+    for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD - 1; i += 1) {
+      tracker.recordEdit(filePath);
+    }
+
+    expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false);
+
+    tracker.recordEdit(filePath);
+    expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(true);
+  });
+
+  test("shouldNudge is once per file after markNudged", () => {
+    const tracker = new StreamEditTracker();
+    const filePath = "/tmp/file.ts";
+
+    for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) {
+      tracker.recordEdit(filePath);
+    }
+
+    expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(true);
+
+    tracker.markNudged(filePath);
+    expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false);
+
+    tracker.recordEdit(filePath);
+    expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false);
+  });
+
+  test("tracks edit counts independently per file", () => {
+    const tracker = new StreamEditTracker();
+
+    for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) {
+      tracker.recordEdit("/tmp/a.ts");
+    }
+    tracker.recordEdit("/tmp/b.ts");
+
+    expect(tracker.shouldNudge("/tmp/a.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(true);
+    expect(tracker.shouldNudge("/tmp/b.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(false);
+  });
+});
diff --git a/src/node/services/streamGuardrails/StreamEditTracker.ts b/src/node/services/streamGuardrails/StreamEditTracker.ts
new file mode 100644
index 0000000000..d6a6acdd03
--- /dev/null
+++ b/src/node/services/streamGuardrails/StreamEditTracker.ts
@@ -0,0 +1,45 @@
+import assert from "@/common/utils/assert";
+
+export const DOOM_LOOP_EDIT_THRESHOLD = 7;
+
+/**
+ * Tracks file edit frequency for a single stream to detect potential doom loops.
+ */
+export class StreamEditTracker {
+  private readonly editCountsByFile = new Map<string, number>();
+  private readonly nudgedFiles = new Set<string>();
+
+  recordEdit(filePath: string): number {
+    assert(
+      typeof filePath === "string" && filePath.length > 0,
+      "filePath must be a non-empty string"
+    );
+
+    const nextCount = (this.editCountsByFile.get(filePath) ?? 0) + 1;
+    this.editCountsByFile.set(filePath, nextCount);
+    return nextCount;
+  }
+
+  hasAnyEdits(): boolean {
+    return this.editCountsByFile.size > 0;
+  }
+
+  shouldNudge(filePath: string, threshold: number): boolean {
+    assert(
+      typeof filePath === "string" && filePath.length > 0,
+      "filePath must be a non-empty string"
+    );
+    assert(Number.isFinite(threshold) && threshold > 0, "threshold must be a positive number");
+
+    const editCount = this.editCountsByFile.get(filePath) ?? 0;
+    return editCount >= threshold && !this.nudgedFiles.has(filePath);
+  }
+
+  markNudged(filePath: string): void {
+    assert(
+      typeof filePath === "string" && filePath.length > 0,
+      "filePath must be a non-empty string"
+    );
+    this.nudgedFiles.add(filePath);
+  }
+}
diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
new file mode 100644
index 0000000000..a4c0ce9294
--- /dev/null
+++ b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
@@ -0,0 +1,30 @@
+import { describe, expect, test } from "bun:test";
+
+import { StreamVerificationTracker } from "./StreamVerificationTracker";
+
+describe("StreamVerificationTracker", () => {
+  test("hasValidationAttempt is false initially and true after markValidationAttempt", () => {
+    const tracker = new StreamVerificationTracker();
+
+    expect(tracker.hasValidationAttempt()).toBe(false);
+
+    tracker.markValidationAttempt();
+    expect(tracker.hasValidationAttempt()).toBe(true);
+  });
+
+  test("nudge lifecycle for completion guard", () => {
+    const tracker = new StreamVerificationTracker();
+
+    expect(tracker.hasBeenNudged()).toBe(false);
+    expect(tracker.shouldNudgeBeforeAllowingReport(false)).toBe(false);
+    expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true);
+
+    tracker.markNudged();
+    expect(tracker.hasBeenNudged()).toBe(true);
+    expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false);
+
+    tracker.markValidationAttempt();
+    expect(tracker.hasValidationAttempt()).toBe(true);
+    expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false);
+  });
+});
diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.ts
new file mode 100644
index 0000000000..c40224305a
--- /dev/null
+++ b/src/node/services/streamGuardrails/StreamVerificationTracker.ts
@@ -0,0 +1,27 @@
+/**
+ * Tracks whether a stream attempted validation commands before completion.
+ */
+export class StreamVerificationTracker {
+  private validationAttempted = false;
+  private nudgedBeforeReport = false;
+
+  markValidationAttempt(): void {
+    this.validationAttempted = true;
+  }
+
+  hasValidationAttempt(): boolean {
+    return this.validationAttempted;
+  }
+
+  hasBeenNudged(): boolean {
+    return this.nudgedBeforeReport;
+  }
+
+  markNudged(): void {
+    this.nudgedBeforeReport = true;
+  }
+
+  shouldNudgeBeforeAllowingReport(hasEdits: boolean): boolean {
+    return hasEdits && !this.validationAttempted && !this.nudgedBeforeReport;
+  }
+}
diff --git a/src/node/services/tools/agent_report.test.ts b/src/node/services/tools/agent_report.test.ts
index 7b4d98388b..1c09ae5274 100644
--- a/src/node/services/tools/agent_report.test.ts
+++ b/src/node/services/tools/agent_report.test.ts
@@ -4,20 +4,25 @@ import type { ToolExecutionOptions } from "ai";
 import { createAgentReportTool } from "./agent_report";
 import { TestTempDir, createTestToolConfig } from "./testHelpers";
 import type { TaskService } from "@/node/services/taskService";
+import { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker";
+import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker";
 
 const mockToolCallOptions: ToolExecutionOptions = {
   toolCallId: "test-call-id",
   messages: [],
 };
+function createTaskService(hasActiveDescendants: boolean): TaskService {
+  return {
+    hasActiveDescendantAgentTasksForWorkspace: mock(() => hasActiveDescendants),
+  } as unknown as TaskService;
+}
 
 describe("agent_report tool", () => {
   it("throws when the task has active descendants", async () => {
     using tempDir = new TestTempDir("test-agent-report-tool");
     const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
 
-    const taskService = {
-      hasActiveDescendantAgentTasksForWorkspace: mock(() => true),
-    } as unknown as TaskService;
+    const taskService = createTaskService(true);
 
     const tool = createAgentReportTool({ ...baseConfig, taskService });
 
@@ -40,9 +45,7 @@ describe("agent_report tool", () => {
     using tempDir = new TestTempDir("test-agent-report-tool-ok");
     const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
 
-    const taskService = {
-      hasActiveDescendantAgentTasksForWorkspace: mock(() => false),
-    } as unknown as TaskService;
+    const taskService = createTaskService(false);
 
     const tool = createAgentReportTool({ ...baseConfig, taskService });
 
@@ -55,4 +58,123 @@ describe("agent_report tool", () => {
       message: "Report submitted successfully.",
     });
   });
+
+  it("allows report when trackers are present but no edits occurred", async () => {
+    using tempDir = new TestTempDir("test-agent-report-no-edits");
+    const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
+
+    const editTracker = new StreamEditTracker();
+    const verificationTracker = new StreamVerificationTracker();
+    const tool = createAgentReportTool({
+      ...baseConfig,
+      taskService: createTaskService(false),
+      editTracker,
+      verificationTracker,
+    });
+
+    const result = (await Promise.resolve(
+      tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions)
+    )) as unknown;
+
+    expect(result).toEqual({
+      success: true,
+      message: "Report submitted successfully.",
+    });
+  });
+
+  it("rejects first report when edits occurred but no validation was attempted", async () => {
+    using tempDir = new TestTempDir("test-agent-report-missing-validation");
+    const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
+
+    const editTracker = new StreamEditTracker();
+    editTracker.recordEdit("/tmp/file.ts");
+    const verificationTracker = new StreamVerificationTracker();
+
+    const tool = createAgentReportTool({
+      ...baseConfig,
+      taskService: createTaskService(false),
+      editTracker,
+      verificationTracker,
+    });
+
+    let caught: unknown = null;
+    try {
+      await Promise.resolve(
+        tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions)
+      );
+    } catch (error: unknown) {
+      caught = error;
+    }
+
+    expect(caught).toBeInstanceOf(Error);
+    if (caught instanceof Error) {
+      expect(caught.message).toMatch(/no validation commands detected/i);
+    }
+    expect(verificationTracker.hasBeenNudged()).toBe(true);
+  });
+
+  it("allows a second report without validation as an explicit escape hatch", async () => {
+    using tempDir = new TestTempDir("test-agent-report-escape-hatch");
+    const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
+
+    const editTracker = new StreamEditTracker();
+    editTracker.recordEdit("/tmp/file.ts");
+    const verificationTracker = new StreamVerificationTracker();
+
+    const tool = createAgentReportTool({
+      ...baseConfig,
+      taskService: createTaskService(false),
+      editTracker,
+      verificationTracker,
+    });
+
+    let firstError: unknown = null;
+    try {
+      await Promise.resolve(
+        tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions)
+      );
+    } catch (error: unknown) {
+      firstError = error;
+    }
+
+    expect(firstError).toBeInstanceOf(Error);
+    if (firstError instanceof Error) {
+      expect(firstError.message).toMatch(/no validation commands detected/i);
+    }
+
+    const secondResult: unknown = await Promise.resolve(
+      tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions)
+    );
+
+    expect(secondResult).toEqual({
+      success: true,
+      message: "Report submitted successfully.",
+    });
+  });
+
+  it("allows report after validation attempt", async () => {
+    using tempDir = new TestTempDir("test-agent-report-validated");
+    const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" });
+
+    const editTracker = new StreamEditTracker();
+    editTracker.recordEdit("/tmp/file.ts");
+    const verificationTracker = new StreamVerificationTracker();
+    verificationTracker.markValidationAttempt();
+
+    const tool = createAgentReportTool({
+      ...baseConfig,
+      taskService: createTaskService(false),
+      editTracker,
+      verificationTracker,
+    });
+
+    const result: unknown = await Promise.resolve(
+      tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions)
+    );
+
+    expect(result).toEqual({
+      success: true,
+      message: "Report submitted successfully.",
+    });
+  });
 });
diff --git a/src/node/services/tools/agent_report.ts b/src/node/services/tools/agent_report.ts
index 99f949a1cb..e762c55a32 100644
--- a/src/node/services/tools/agent_report.ts
+++ b/src/node/services/tools/agent_report.ts
@@ -20,6 +20,20 @@ export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) =>
         );
       }
 
+      // Guard: if edits were made but no validation was attempted, nudge the agent to verify.
+      const hasEdits = config.editTracker?.hasAnyEdits() ?? false;
+      const hasValidated = config.verificationTracker?.hasValidationAttempt() ?? false;
+      if (hasEdits && !hasValidated) {
+        if (!config.verificationTracker?.hasBeenNudged()) {
+          config.verificationTracker?.markNudged();
+          throw new Error(
+            "agent_report rejected: no validation commands detected after file edits. " +
+              "Run the most relevant check (tests, typecheck, lint) and then call agent_report again. " +
+              "If validation is not applicable, call agent_report again to confirm."
+          );
+        }
+      }
+
       // Intentionally no side-effects. The backend orchestrator consumes the tool-call args
       // via persisted history/partial state once the tool call completes successfully.
       // The stream continues after this so the SDK can record usage, while StreamManager
diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts
index bf1d13f0d6..dcf77444fc 100644
--- a/src/node/services/tools/bash.test.ts
+++ b/src/node/services/tools/bash.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, expect } from "bun:test";
 import { LocalRuntime } from "@/node/runtime/LocalRuntime";
-import { createBashTool } from "./bash";
+import { createBashTool, looksLikeValidationCommand } from "./bash";
 import type { BashOutputEvent } from "@/common/types/stream";
 import type { BashToolArgs, BashToolResult } from "@/common/types/tools";
 import { BASH_MAX_TOTAL_BYTES } from "@/common/constants/toolLimits";
@@ -43,6 +43,25 @@ function createTestBashTool() {
   };
 }
 
+describe("looksLikeValidationCommand", () => {
+  it("matches common validation command patterns", () => {
+    expect(looksLikeValidationCommand("make test")).toBe(true);
+    expect(looksLikeValidationCommand("make typecheck")).toBe(true);
+    expect(looksLikeValidationCommand("bun test")).toBe(true);
+    expect(looksLikeValidationCommand("bun run lint")).toBe(true);
+    expect(looksLikeValidationCommand("vitest --run")).toBe(true);
+    expect(looksLikeValidationCommand("run_and_report typecheck make typecheck")).toBe(true);
+    expect(looksLikeValidationCommand("npm run test\npnpm run lint")).toBe(true);
+  });
+
+  it("does not match non-validation commands", () => {
+    expect(looksLikeValidationCommand("make build")).toBe(false);
+    expect(looksLikeValidationCommand("bun install")).toBe(false);
+    expect(looksLikeValidationCommand("echo test")).toBe(false);
+    expect(looksLikeValidationCommand("cargo build")).toBe(false);
+  });
+});
+
 describe("bash tool", () => {
   it("should execute a simple command successfully", async () => {
     using testEnv = createTestBashTool();
diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index b51b70c5ad..42c969165a 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -522,6 +522,24 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu
   return null; // Valid
 }
 
+const VALIDATION_PATTERNS: RegExp[] = [
+  /(^|\n)\s*run_and_report\b/,
+  /(^|\n)\s*make\s+(test|typecheck|lint|static-check|fmt-check)\b/,
+  /(^|\n)\s*bun\s+(test|run\s+test|run\s+typecheck|run\s+lint)\b/,
+  /(^|\n)\s*npm\s+(test|run\s+(test|typecheck|lint))\b/,
+  /(^|\n)\s*pnpm\s+(test|run\s+(test|typecheck|lint))\b/,
+  /(^|\n)\s*yarn\s+(test|run\s+(test|typecheck|lint))\b/,
+  /(^|\n)\s*tsc\b/,
+  /(^|\n)\s*eslint\b/,
+  /(^|\n)\s*vitest\b/,
+  /(^|\n)\s*pytest\b/,
+  /(^|\n)\s*cargo\s+(test|check|clippy)\b/,
+];
+
+export function looksLikeValidationCommand(script: string): boolean {
+  return VALIDATION_PATTERNS.some((pattern) => pattern.test(script));
+}
+
 /**
  * Rewrite cmd.exe-style null-device redirects (e.g. `>nul`, `2>nul`) into `/dev/null`.
  *
@@ -861,6 +879,11 @@ export const createBashTool: ToolFactory = (config: ToolConfiguration) => {
       const validationError = validateScript(script, config);
       if (validationError) return validationError;
 
+      // Mark validation attempts for the pre-completion verification guard.
+      if (looksLikeValidationCommand(script)) {
+        config.verificationTracker?.markValidationAttempt();
+      }
+
       // Warn when the model appears to be reading files via bash output (cat/rg/grep).
       // Reading files via bash output is fragile (may be truncated or auto-filtered);
       // file_read supports paging and avoids silent context loss.
diff --git a/src/node/services/tools/file_edit_operation.test.ts b/src/node/services/tools/file_edit_operation.test.ts
index d5d66b7fe0..f3da90c34e 100644
--- a/src/node/services/tools/file_edit_operation.test.ts
+++ b/src/node/services/tools/file_edit_operation.test.ts
@@ -4,6 +4,11 @@ import * as path from "path";
 import { executeFileEditOperation } from "./file_edit_operation";
 import type { Runtime } from "@/node/runtime/Runtime";
 import { LocalRuntime } from "@/node/runtime/LocalRuntime";
+import { MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD } from "@/common/utils/tools/internalToolResultFields";
+import {
+  DOOM_LOOP_EDIT_THRESHOLD,
+  StreamEditTracker,
+} from "@/node/services/streamGuardrails/StreamEditTracker";
 
 import { getTestDeps, TestTempDir } from "./testHelpers";
 
@@ -282,3 +287,81 @@ describe("executeFileEditOperation plan mode enforcement", () => {
     expect(resolvePathCalls).toContain("/home/user/.mux/sessions/ws/plan.md");
   });
 });
+
+describe("executeFileEditOperation doom-loop guard", () => {
+  test("attaches a model-only notification when edit threshold is reached", async () => {
+    using tempDir = new TestTempDir("doom-loop-guard-test");
+
+    const filePath = path.join(tempDir.path, "main.ts");
+    await fs.writeFile(filePath, "const x = 0;\n");
+
+    const runtime = new LocalRuntime(tempDir.path);
+    const editTracker = new StreamEditTracker();
+
+    let thresholdResult: unknown;
+    for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) {
+      thresholdResult = await executeFileEditOperation({
+        config: {
+          cwd: tempDir.path,
+          runtime,
+          runtimeTempDir: tempDir.path,
+          editTracker,
+        },
+        filePath,
+        operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }),
+      });
+    }
+
+    expect(thresholdResult).toBeDefined();
+    const resultRecord = thresholdResult as Record<string, unknown>;
+
+    expect(resultRecord.success).toBe(true);
+    expect(Array.isArray(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD])).toBe(true);
+
+    const notifications = resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD] as string[];
+    expect(notifications[0]).toContain("Potential doom loop");
+    expect(notifications[0]).toContain(filePath);
+  });
+
+  test("nudges at most once per file for a stream", async () => {
+    using tempDir = new TestTempDir("doom-loop-guard-once-test");
+
+    const filePath = path.join(tempDir.path, "main.ts");
+    await fs.writeFile(filePath, "const x = 0;\n");
+
+    const runtime = new LocalRuntime(tempDir.path);
+    const editTracker = new StreamEditTracker();
+
+    for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) {
+      await executeFileEditOperation({
+        config: {
+          cwd: tempDir.path,
+          runtime,
+          runtimeTempDir: tempDir.path,
+          editTracker,
+        },
+        filePath,
+        operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }),
+      });
+    }
+
+    const postThresholdResult = await executeFileEditOperation({
+      config: {
+        cwd: tempDir.path,
+        runtime,
+        runtimeTempDir: tempDir.path,
+        editTracker,
+      },
+      filePath,
+      operation: () => ({
+        success: true,
+        newContent: `const x = ${DOOM_LOOP_EDIT_THRESHOLD + 1};\n`,
+        metadata: {},
+      }),
+    });
+
+    const resultRecord = postThresholdResult as unknown as Record<string, unknown>;
+    expect(resultRecord.success).toBe(true);
+    expect(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD]).toBeUndefined();
+  });
+});
diff --git a/src/node/services/tools/file_edit_operation.ts b/src/node/services/tools/file_edit_operation.ts
index 50a334c029..be401a9c60 100644
--- a/src/node/services/tools/file_edit_operation.ts
+++ b/src/node/services/tools/file_edit_operation.ts
@@ -13,6 +13,8 @@ import {
 import { RuntimeError } from "@/node/runtime/Runtime";
 import { readFileString, writeFileString } from "@/node/utils/runtime/helpers";
 import { getErrorMessage } from "@/common/utils/errors";
+import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields";
+import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker";
 
 type FileEditOperationResult<TMetadata> =
   | {
@@ -131,6 +133,8 @@ export async function executeFileEditOperation<TMetadata>({
       throw err;
     }
 
+    let doomLoopNudge: string | undefined;
+
     // Record file state for post-compaction attachment tracking
     if (config.recordFileState) {
       try {
@@ -144,9 +148,18 @@ export async function executeFileEditOperation<TMetadata>({
       }
     }
 
+    // Track repeated edits to detect potential doom loops in exec mode.
+    if (!config.planFileOnly && config.editTracker) {
+      const editCount = config.editTracker.recordEdit(resolvedPath);
+      if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) {
+        config.editTracker.markNudged(resolvedPath);
+        doomLoopNudge = `<notification>Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n- Consider a fundamentally different approach (not a small variation of what you've been trying).</notification>`;
+      }
+    }
+
     const diff = generateDiff(resolvedPath, originalContent, operationResult.newContent);
 
-    return {
+    const baseResult: FileEditDiffSuccessBase & TMetadata = {
       success: true,
       diff: FILE_EDIT_DIFF_OMITTED_MESSAGE,
       ui_only: {
@@ -155,8 +168,14 @@ export async function executeFileEditOperation<TMetadata>({
         },
       },
       ...operationResult.metadata,
-      ...(pathWarning && { warning: pathWarning }),
+      ...(pathWarning ? { warning: pathWarning } : {}),
     };
+
+    if (doomLoopNudge) {
+      return attachModelOnlyToolNotifications(baseResult, [doomLoopNudge]) as typeof baseResult;
+    }
+
+    return baseResult;
   } catch (error) {
     if (error && typeof error === "object" && "code" in error) {
       const nodeError = error as { code?: string };

From 60ec04a72dd6ca4d3145f6aef554993b7762f57e Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:14:55 +0000
Subject: [PATCH 2/8] fix: tighten run_and_report validation detection to check
 wrapped command

run_and_report is a generic wrapper, not inherently a validation command.
Only match when the wrapped command itself is a validation command
(e.g., run_and_report typecheck make typecheck).
---
 src/node/services/tools/bash.test.ts |  2 ++
 src/node/services/tools/bash.ts      | 21 ++++++++++-----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts
index dcf77444fc..bdcd778996 100644
--- a/src/node/services/tools/bash.test.ts
+++ b/src/node/services/tools/bash.test.ts
@@ -59,6 +59,8 @@ describe("looksLikeValidationCommand", () => {
     expect(looksLikeValidationCommand("bun install")).toBe(false);
     expect(looksLikeValidationCommand("echo test")).toBe(false);
     expect(looksLikeValidationCommand("cargo build")).toBe(false);
+    // run_and_report wrapping a non-validation command should NOT trigger
+    expect(looksLikeValidationCommand("run_and_report install bun install")).toBe(false);
   });
 });
 
diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index 42c969165a..12774d1168 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -522,18 +522,17 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu
   return null; // Valid
 }
 
+// Patterns for commands that are inherently validation (tests, typecheck, lint).
+// `run_and_report` is a generic wrapper (`run_and_report <step> <cmd...>`), so we
+// match it only when the wrapped command itself is a validation command.
+const VALIDATION_COMMAND_RE =
+  /(?:make\s+(?:test|typecheck|lint|static-check|fmt-check)|bun\s+(?:test|run\s+(?:test|typecheck|lint))|npm\s+(?:test|run\s+(?:test|typecheck|lint))|pnpm\s+(?:test|run\s+(?:test|typecheck|lint))|yarn\s+(?:test|run\s+(?:test|typecheck|lint))|tsc|eslint|vitest|pytest|cargo\s+(?:test|check|clippy))\b/;
+
 const VALIDATION_PATTERNS: RegExp[] = [
-  /(^|\n)\s*run_and_report\b/,
-  /(^|\n)\s*make\s+(test|typecheck|lint|static-check|fmt-check)\b/,
-  /(^|\n)\s*bun\s+(test|run\s+test|run\s+typecheck|run\s+lint)\b/,
-  /(^|\n)\s*npm\s+(test|run\s+(test|typecheck|lint))\b/,
-  /(^|\n)\s*pnpm\s+(test|run\s+(test|typecheck|lint))\b/,
-  /(^|\n)\s*yarn\s+(test|run\s+(test|typecheck|lint))\b/,
-  /(^|\n)\s*tsc\b/,
-  /(^|\n)\s*eslint\b/,
-  /(^|\n)\s*vitest\b/,
-  /(^|\n)\s*pytest\b/,
-  /(^|\n)\s*cargo\s+(test|check|clippy)\b/,
+  // run_and_report <step_name> <validation_command...>
+  new RegExp(`(^|\\n)\\s*run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`),
+  // Standalone validation commands at the start of a line
+  new RegExp(`(^|\\n)\\s*${VALIDATION_COMMAND_RE.source}`),
 ];
 
 export function looksLikeValidationCommand(script: string): boolean {

From abb5fb5da4ad15ee3c3dba2d7c92e6837cc60691 Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:22:26 +0000
Subject: [PATCH 3/8] fix: track new-file creation in edit tracker for
 verification guard

The create-file branch in file_edit_insert.ts bypassed executeFileEditOperation,
so new files weren't counted by the edit tracker. This meant a stream that only
created files could skip verification.
---
 src/node/services/tools/file_edit_insert.ts | 23 ++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/node/services/tools/file_edit_insert.ts b/src/node/services/tools/file_edit_insert.ts
index f0a15783d9..2ec6b1a84e 100644
--- a/src/node/services/tools/file_edit_insert.ts
+++ b/src/node/services/tools/file_edit_insert.ts
@@ -14,6 +14,8 @@ import { fileExists } from "@/node/utils/runtime/fileExists";
 import { writeFileString } from "@/node/utils/runtime/helpers";
 import { RuntimeError } from "@/node/runtime/Runtime";
 import { getErrorMessage } from "@/common/utils/errors";
+import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields";
+import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker";
 
 const READ_AND_RETRY_NOTE = `${EDIT_FAILED_NOTE_PREFIX} ${NOTE_READ_FILE_RETRY}`;
 
@@ -97,8 +99,21 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration)
             }
           }
 
+          // Track new-file creation for doom-loop detection and verification guard
+          let doomLoopNudge: string | undefined;
+          if (!config.planFileOnly && config.editTracker) {
+            const editCount = config.editTracker.recordEdit(resolvedPath);
+            if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) {
+              config.editTracker.markNudged(resolvedPath);
+              doomLoopNudge =
+                `<notification>Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. ` +
+                `Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n` +
+                `- Consider a fundamentally different approach (not a small variation of what you've been trying).</notification>`;
+            }
+          }
+
           const diff = generateDiff(resolvedPath, "", content);
-          return {
+          const baseResult: FileEditInsertToolResult = {
             success: true,
             diff: FILE_EDIT_DIFF_OMITTED_MESSAGE,
             ui_only: {
@@ -108,6 +123,12 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration)
             },
             ...(pathWarning && { warning: pathWarning }),
           };
+          if (doomLoopNudge) {
+            return attachModelOnlyToolNotifications(baseResult, [
+              doomLoopNudge,
+            ]) as FileEditInsertToolResult;
+          }
+          return baseResult;
         }
 
         return executeFileEditOperation({

From 6f07a38a6d5f8e5d0fe06945d0bcfb0dc8b4ddde Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:28:32 +0000
Subject: [PATCH 4/8] fix: recognize validation commands after shell operators
 (&&, ;, |)

Match validation commands like 'cd packages/app && make test' and
'source .env; bun test' by accepting shell operators as command prefixes
in addition to line start.
---
 src/node/services/tools/bash.test.ts |  7 +++++++
 src/node/services/tools/bash.ts      | 12 ++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts
index bdcd778996..136e18d92c 100644
--- a/src/node/services/tools/bash.test.ts
+++ b/src/node/services/tools/bash.test.ts
@@ -52,6 +52,13 @@ describe("looksLikeValidationCommand", () => {
     expect(looksLikeValidationCommand("vitest --run")).toBe(true);
     expect(looksLikeValidationCommand("run_and_report typecheck make typecheck")).toBe(true);
     expect(looksLikeValidationCommand("npm run test\npnpm run lint")).toBe(true);
+    // Validation commands after shell operators (monorepo/subdirectory workflows)
+    expect(looksLikeValidationCommand("cd packages/app && make test")).toBe(true);
+    expect(looksLikeValidationCommand("cd packages/app && bun test")).toBe(true);
+    expect(looksLikeValidationCommand("source .env; make typecheck")).toBe(true);
+    expect(looksLikeValidationCommand("run_and_report unit cd packages/app && bun test")).toBe(
+      true
+    );
   });
 
   it("does not match non-validation commands", () => {
diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index 12774d1168..32eabbdfa3 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -528,11 +528,15 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu
 const VALIDATION_COMMAND_RE =
   /(?:make\s+(?:test|typecheck|lint|static-check|fmt-check)|bun\s+(?:test|run\s+(?:test|typecheck|lint))|npm\s+(?:test|run\s+(?:test|typecheck|lint))|pnpm\s+(?:test|run\s+(?:test|typecheck|lint))|yarn\s+(?:test|run\s+(?:test|typecheck|lint))|tsc|eslint|vitest|pytest|cargo\s+(?:test|check|clippy))\b/;
 
+// Match validation commands at line start, after shell operators (&&, ||, ;, |),
+// or after run_and_report wrappers, to handle monorepo/subdirectory workflows
+// like `cd packages/app && make test`.
+const CMD_PREFIX = String.raw`(?:^|\n|&&|\|\||[;|])\s*`;
 const VALIDATION_PATTERNS: RegExp[] = [
-  // run_and_report <step_name> <validation_command...>
-  new RegExp(`(^|\\n)\\s*run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`),
-  // Standalone validation commands at the start of a line
-  new RegExp(`(^|\\n)\\s*${VALIDATION_COMMAND_RE.source}`),
+  // run_and_report <step_name> <validation_command...> (may include cd/shell before the command)
+  new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+.*?${VALIDATION_COMMAND_RE.source}`),
+  // Validation commands at line start or after shell operators
+  new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`),
 ];
 
 export function looksLikeValidationCommand(script: string): boolean {

From 46f3848ba74c2e0f1ca82922c8e0525a58daccab Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:34:57 +0000
Subject: [PATCH 5/8] fix: narrow run_and_report regex to match only the actual
 command

Remove .*? from the run_and_report pattern so that only the third word
(the actual command) is checked against validation patterns. Chained
commands after && are already caught by the standalone pattern.
---
 src/node/services/tools/bash.test.ts | 2 ++
 src/node/services/tools/bash.ts      | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts
index 136e18d92c..acccb44296 100644
--- a/src/node/services/tools/bash.test.ts
+++ b/src/node/services/tools/bash.test.ts
@@ -68,6 +68,8 @@ describe("looksLikeValidationCommand", () => {
     expect(looksLikeValidationCommand("cargo build")).toBe(false);
     // run_and_report wrapping a non-validation command should NOT trigger
     expect(looksLikeValidationCommand("run_and_report install bun install")).toBe(false);
+    // run_and_report with validation text appearing as arguments, not the actual command
+    expect(looksLikeValidationCommand("run_and_report note echo make test")).toBe(false);
   });
 });
 
diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index 32eabbdfa3..917f8b02e4 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -533,8 +533,10 @@ const VALIDATION_COMMAND_RE =
 // like `cd packages/app && make test`.
 const CMD_PREFIX = String.raw`(?:^|\n|&&|\|\||[;|])\s*`;
 const VALIDATION_PATTERNS: RegExp[] = [
-  // run_and_report <step_name> <validation_command...> (may include cd/shell before the command)
-  new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+.*?${VALIDATION_COMMAND_RE.source}`),
+  // run_and_report <step_name> <validation_command> — only when the actual command
+  // (third word) is a validation command. Chained commands like
+  // `run_and_report unit cd app && bun test` are caught by the standalone pattern below.
+  new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`),
   // Validation commands at line start or after shell operators
   new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`),
 ];

From b5757d4d7ec16b0333925dc751220ea0af130415 Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 00:41:20 +0000
Subject: [PATCH 6/8] docs: add comment explaining validation heuristic
 limitations

Shell command parsing with regex is inherently imperfect. The escape hatch
(second agent_report always passes) covers false negatives from env prefixes
and shell wrappers.
---
 src/node/services/tools/bash.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index 917f8b02e4..24a851632c 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -536,6 +536,10 @@ const VALIDATION_PATTERNS: RegExp[] = [
   // run_and_report <step_name> <validation_command> — only when the actual command
   // (third word) is a validation command. Chained commands like
   // `run_and_report unit cd app && bun test` are caught by the standalone pattern below.
+  // NOTE: This is a heuristic. Environment prefixes like `env CI=1 bun test` or
+  // `bash -c "make test"` won't match the run_and_report rule, but may be caught by
+  // the standalone pattern if chained with &&/;. The agent_report escape hatch (second
+  // attempt always passes) covers any remaining false negatives.
   new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`),
   // Validation commands at line start or after shell operators
   new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`),

From 07e9e67fcd06b9108d456e39fa1361c762ac292c Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 09:17:39 +0000
Subject: [PATCH 7/8] fix: exclude background commands from verification credit

Background bash commands (run_in_background=true) haven't produced
results yet when they start, so they shouldn't count as 'validation
attempted' for the pre-completion verification guard.
---
 src/node/services/tools/bash.test.ts | 41 ++++++++++++++++++++++++++++
 src/node/services/tools/bash.ts      |  4 ++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts
index acccb44296..df5d24a1a4 100644
--- a/src/node/services/tools/bash.test.ts
+++ b/src/node/services/tools/bash.test.ts
@@ -19,6 +19,7 @@ function isForegroundSuccess(
 }
 
 import { BackgroundProcessManager } from "@/node/services/backgroundProcessManager";
+import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker";
 
 // Mock ToolCallOptions for testing
 const mockToolCallOptions: ToolExecutionOptions = {
@@ -1921,3 +1922,43 @@ describe("bash tool - background execution", () => {
     tempDir[Symbol.dispose]();
   });
 });
+
+describe("bash tool - verification tracker", () => {
+  it("should mark verification for foreground validation commands", async () => {
+    const tempDir = new TestTempDir("test-bash-verify");
+    const config = createTestToolConfig(tempDir.path);
+    const tracker = new StreamVerificationTracker();
+    config.verificationTracker = tracker;
+    const tool = createBashTool(config);
+
+    await tool.execute!(
+      { script: "make test", timeout_secs: 5, run_in_background: false, display_name: "test" },
+      mockToolCallOptions
+    );
+
+    expect(tracker.hasValidationAttempt()).toBe(true);
+    tempDir[Symbol.dispose]();
+  });
+
+  it("should not mark verification for background validation commands", async () => {
+    const manager = new BackgroundProcessManager("/tmp/mux-test-verify-bg");
+    const tempDir = new TestTempDir("test-bash-verify-bg");
+    const config = createTestToolConfig(tempDir.path);
+    const tracker = new StreamVerificationTracker();
+    config.verificationTracker = tracker;
+    config.backgroundProcessManager = manager;
+    const tool = createBashTool(config);
+
+    await tool.execute!(
+      { script: "make test", timeout_secs: 5, run_in_background: true, display_name: "test-bg" },
+      mockToolCallOptions
+    );
+
+    // Background commands haven't produced results yet, so they shouldn't
+    // count as "validation attempted"
+    expect(tracker.hasValidationAttempt()).toBe(false);
+
+    await manager.terminateAll();
+    tempDir[Symbol.dispose]();
+  });
+});
diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts
index 24a851632c..f8f94b9b61 100644
--- a/src/node/services/tools/bash.ts
+++ b/src/node/services/tools/bash.ts
@@ -889,7 +889,9 @@ export const createBashTool: ToolFactory = (config: ToolConfiguration) => {
       if (validationError) return validationError;
 
       // Mark validation attempts for the pre-completion verification guard.
-      if (looksLikeValidationCommand(script)) {
+      // Only count foreground commands — background processes haven't produced
+      // results yet, so they don't count as "validation attempted".
+      if (!run_in_background && looksLikeValidationCommand(script)) {
         config.verificationTracker?.markValidationAttempt();
       }
 

From f8b64a38ffa88cc415379f2ef61e6173a8bd72ab Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Wed, 18 Feb 2026 09:33:37 +0000
Subject: [PATCH 8/8] fix: reset validation state on file edits so pre-edit
 validation doesn't count

Previously, running make test before editing files would permanently
satisfy the verification guard. Now file edits reset the validation
tracker, ensuring only post-edit validation counts.
---
 .../StreamVerificationTracker.test.ts         | 29 +++++++++++++++++++
 .../StreamVerificationTracker.ts              |  9 ++++++
 src/node/services/tools/file_edit_insert.ts   |  4 ++-
 .../services/tools/file_edit_operation.ts     |  2 ++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
index a4c0ce9294..1a4cb315ed 100644
--- a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
+++ b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts
@@ -27,4 +27,33 @@ describe("StreamVerificationTracker", () => {
     expect(tracker.hasValidationAttempt()).toBe(true);
     expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false);
   });
+
+  test("resetValidation clears validation state so pre-edit validation doesn't count", () => {
+    const tracker = new StreamVerificationTracker();
+
+    // Validate first
+    tracker.markValidationAttempt();
+    expect(tracker.hasValidationAttempt()).toBe(true);
+
+    // Then an edit happens — validation should be reset
+    tracker.resetValidation();
+    expect(tracker.hasValidationAttempt()).toBe(false);
+
+    // Guard should now nudge because validation was pre-edit
+    expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true);
+  });
+
+  test("post-edit validation still counts after reset", () => {
+    const tracker = new StreamVerificationTracker();
+
+    // Validate, then edit resets it
+    tracker.markValidationAttempt();
+    tracker.resetValidation();
+    expect(tracker.hasValidationAttempt()).toBe(false);
+
+    // Validate again (post-edit) — should count
+    tracker.markValidationAttempt();
+    expect(tracker.hasValidationAttempt()).toBe(true);
+    expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false);
+  });
 });
diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.ts
index c40224305a..937d4fe5b2 100644
--- a/src/node/services/streamGuardrails/StreamVerificationTracker.ts
+++ b/src/node/services/streamGuardrails/StreamVerificationTracker.ts
@@ -1,5 +1,8 @@
 /**
  * Tracks whether a stream attempted validation commands before completion.
+ *
+ * Validation state is reset whenever a file edit is recorded, so only
+ * post-edit validation counts towards the pre-completion guard.
  */
 export class StreamVerificationTracker {
   private validationAttempted = false;
@@ -9,6 +12,12 @@ export class StreamVerificationTracker {
     this.validationAttempted = true;
   }
 
+  /** Reset validation state — called when new edits are recorded so that
+   *  pre-edit validation doesn't satisfy the post-edit verification guard. */
+  resetValidation(): void {
+    this.validationAttempted = false;
+  }
+
   hasValidationAttempt(): boolean {
     return this.validationAttempted;
   }
diff --git a/src/node/services/tools/file_edit_insert.ts b/src/node/services/tools/file_edit_insert.ts
index 2ec6b1a84e..5bcc276aff 100644
--- a/src/node/services/tools/file_edit_insert.ts
+++ b/src/node/services/tools/file_edit_insert.ts
@@ -99,10 +99,12 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration)
             }
           }
 
-          // Track new-file creation for doom-loop detection and verification guard
+          // Track new-file creation for doom-loop detection and verification guard.
+          // Reset verification state so pre-edit validation doesn't count.
           let doomLoopNudge: string | undefined;
           if (!config.planFileOnly && config.editTracker) {
             const editCount = config.editTracker.recordEdit(resolvedPath);
+            config.verificationTracker?.resetValidation();
             if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) {
               config.editTracker.markNudged(resolvedPath);
               doomLoopNudge =
diff --git a/src/node/services/tools/file_edit_operation.ts b/src/node/services/tools/file_edit_operation.ts
index be401a9c60..4e33745235 100644
--- a/src/node/services/tools/file_edit_operation.ts
+++ b/src/node/services/tools/file_edit_operation.ts
@@ -149,8 +149,10 @@ export async function executeFileEditOperation<TMetadata>({
     }
 
     // Track repeated edits to detect potential doom loops in exec mode.
+    // Reset verification state so pre-edit validation doesn't count.
     if (!config.planFileOnly && config.editTracker) {
       const editCount = config.editTracker.recordEdit(resolvedPath);
+      config.verificationTracker?.resetValidation();
       if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) {
         config.editTracker.markNudged(resolvedPath);
         doomLoopNudge = `<notification>Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n- Consider a fundamentally different approach (not a small variation of what you've been trying).</notification>`;